diff --git a/Cargo.lock b/Cargo.lock index 0c1fd9cf8d5..dc2289e7f9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6610,6 +6610,7 @@ dependencies = [ "camino", "dropshot", "expectorate", + "ipnet", "libc", "nexus-types", "omicron-common", @@ -7387,6 +7388,7 @@ dependencies = [ "crucible-agent-client", "dns-server", "dns-service-client", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194)", "dropshot", "futures", "gateway-messages", @@ -8282,6 +8284,7 @@ dependencies = [ "illumos-utils", "internal-dns-resolver", "internal-dns-types", + "ipnet", "ipnetwork", "itertools 0.14.0", "jiff", @@ -8780,12 +8783,14 @@ dependencies = [ "oximeter-producer", "oxnet", "pretty_assertions", + "progenitor 0.10.0", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=3f1752e6cee9a2f8ecdce6e2ad3326781182e2d9)", "propolis-mock-server", "propolis_api_types", "rand 0.9.2", "range-requests", "rcgen", + "regress", "repo-depot-api", "repo-depot-client", "reqwest", @@ -8810,6 +8815,7 @@ dependencies = [ "slog-error-chain", "slog-term", "smf 0.2.3", + "sp-sim", "sprockets-tls", "static_assertions", "strum 0.27.2", diff --git a/common/src/address.rs b/common/src/address.rs index 2cc89deee30..4334179148c 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -52,6 +52,55 @@ pub const IPV6_SSM_SUBNET: oxnet::Ipv6Net = oxnet::Ipv6Net::new_unchecked( 12, ); +/// IPv4 multicast address range (224.0.0.0/4). +/// See RFC 5771 (IPv4 Multicast Address Assignments): +/// +pub const IPV4_MULTICAST_RANGE: Ipv4Net = + Ipv4Net::new_unchecked(Ipv4Addr::new(224, 0, 0, 0), 4); + +/// IPv4 link-local multicast subnet (224.0.0.0/24). +/// This range is reserved for local network control protocols and should not +/// be routed beyond the local link. Includes addresses for protocols like +/// OSPF (224.0.0.5), RIPv2 (224.0.0.9), and other local routing protocols. +/// See RFC 5771 Section 4: +/// +pub const IPV4_LINK_LOCAL_MULTICAST_SUBNET: Ipv4Net = + Ipv4Net::new_unchecked(Ipv4Addr::new(224, 0, 0, 0), 24); + +/// IPv6 multicast address range (ff00::/8). +/// See RFC 4291 (IPv6 Addressing Architecture): +/// +pub const IPV6_MULTICAST_RANGE: Ipv6Net = + Ipv6Net::new_unchecked(Ipv6Addr::new(0xff00, 0, 0, 0, 0, 0, 0, 0), 8); + +/// IPv6 multicast prefix (ff00::/8) mask/value for scope checking. +pub const IPV6_MULTICAST_PREFIX: u16 = 0xff00; + +/// Admin-scoped IPv6 multicast prefix (ff04::/16) as u16 for address +/// construction and normalization of underlay multicast addresses. +pub const IPV6_ADMIN_SCOPED_MULTICAST_PREFIX: u16 = 0xff04; + +/// IPv6 interface-local multicast subnet (ff01::/16). +/// These addresses are not routable and should not be added to IP pools. +/// See RFC 4291 Section 2.7 (multicast scope field): +/// +pub const IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET: oxnet::Ipv6Net = + oxnet::Ipv6Net::new_unchecked( + Ipv6Addr::new(0xff01, 0, 0, 0, 0, 0, 0, 0), + 16, + ); + +/// IPv6 link-local multicast subnet (ff02::/16). +/// These addresses are not routable beyond the local link and should not be +/// added to IP pools. 
+/// See RFC 4291 Section 2.7 (multicast scope field): +/// +pub const IPV6_LINK_LOCAL_MULTICAST_SUBNET: oxnet::Ipv6Net = + oxnet::Ipv6Net::new_unchecked( + Ipv6Addr::new(0xff02, 0, 0, 0, 0, 0, 0, 0), + 16, + ); + /// maximum possible value for a tcp or udp port pub const MAX_PORT: u16 = u16::MAX; diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 651e8629700..a0101fd7e84 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -957,6 +957,8 @@ pub enum ResourceType { LldpLinkConfig, LoopbackAddress, MetricProducer, + MulticastGroup, + MulticastGroupMember, NatEntry, Oximeter, PhysicalDisk, @@ -2523,6 +2525,12 @@ impl Vni { /// The VNI for the builtin services VPC. pub const SERVICES_VNI: Self = Self(100); + /// VNI default if no VPC is provided for a multicast group. + /// + /// This is a low-numbered VNI to avoid colliding with user VNIs. + /// However, it is not in the Oxide-reserved range yet. + pub const DEFAULT_MULTICAST_VNI: Self = Self(77); + /// Oxide reserves a slice of initial VNIs for its own use. pub const MIN_GUEST_VNI: u32 = 1024; diff --git a/common/src/vlan.rs b/common/src/vlan.rs index 67c9d4c343e..64eecb33478 100644 --- a/common/src/vlan.rs +++ b/common/src/vlan.rs @@ -16,7 +16,7 @@ pub const VLAN_MAX: u16 = 4094; /// Wrapper around a VLAN ID, ensuring it is valid. #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Copy, JsonSchema)] -#[serde(rename = "VlanId")] +#[serde(transparent)] pub struct VlanID(u16); impl VlanID { diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 7195f3eb963..ec0798a91b0 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -2,9 +2,9 @@ EXECUTING COMMAND: omdb ["db", "--db-url", "postgresql://root@[::1]:REDACTED_POR termination: Exited(0) --------------------------------------------- stdout: - SERIAL IP ROLE POLICY STATE ID - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... + SERIAL IP ROLE POLICY STATE ID + SimGimlet01 [::1]:REDACTED_PORT scrimlet in service active ..................... + SimGimlet00 [::1]:REDACTED_PORT scrimlet in service active ..................... 
--------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -136,6 +136,11 @@ task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease +task: "multicast_reconciler" + reconciles multicast group and member state with dendrite switch + configuration + + task: "nat_garbage_collector" prunes soft-deleted NAT entries from nat_entry table based on a predetermined retention policy @@ -364,6 +369,11 @@ task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease +task: "multicast_reconciler" + reconciles multicast group and member state with dendrite switch + configuration + + task: "nat_garbage_collector" prunes soft-deleted NAT entries from nat_entry table based on a predetermined retention policy @@ -579,6 +589,11 @@ task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease +task: "multicast_reconciler" + reconciles multicast group and member state with dendrite switch + configuration + + task: "nat_garbage_collector" prunes soft-deleted NAT entries from nat_entry table based on a predetermined retention policy @@ -683,9 +698,9 @@ EXECUTING COMMAND: omdb ["db", "sleds"] termination: Exited(0) --------------------------------------------- stdout: - SERIAL IP ROLE POLICY STATE ID - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... + SERIAL IP ROLE POLICY STATE ID + SimGimlet01 [::1]:REDACTED_PORT scrimlet in service active ..................... + SimGimlet00 [::1]:REDACTED_PORT scrimlet in service active ..................... --------------------------------------------- stderr: note: database URL not specified. Will search DNS. @@ -698,9 +713,9 @@ EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] termination: Exited(0) --------------------------------------------- stdout: - SERIAL IP ROLE POLICY STATE ID - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... + SERIAL IP ROLE POLICY STATE ID + SimGimlet01 [::1]:REDACTED_PORT scrimlet in service active ..................... + SimGimlet00 [::1]:REDACTED_PORT scrimlet in service active ..................... --------------------------------------------- stderr: note: database URL not specified. Will search DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 05981f6085d..0d3c4203166 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -89,9 +89,9 @@ EXECUTING COMMAND: omdb ["db", "sleds"] termination: Exited(0) --------------------------------------------- stdout: - SERIAL IP ROLE POLICY STATE ID - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... + SERIAL IP ROLE POLICY STATE ID + SimGimlet01 [::1]:REDACTED_PORT scrimlet in service active ..................... + SimGimlet00 [::1]:REDACTED_PORT scrimlet in service active ..................... 
--------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -102,9 +102,9 @@ EXECUTING COMMAND: omdb ["db", "sleds", "-F", "discretionary"] termination: Exited(0) --------------------------------------------- stdout: - SERIAL IP ROLE POLICY STATE ID - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... - sim-..................... [::1]:REDACTED_PORT scrimlet in service active ..................... + SERIAL IP ROLE POLICY STATE ID + SimGimlet01 [::1]:REDACTED_PORT scrimlet in service active ..................... + SimGimlet00 [::1]:REDACTED_PORT scrimlet in service active ..................... --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -371,6 +371,11 @@ task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease +task: "multicast_reconciler" + reconciles multicast group and member state with dendrite switch + configuration + + task: "nat_garbage_collector" prunes soft-deleted NAT entries from nat_entry table based on a predetermined retention policy @@ -704,6 +709,12 @@ task: "metrics_producer_gc" started at (s ago) and ran for ms warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String(""), "pruned": Array []}) +task: "multicast_reconciler" + configured period: every m + last completed activation: , triggered by + started at (s ago) and ran for ms +warning: unknown background task: "multicast_reconciler" (don't know how to interpret details: Object {"disabled": Bool(false), "errors": Array [], "groups_created": Number(0), "groups_deleted": Number(0), "groups_verified": Number(0), "members_deleted": Number(0), "members_processed": Number(0)}) + task: "phantom_disks" configured period: every s last completed activation: , triggered by @@ -1266,6 +1277,12 @@ task: "metrics_producer_gc" started at (s ago) and ran for ms warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String(""), "pruned": Array []}) +task: "multicast_reconciler" + configured period: every m + last completed activation: , triggered by + started at (s ago) and ran for ms +warning: unknown background task: "multicast_reconciler" (don't know how to interpret details: Object {"disabled": Bool(false), "errors": Array [], "groups_created": Number(0), "groups_deleted": Number(0), "groups_verified": Number(0), "members_deleted": Number(0), "members_processed": Number(0)}) + task: "phantom_disks" configured period: every s last completed activation: , triggered by @@ -1851,30 +1868,30 @@ termination: Exited(0) --------------------------------------------- stdout: Installed RoT Bootloader Software -BASEBOARD_ID STAGE0_VERSION STAGE0_NEXT_VERSION -FAKE_SIM_SIDECAR:SimSidecar0 unknown unknown -FAKE_SIM_SIDECAR:SimSidecar1 unknown unknown -sim-gimlet:sim-..................... unknown unknown -sim-gimlet:sim-..................... 
unknown unknown +BASEBOARD_ID STAGE0_VERSION STAGE0_NEXT_VERSION +FAKE_SIM_SIDECAR:SimSidecar0 unknown unknown +FAKE_SIM_SIDECAR:SimSidecar1 unknown unknown +i86pc:SimGimlet00 unknown unknown +i86pc:SimGimlet01 unknown unknown Installed RoT Software -BASEBOARD_ID SLOT_A_VERSION SLOT_B_VERSION -FAKE_SIM_SIDECAR:SimSidecar0 unknown (active) unknown -FAKE_SIM_SIDECAR:SimSidecar1 unknown (active) unknown -sim-gimlet:sim-..................... unknown unknown (ACTIVE SLOT UNKNOWN) -sim-gimlet:sim-..................... unknown unknown (ACTIVE SLOT UNKNOWN) +BASEBOARD_ID SLOT_A_VERSION SLOT_B_VERSION +FAKE_SIM_SIDECAR:SimSidecar0 unknown (active) unknown +FAKE_SIM_SIDECAR:SimSidecar1 unknown (active) unknown +i86pc:SimGimlet00 unknown (active) unknown +i86pc:SimGimlet01 unknown (active) unknown Installed SP Software -BASEBOARD_ID SLOT0_VERSION SLOT1_VERSION -FAKE_SIM_SIDECAR:SimSidecar0 unknown unknown -FAKE_SIM_SIDECAR:SimSidecar1 unknown unknown -sim-gimlet:sim-..................... unknown unknown -sim-gimlet:sim-..................... unknown unknown +BASEBOARD_ID SLOT0_VERSION SLOT1_VERSION +FAKE_SIM_SIDECAR:SimSidecar0 unknown unknown +FAKE_SIM_SIDECAR:SimSidecar1 unknown unknown +i86pc:SimGimlet00 unknown unknown +i86pc:SimGimlet01 unknown unknown Installed Host Phase 1 Software -BASEBOARD_ID SLED_ID SLOT_A_VERSION SLOT_B_VERSION -sim-gimlet:sim-..................... ..................... unknown unknown (ACTIVE SLOT UNKNOWN) -sim-gimlet:sim-..................... ..................... unknown unknown (ACTIVE SLOT UNKNOWN) +BASEBOARD_ID SLED_ID SLOT_A_VERSION SLOT_B_VERSION +i86pc:SimGimlet00 ..................... unknown (active) unknown +i86pc:SimGimlet01 ..................... unknown (active) unknown Installed Host Phase 2 Software SLED_ID SLOT_A_VERSION SLOT_B_VERSION diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index e04ace8c64e..02b02f19a44 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -80,6 +80,7 @@ async fn instance_launch() -> Result<()> { auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), cpu_platform: None, + multicast_groups: Vec::new(), }) .send() .await?; diff --git a/illumos-utils/src/opte/illumos.rs b/illumos-utils/src/opte/illumos.rs index 3d1f0c8c707..2cef857393d 100644 --- a/illumos-utils/src/opte/illumos.rs +++ b/illumos-utils/src/opte/illumos.rs @@ -52,6 +52,11 @@ pub enum Error { #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")] ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + #[error( + "Tried to update multicast groups on non-existent port ({0}, {1:?})" + )] + MulticastUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + #[error("Could not find Primary NIC")] NoPrimaryNic, diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index 9f5c25462c5..82a2b2feab1 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -31,6 +31,7 @@ use oxide_vpc::api::RouterTarget; pub use oxide_vpc::api::Vni; use oxnet::IpNet; pub use port::Port; +pub use port_manager::MulticastGroupCfg; pub use port_manager::PortCreateParams; pub use port_manager::PortManager; pub use port_manager::PortTicket; @@ -71,7 +72,7 @@ impl Gateway { } } -/// Convert a nexus `IpNet` to an OPTE `IpCidr`. +/// Convert a nexus [IpNet] to an OPTE [IpCidr]. 
fn net_to_cidr(net: IpNet) -> IpCidr { match net { IpNet::V4(net) => IpCidr::Ip4(Ipv4Cidr::new( @@ -85,9 +86,10 @@ } } -/// Convert a nexus `RouterTarget` to an OPTE `RouterTarget`. +/// Convert a nexus [shared::RouterTarget] to an OPTE [RouterTarget]. /// -/// This is effectively a `From` impl, but defined for two out-of-crate types. +/// This is effectively a [`From`] impl, but defined for two +/// out-of-crate types. /// We map internet gateways that target the (single) "system" VPC IG to /// `InternetGateway(None)`. Everything else is mapped directly, translating IP /// address types as needed. diff --git a/illumos-utils/src/opte/non_illumos.rs b/illumos-utils/src/opte/non_illumos.rs index 3624a63547b..4b0204439c7 100644 --- a/illumos-utils/src/opte/non_illumos.rs +++ b/illumos-utils/src/opte/non_illumos.rs @@ -46,6 +46,11 @@ pub enum Error { #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")] ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + #[error( + "Tried to update multicast groups on non-existent port ({0}, {1:?})" + )] + MulticastUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + #[error("Could not find Primary NIC")] NoPrimaryNic, diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 97eba85e621..4e1526d0604 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -14,6 +14,8 @@ use crate::opte::opte_firewall_rules; use crate::opte::port::PortData; use ipnetwork::IpNetwork; use macaddr::MacAddr6; +use omicron_common::address::IPV4_MULTICAST_RANGE; +use omicron_common::address::IPV6_MULTICAST_RANGE; use omicron_common::api::external; use omicron_common::api::internal::shared::ExternalIpGatewayMap; use omicron_common::api::internal::shared::InternetGatewayRouterTarget; @@ -70,6 +72,21 @@ struct RouteSet { active_ports: usize, } +/// Configuration for multicast groups on an OPTE port. +/// +/// TODO: This type should be moved to [oxide_vpc::api] when OPTE dependencies +/// are updated, following the same pattern as other VPC configuration types +/// like [ExternalIpCfg], [IpCfg], etc. +/// +/// TODO: Eventually remove. +#[derive(Debug, Clone, PartialEq)] +pub struct MulticastGroupCfg { + /// The multicast group IP address (IPv4 or IPv6). + pub group_ip: IpAddr, + /// For Source-Specific Multicast (SSM), list of source addresses. + pub sources: Vec<IpAddr>, +} + #[derive(Debug)] struct PortManagerInner { log: Logger, @@ -595,7 +612,7 @@ impl PortManager { } /// Set Internet Gateway mappings for all external IPs in use - /// by attached `NetworkInterface`s. + /// by attached [NetworkInterface]s. /// /// Returns whether the internal mappings were changed. pub fn set_eip_gateways(&self, mappings: ExternalIpGatewayMap) -> bool { @@ -751,6 +768,71 @@ impl PortManager { Ok(()) } + /// Validate multicast group memberships for an OPTE port. + /// + /// This method validates multicast group configurations but does not yet + /// configure OPTE port-level multicast group membership. The actual + /// multicast forwarding is currently handled by the reconciler + DPD + /// at the dataplane switch level. + /// + /// TODO: Once OPTE kernel module supports multicast group APIs, this + /// method should be updated to configure OPTE port-level multicast + /// group membership. Note: multicast groups are fleet-wide and can span + /// across VPCs.
+ pub fn multicast_groups_ensure( + &self, + nic_id: Uuid, + nic_kind: NetworkInterfaceKind, + multicast_groups: &[MulticastGroupCfg], + ) -> Result<(), Error> { + let ports = self.inner.ports.lock().unwrap(); + let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| { + Error::MulticastUpdateMissingPort(nic_id, nic_kind) + })?; + + debug!( + self.inner.log, + "Validating multicast group configuration for OPTE port"; + "port_name" => port.name(), + "nic_id" => ?nic_id, + "groups" => ?multicast_groups, + ); + + // Validate multicast group configurations + for group in multicast_groups { + if !group.group_ip.is_multicast() { + error!( + self.inner.log, + "Invalid multicast IP address"; + "group_ip" => %group.group_ip, + "port_name" => port.name(), + ); + return Err(Error::InvalidPortIpConfig); + } + } + + // TODO: Configure firewall rules to allow multicast traffic. + // Add exceptions in source/dest MAC/L3 addr checking for multicast + // addresses matching known groups, only doing cidr-checking on the + // multicast destination side. + + info!( + self.inner.log, + "OPTE port configured for multicast traffic"; + "port_name" => port.name(), + "ipv4_range" => %IPV4_MULTICAST_RANGE, + "ipv6_range" => %IPV6_MULTICAST_RANGE, + "multicast_groups" => multicast_groups.len(), + ); + + // TODO: Configure OPTE port for specific multicast group membership + // once OPTE kernel module APIs are available. This is distinct from + // zone vNIC underlay configuration (see instance.rs + // `join_multicast_group_inner`). + + Ok(()) + } + pub fn firewall_rules_ensure( &self, vni: external::Vni, diff --git a/nexus-config/Cargo.toml b/nexus-config/Cargo.toml index d76b736b550..ae61a65c792 100644 --- a/nexus-config/Cargo.toml +++ b/nexus-config/Cargo.toml @@ -10,6 +10,7 @@ workspace = true anyhow.workspace = true camino.workspace = true dropshot.workspace = true +ipnet.workspace = true nexus-types.workspace = true omicron-common.workspace = true omicron-uuid-kinds.workspace = true diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index fe3c391fb81..2f0dd9f565b 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -10,7 +10,9 @@ use anyhow::anyhow; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; +use ipnet::Ipv6Net; use nexus_types::deployment::ReconfiguratorConfig; +use omicron_common::address::IPV6_ADMIN_SCOPED_MULTICAST_PREFIX; use omicron_common::address::Ipv6Subnet; use omicron_common::address::NEXUS_TECHPORT_EXTERNAL_PORT; use omicron_common::address::RACK_PREFIX; @@ -26,6 +28,7 @@ use serde_with::serde_as; use std::collections::HashMap; use std::fmt; use std::net::IpAddr; +use std::net::Ipv6Addr; use std::net::SocketAddr; use std::time::Duration; use uuid::Uuid; @@ -445,6 +448,8 @@ pub struct BackgroundTaskConfig { pub fm: FmTasksConfig, /// configuration for networking probe distributor pub probe_distributor: ProbeDistributorConfig, + /// configuration for multicast reconciler (group+members) task + pub multicast_reconciler: MulticastGroupReconcilerConfig, } #[serde_as] @@ -874,6 +879,59 @@ impl Default for SpEreportIngesterConfig { } } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct MulticastGroupReconcilerConfig { + /// period (in seconds) for periodic activations of the background task that + /// reconciles multicast group state with dendrite switch configuration + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, + + /// TTL (in
seconds) for the sled-to-switch-port mapping cache. + /// + /// This cache maps sled IDs to their physical switch ports. It changes when + /// sleds are added/removed or inventory is updated. + /// + /// Default: 3600 seconds (1 hour) + #[serde( + default = "MulticastGroupReconcilerConfig::default_sled_cache_ttl_secs" + )] + #[serde_as(as = "DurationSeconds")] + pub sled_cache_ttl_secs: Duration, + + /// TTL (in seconds) for the backplane hardware topology cache. + /// + /// This cache stores the hardware platform's port mapping. It effectively + /// never changes during normal operation. + /// + /// Default: 86400 seconds (24 hours) with smart invalidation + #[serde( + default = "MulticastGroupReconcilerConfig::default_backplane_cache_ttl_secs" + )] + #[serde_as(as = "DurationSeconds")] + pub backplane_cache_ttl_secs: Duration, +} + +impl MulticastGroupReconcilerConfig { + const fn default_sled_cache_ttl_secs() -> Duration { + Duration::from_secs(3600) // 1 hour + } + + const fn default_backplane_cache_ttl_secs() -> Duration { + Duration::from_secs(86400) // 24 hours + } +} + +impl Default for MulticastGroupReconcilerConfig { + fn default() -> Self { + Self { + period_secs: Duration::from_secs(60), + sled_cache_ttl_secs: Self::default_sled_cache_ttl_secs(), + backplane_cache_ttl_secs: Self::default_backplane_cache_ttl_secs(), + } + } +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct FmTasksConfig { @@ -899,6 +957,30 @@ impl Default for FmTasksConfig { } } +/// Fixed underlay admin-scoped IPv6 multicast network (ff04::/64) used for +/// internal multicast group allocation and external→underlay mapping. +/// This /64 subnet within the admin-scoped space provides 2^64 host addresses +/// (ample for collision resistance) and is not configurable. +pub const DEFAULT_UNDERLAY_MULTICAST_NET: Ipv6Net = Ipv6Net::new_assert( + Ipv6Addr::new(IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, 0, 0, 0, 0, 0, 0, 0), + 64, +); + +/// Configuration for multicast options. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct MulticastConfig { + /// Whether multicast functionality is enabled or not. + /// + /// When false, multicast API calls remain accessible but no actual + /// multicast operations occur (no switch programming, reconciler disabled). + /// Instance sagas will skip multicast operations. This allows gradual + /// rollout and testing of multicast configuration. 
+ /// + /// Default: false (experimental feature, disabled by default) + #[serde(default)] + pub enabled: bool, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct ProbeDistributorConfig { @@ -944,6 +1026,9 @@ pub struct PackageConfig { pub initial_reconfigurator_config: Option, /// Background task configuration pub background_tasks: BackgroundTaskConfig, + /// Multicast feature configuration + #[serde(default)] + pub multicast: MulticastConfig, /// Default Crucible region allocation strategy pub default_region_allocation_strategy: RegionAllocationStrategy, } @@ -1079,10 +1164,7 @@ mod test { // "unexpected eof encountered at line 1 column 6" // ); } else { - panic!( - "Got an unexpected error, expected Parse but got {:?}", - error - ); + panic!("Got an unexpected error, expected Parse but got {error:?}"); } } @@ -1096,10 +1178,7 @@ mod test { assert_eq!(error.span(), Some(0..0)); assert_eq!(error.message(), "missing field `deployment`"); } else { - panic!( - "Got an unexpected error, expected Parse but got {:?}", - error - ); + panic!("Got an unexpected error, expected Parse but got {error:?}"); } } @@ -1214,6 +1293,7 @@ mod test { fm.sitrep_load_period_secs = 48 fm.sitrep_gc_period_secs = 49 probe_distributor.period_secs = 50 + multicast_reconciler.period_secs = 60 [default_region_allocation_strategy] type = "random" seed = 0 @@ -1466,7 +1546,13 @@ mod test { probe_distributor: ProbeDistributorConfig { period_secs: Duration::from_secs(50), }, + multicast_reconciler: MulticastGroupReconcilerConfig { + period_secs: Duration::from_secs(60), + sled_cache_ttl_secs: MulticastGroupReconcilerConfig::default_sled_cache_ttl_secs(), + backplane_cache_ttl_secs: MulticastGroupReconcilerConfig::default_backplane_cache_ttl_secs(), + }, }, + multicast: MulticastConfig { enabled: false }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { seed: Some(0) @@ -1567,6 +1653,7 @@ mod test { fm.sitrep_load_period_secs = 45 fm.sitrep_gc_period_secs = 46 probe_distributor.period_secs = 47 + multicast_reconciler.period_secs = 60 [default_region_allocation_strategy] type = "random" @@ -1634,10 +1721,7 @@ mod test { error ); } else { - panic!( - "Got an unexpected error, expected Parse but got {:?}", - error - ); + panic!("Got an unexpected error, expected Parse but got {error:?}"); } } @@ -1689,10 +1773,7 @@ mod test { r#"invalid "max_vpc_ipv4_subnet_prefix": "IPv4 subnet prefix must"#, )); } else { - panic!( - "Got an unexpected error, expected Parse but got {:?}", - error - ); + panic!("Got an unexpected error, expected Parse but got {error:?}"); } } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index f5b13dba8fb..e145f96b84b 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -50,6 +50,7 @@ iddqd.workspace = true illumos-utils.workspace = true internal-dns-resolver.workspace = true internal-dns-types.workspace = true +ipnet.workspace = true ipnetwork.workspace = true itertools.workspace = true jiff.workspace = true diff --git a/nexus/auth/src/authz/api_resources.rs b/nexus/auth/src/authz/api_resources.rs index fdf8af03a05..7f7126b2813 100644 --- a/nexus/auth/src/authz/api_resources.rs +++ b/nexus/auth/src/authz/api_resources.rs @@ -471,6 +471,72 @@ impl AuthorizedResource for IpPoolList { } } +/// Synthetic, fleet-scoped resource representing the `/v1/multicast-groups` +/// collection. +/// +/// **Authorization Model:** +/// - Multicast groups are fleet-wide resources (similar to IP pools). 
+/// - Any authenticated user within a silo in the fleet can create, list, read, +/// and modify groups. This includes project collaborators, silo collaborators, +/// and silo admins. +/// - Cross-silo multicast communication is enabled by fleet-wide access. +/// +/// The fleet-level collection endpoint (`/v1/multicast-groups`) allows: +/// - Any authenticated user within the fleet's silos to create and list groups. +/// - Instances from different projects and silos can join the same multicast groups. +/// +/// See `omicron.polar` for the detailed policy rules that grant fleet-wide +/// access to authenticated silo users for multicast group operations. +#[derive(Clone, Copy, Debug)] +pub struct MulticastGroupList; + +/// Singleton representing the [`MulticastGroupList`] itself for authz purposes. +pub const MULTICAST_GROUP_LIST: MulticastGroupList = MulticastGroupList; + +impl Eq for MulticastGroupList {} + +impl PartialEq for MulticastGroupList { + fn eq(&self, _: &Self) -> bool { + true + } +} + +impl oso::PolarClass for MulticastGroupList { + fn get_polar_class_builder() -> oso::ClassBuilder<Self> { + oso::Class::builder() + .with_equality_check() + .add_attribute_getter("fleet", |_: &MulticastGroupList| FLEET) + } +} + +impl AuthorizedResource for MulticastGroupList { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { + // There are no roles on the MulticastGroupList, only permissions. But we + // still need to load the Fleet-related roles to verify the actor's + // role on the Fleet (possibly conferred from a Silo role). + load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() + } + + fn on_unauthorized( + &self, + _: &Authz, + error: Error, + _: AnyActor, + _: Action, + ) -> Error { + error + } + + fn polar_class(&self) -> oso::Class { + Self::get_polar_class() + } +} + // Similar to IpPoolList, the audit log is a collection that doesn't exist in // the database as an entity distinct from its children (IP pools, or in this // case, audit log entries). We need a dummy resource here because we need @@ -1325,6 +1391,39 @@ authz_resource! { polar_snippet = InProjectLimited, } +// MulticastGroup Authorization +// +// MulticastGroups are **fleet-scoped resources** (parent = "Fleet"), similar to +// IP pools, to enable efficient cross-project and cross-silo multicast +// communication. +// +// Authorization rules: +// - Creating/modifying groups: Any authenticated user within a silo in the fleet. +// This includes project collaborators, silo collaborators, and silo admins. +// - Listing groups: Any authenticated user within a silo in the fleet +// - Viewing individual groups: Any authenticated user within a silo in the fleet +// - Attaching instances to groups: only requires Instance::Modify permission +// (users can attach their own instances to any fleet-scoped group) +// +// Fleet::Admin role can also perform all operations via the parent Fleet relation. +// +// See omicron.polar for the special `has_permission` rules that grant create/modify/ +// list/read access to authenticated silo users (including project collaborators), +// enabling cross-project and cross-silo multicast communication without requiring +// Fleet::Admin or Fleet::Viewer roles.
+// +// Member management: `MulticastGroup` member attachments/detachments (instances +// joining/leaving groups) use the existing `MulticastGroup` and `Instance` +// authz resources rather than creating a separate `MulticastGroupMember` authz +// resource. +authz_resource! { + name = "MulticastGroup", + parent = "Fleet", + primary_key = Uuid, + roles_allowed = false, + polar_snippet = FleetChild, +} + // Customer network integration resources nested below "Fleet" authz_resource! { diff --git a/nexus/auth/src/authz/omicron.polar b/nexus/auth/src/authz/omicron.polar index ddb5baec8dc..a536370b2f0 100644 --- a/nexus/auth/src/authz/omicron.polar +++ b/nexus/auth/src/authz/omicron.polar @@ -492,7 +492,48 @@ has_relation(fleet: Fleet, "parent_fleet", ip_pool_list: IpPoolList) has_permission(actor: AuthenticatedActor, "create_child", ip_pool: IpPool) if actor.is_user and silo in actor.silo and silo.fleet = ip_pool.fleet; -# Describes the policy for reading and writing the audit log +# Describes the policy for accessing "/v1/multicast-groups" in the API +resource MulticastGroupList { + permissions = [ + "list_children", + "create_child", + ]; + + relations = { parent_fleet: Fleet }; + # Fleet Administrators can create multicast groups + "create_child" if "admin" on "parent_fleet"; + + # Fleet Viewers can list multicast groups + "list_children" if "viewer" on "parent_fleet"; +} +has_relation(fleet: Fleet, "parent_fleet", multicast_group_list: MulticastGroupList) + if multicast_group_list.fleet = fleet; + +# Any authenticated user can create multicast groups in their fleet. +# This is necessary to allow silo users to create multicast groups for +# cross-project and cross-silo communication without requiring Fleet::Admin. +has_permission(actor: AuthenticatedActor, "create_child", multicast_group_list: MulticastGroupList) + if silo in actor.silo and silo.fleet = multicast_group_list.fleet; + +# Any authenticated user can list multicast groups in their fleet. +# This is necessary because multicast groups are fleet-scoped resources that +# silo users need to discover and attach their instances to, without requiring +# Fleet::Viewer role. +has_permission(actor: AuthenticatedActor, "list_children", multicast_group_list: MulticastGroupList) + if silo in actor.silo and silo.fleet = multicast_group_list.fleet; + +# Any authenticated user can read and modify individual multicast groups in their fleet. +# Users can create, modify, and consume (attach instances to) multicast groups. +# This enables cross-project and cross-silo multicast while maintaining +# appropriate security boundaries via API authorization and underlay group +# membership validation. 
+has_permission(actor: AuthenticatedActor, "read", multicast_group: MulticastGroup) + if silo in actor.silo and silo.fleet = multicast_group.fleet; + +has_permission(actor: AuthenticatedActor, "modify", multicast_group: MulticastGroup) + if silo in actor.silo and silo.fleet = multicast_group.fleet; + +# Describes the policy for reading and writing the audit log resource AuditLog { permissions = [ "list_children", # retrieve audit log diff --git a/nexus/auth/src/authz/oso_generic.rs b/nexus/auth/src/authz/oso_generic.rs index b6d627acc33..f2d0403eb55 100644 --- a/nexus/auth/src/authz/oso_generic.rs +++ b/nexus/auth/src/authz/oso_generic.rs @@ -124,6 +124,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { TargetReleaseConfig::get_polar_class(), AlertClassList::get_polar_class(), ScimClientBearerTokenList::get_polar_class(), + MulticastGroupList::get_polar_class(), ]; for c in classes { oso_builder = oso_builder.register_class(c)?; @@ -179,6 +180,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { Service::init(), UserBuiltin::init(), ScimClientBearerToken::init(), + MulticastGroup::init(), ]; for init in generated_inits { diff --git a/nexus/background-task-interface/src/init.rs b/nexus/background-task-interface/src/init.rs index d7431f93053..bcbd2443b2a 100644 --- a/nexus/background-task-interface/src/init.rs +++ b/nexus/background-task-interface/src/init.rs @@ -54,6 +54,7 @@ pub struct BackgroundTasks { pub task_fm_sitrep_loader: Activator, pub task_fm_sitrep_gc: Activator, pub task_probe_distributor: Activator, + pub task_multicast_reconciler: Activator, // Handles to activate background tasks that do not get used by Nexus // at-large. These background tasks are implementation details as far as diff --git a/nexus/db-lookup/src/lookup.rs b/nexus/db-lookup/src/lookup.rs index 65a320050fe..310fb895e7b 100644 --- a/nexus/db-lookup/src/lookup.rs +++ b/nexus/db-lookup/src/lookup.rs @@ -349,6 +349,23 @@ impl<'a> LookupPath<'a> { AddressLot::OwnedName(Root { lookup_root: self }, name) } + /// Select a resource of type MulticastGroup, identified by its name + pub fn multicast_group_name<'b, 'c>( + self, + name: &'b Name, + ) -> MulticastGroup<'c> + where + 'a: 'c, + 'b: 'c, + { + MulticastGroup::Name(Root { lookup_root: self }, name) + } + + /// Select a resource of type MulticastGroup, identified by its id + pub fn multicast_group_id(self, id: Uuid) -> MulticastGroup<'a> { + MulticastGroup::PrimaryKey(Root { lookup_root: self }, id) + } + pub fn loopback_address( self, rack_id: Uuid, @@ -749,6 +766,14 @@ lookup_resource! { // Miscellaneous resources nested directly below "Fleet" +lookup_resource! { + name = "MulticastGroup", + ancestors = [], + lookup_by_name = true, + soft_deletes = true, + primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] +} + lookup_resource! 
{ name = "ConsoleSession", ancestors = [], diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 57dcec31262..6b81d296479 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -61,6 +61,7 @@ mod local_storage; mod macaddr; mod migration; mod migration_state; +mod multicast_group; mod name; mod network_interface; mod oximeter_info; @@ -207,6 +208,7 @@ pub use l4_port_range::*; pub use local_storage::*; pub use migration::*; pub use migration_state::*; +pub use multicast_group::*; pub use name::*; pub use nat_entry::*; pub use network_interface::*; diff --git a/nexus/db-model/src/multicast_group.rs b/nexus/db-model/src/multicast_group.rs new file mode 100644 index 00000000000..06ab9f27350 --- /dev/null +++ b/nexus/db-model/src/multicast_group.rs @@ -0,0 +1,490 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database models for multicast groups and membership. +//! +//! Implements the bifurcated multicast design from +//! [RFD 488](https://rfd.shared.oxide.computer/rfd/488), with two types +//! of multicast groups: +//! +//! ## External Multicast Groups +//! +//! Customer-facing groups allocated from IP pools: +//! - Use IPv4/IPv6 addresses from customer IP pools +//! - Exposed via customer APIs for application multicast traffic +//! - Support Source-Specific Multicast (SSM) with configurable source IPs +//! - Follow the Resource trait pattern for user-facing identity management +//! - **Fleet-scoped** (not project-scoped) to enable cross-project multicast +//! - All use `DEFAULT_MULTICAST_VNI` (77) for consistent fleet-wide behavior +//! +//! ### VNI and Security Model +//! +//! External multicast groups use VNI 77 (i.e. an arbitrary VNI), a reserved +//! system VNI below `MIN_GUEST_VNI` (1024). This differs from VPC unicast +//! traffic where each VPC receives its own VNI for tenant isolation. +//! +//! The shared VNI design reflects multicast's fleet-scoped authorization model: +//! groups are fleet resources (like IP pools) that can span projects and silos. +//! Forwarding occurs through Dendrite's bifurcated NAT architecture, which +//! translates external multicast addresses to underlay IPv6 groups at the switch. +//! +//! **VNI Selection**: RFD 488 discusses using an "arbitrary multicast VNI for +//! multicast groups spanning VPCs" since we don't need VPC-specific VNIs for +//! groups that transcend VPC boundaries. VNI 77 is this default VNI for all +//! external multicast groups. Future implementations may support per-VPC +//! multicast VNIs if VPC-isolated multicast groups become necessary. +//! +//! Security happens at two layers: +//! - **Control plane**: Fleet admins create groups; users attach instances via API +//! - **Dataplane**: Switch hardware validates underlay group membership +//! +//! This allows cross-project and cross-silo multicast while maintaining explicit +//! membership control through underlay forwarding tables. +//! +//! ## Underlay Multicast Groups +//! +//! System-generated admin-scoped IPv6 multicast groups for internal forwarding: +//! - Use IPv6 admin-local multicast scope (ff04::/16) per RFC 7346 +//! +//! - Paired 1:1 with external groups for NAT-based forwarding +//! - Handle rack-internal multicast traffic between switches +//! - Use individual field pattern for system resources +//! +//! ## Member Lifecycle (handled by RPW) +//! +//! 
Multicast group members follow a 3-state lifecycle managed by the +//! Reliable Persistent Workflow (RPW) reconciler: +//! - ["Joining"](MulticastGroupMemberState::Joining): Member created, awaiting +//! dataplane configuration (via DPD) +//! - ["Joined"](MulticastGroupMemberState::Joined): Member configuration applied +//! in the dataplane, ready to receive multicast traffic +//! - ["Left"](MulticastGroupMemberState::Left): Member configuration removed from +//! the dataplane (e.g., instance stopping/stopped, explicit detach, delete) +//! +//! Migration note: during instance migration, membership is reconfigured in +//! place—the reconciler removes configuration from the old sled and applies it +//! on the new sled without transitioning the member to "Left". In other words, +//! migration is not considered leaving; the member generally remains "Joined" +//! while its `sled_id` and dataplane configuration are updated. +//! - If an instance is deleted, the member will be marked for removal with a +//! deleted timestamp, and the reconciler will remove it from the dataplane +//! +//! The RPW ensures eventual consistency between database state and dataplane +//! configuration (applied via DPD to switches). + +use std::net::IpAddr; + +use chrono::{DateTime, Utc}; +use diesel::{ + AsChangeset, AsExpression, FromSqlRow, Insertable, Queryable, Selectable, +}; +use ipnetwork::IpNetwork; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use db_macros::Resource; +use nexus_db_schema::schema::{ + multicast_group, multicast_group_member, underlay_multicast_group, +}; +use nexus_types::external_api::views; +use nexus_types::identity::Resource as IdentityResource; +use omicron_common::api::external::{self, IdentityMetadata}; +use omicron_common::vlan::VlanID; +use omicron_uuid_kinds::SledKind; + +use crate::typed_uuid::DbTypedUuid; +use crate::{Generation, Name, Vni, impl_enum_type}; + +impl_enum_type!( + MulticastGroupStateEnum: + + #[derive(Clone, Copy, Debug, PartialEq, Eq, AsExpression, FromSqlRow, Serialize, Deserialize, JsonSchema)] + pub enum MulticastGroupState; + + Creating => b"creating" + Active => b"active" + Deleting => b"deleting" + Deleted => b"deleted" +); + +impl_enum_type!( + MulticastGroupMemberStateEnum: + + #[derive(Clone, Copy, Debug, PartialEq, Eq, AsExpression, FromSqlRow, Serialize, Deserialize, JsonSchema)] + pub enum MulticastGroupMemberState; + + Joining => b"joining" + Joined => b"joined" + Left => b"left" +); + +impl std::fmt::Display for MulticastGroupState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + MulticastGroupState::Creating => "Creating", + MulticastGroupState::Active => "Active", + MulticastGroupState::Deleting => "Deleting", + MulticastGroupState::Deleted => "Deleted", + }) + } +} + +impl std::fmt::Display for MulticastGroupMemberState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + MulticastGroupMemberState::Joining => "Joining", + MulticastGroupMemberState::Joined => "Joined", + MulticastGroupMemberState::Left => "Left", + }) + } +} + +/// Type alias for lookup resource naming convention. +/// +/// This alias maps the generic name [MulticastGroup] to [ExternalMulticastGroup], +/// following the pattern used throughout Omicron where the user-facing resource +/// uses the simpler name. External multicast groups are the primary user-facing +/// multicast resources, while underlay groups are internal infrastructure. 
+pub type MulticastGroup = ExternalMulticastGroup; + +/// An external multicast group for delivering packets to multiple recipients. +/// +/// External groups are multicast groups allocated from IP pools. These are +/// distinct from [UnderlayMulticastGroup] which are system-generated IPv6 addresses for +/// NAT mapping. +#[derive( + Queryable, + Selectable, + Clone, + Debug, + PartialEq, + Eq, + Resource, + Serialize, + Deserialize, +)] +#[diesel(table_name = multicast_group)] +pub struct ExternalMulticastGroup { + #[diesel(embed)] + pub identity: ExternalMulticastGroupIdentity, + /// IP pool this address was allocated from. + pub ip_pool_id: Uuid, + /// IP pool range this address was allocated from. + pub ip_pool_range_id: Uuid, + /// VNI for multicast group. + pub vni: Vni, + /// Primary multicast IP address (overlay/external). + pub multicast_ip: IpNetwork, + /// Source IP addresses for Source-Specific Multicast (SSM). + /// Empty array means any source is allowed. + pub source_ips: Vec<IpNetwork>, + /// Multicast VLAN (MVLAN) for egress multicast traffic to upstream networks. + /// + /// When specified, this VLAN ID is passed to switches (via DPD) as part of + /// the `ExternalForwarding` configuration to tag multicast packets leaving + /// the rack. This enables multicast traffic to traverse VLAN-segmented + /// upstream networks (e.g., peering with external multicast sources/receivers + /// on specific VLANs). + /// + /// The MVLAN value is sent to switches during group creation/updates and + /// controls VLAN tagging for egress traffic only; it does not affect ingress + /// multicast traffic received by the rack. Switch port selection for egress + /// traffic remains pending (see TODOs in `nexus/src/app/multicast/dataplane.rs`). + /// + /// Valid range when specified: 2-4094 (IEEE 802.1Q; Dendrite requires >= 2). + /// + /// Database Type: i16 (INT2) - this field uses `i16` (INT2) for storage + /// efficiency, unlike other VLAN columns in the schema which use `SqlU16` + /// (forcing INT4). Direct `i16` is appropriate here since VLANs fit in + /// INT2's range. + pub mvlan: Option<i16>, + /// Associated underlay group for NAT. + /// Initially None in ["Creating"](MulticastGroupState::Creating) state, + /// populated by reconciler when group becomes ["Active"](MulticastGroupState::Active). + pub underlay_group_id: Option<Uuid>, + /// DPD-client tag used to couple external (overlay) and underlay entries + /// for this multicast group. + /// + /// System-generated from the group's unique name at creation + /// and updated on rename to maintain pairing consistency. Since group names + /// have a unique constraint (among non-deleted groups), tags are unique per + /// active group, ensuring tag-based DPD-client operations (like cleanup) + /// affect only the intended group. Not used for authorization; intended for + /// Dendrite management. + pub tag: Option<String>, + /// Current state of the multicast group (RPW pattern). + /// See [MulticastGroupState] for possible values. + pub state: MulticastGroupState, + /// Version when this group was added. + pub version_added: Generation, + /// Version when this group was removed. + pub version_removed: Option<Generation>, +} + +/// Values used to create a [MulticastGroupMember] in the database. +/// +/// This struct is used for database insertions and omits fields that are +/// automatically populated by the database (like version_added and version_removed +/// which use DEFAULT nextval() sequences).
For complete member records with all +/// fields populated, use [MulticastGroupMember]. +#[derive(Insertable, Debug, Clone, PartialEq, Eq)] +#[diesel(table_name = multicast_group_member)] +pub struct MulticastGroupMemberValues { + pub id: Uuid, + pub time_created: DateTime<Utc>, + pub time_modified: DateTime<Utc>, + pub time_deleted: Option<DateTime<Utc>>, + pub external_group_id: Uuid, + pub parent_id: Uuid, + pub sled_id: Option<DbTypedUuid<SledKind>>, + pub state: MulticastGroupMemberState, + // version_added and version_removed are omitted - database assigns these + // via DEFAULT nextval() +} + +/// A member of a multicast group (instance that receives multicast traffic). +#[derive( + Queryable, + Selectable, + Clone, + Debug, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema, +)] +#[diesel(table_name = multicast_group_member)] +pub struct MulticastGroupMember { + /// Unique identifier for this multicast group member. + pub id: Uuid, + /// Timestamp for creation of this multicast group member. + pub time_created: DateTime<Utc>, + /// Timestamp for last modification of this multicast group member. + pub time_modified: DateTime<Utc>, + /// Timestamp for deletion of this multicast group member, if applicable. + pub time_deleted: Option<DateTime<Utc>>, + /// External multicast group this member belongs to. + pub external_group_id: Uuid, + /// Parent instance or service that receives multicast traffic. + pub parent_id: Uuid, + /// Sled hosting the parent. + pub sled_id: Option<DbTypedUuid<SledKind>>, + /// Current state of the multicast group member (RPW pattern). + /// See [MulticastGroupMemberState] for possible values. + pub state: MulticastGroupMemberState, + /// Version when this member was added. + pub version_added: Generation, + /// Version when this member was removed. + pub version_removed: Option<Generation>, +} + +// Conversions to external API views + +impl TryFrom<ExternalMulticastGroup> for views::MulticastGroup { + type Error = external::Error; + + fn try_from(group: ExternalMulticastGroup) -> Result<Self, Self::Error> { + let mvlan = group + .mvlan + .map(|vlan| VlanID::new(vlan as u16)) + .transpose() + .map_err(|e| { + external::Error::internal_error(&format!( + "invalid VLAN ID: {e:#}" + )) + })?; + + Ok(views::MulticastGroup { + identity: group.identity(), + multicast_ip: group.multicast_ip.ip(), + source_ips: group + .source_ips + .into_iter() + .map(|ip| ip.ip()) + .collect(), + mvlan, + ip_pool_id: group.ip_pool_id, + state: group.state.to_string(), + }) + } +} + +impl TryFrom<MulticastGroupMember> for views::MulticastGroupMember { + type Error = external::Error; + + fn try_from(member: MulticastGroupMember) -> Result<Self, Self::Error> { + Ok(views::MulticastGroupMember { + identity: IdentityMetadata { + id: member.id, + name: format!("member-{}", member.id).parse().map_err(|e| { + external::Error::internal_error(&format!( + "generated member name is invalid: {e}" + )) + })?, + description: format!("multicast group member {}", member.id), + time_created: member.time_created, + time_modified: member.time_modified, + }, + multicast_group_id: member.external_group_id, + instance_id: member.parent_id, + state: member.state.to_string(), + }) + } +} + +/// An incomplete external multicast group, used to store state required for +/// issuing the database query that selects an available multicast IP and stores +/// the resulting record.
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct IncompleteExternalMulticastGroup { + pub id: Uuid, + pub name: Name, + pub description: String, + pub time_created: DateTime<Utc>, + pub ip_pool_id: Uuid, + pub source_ips: Vec<IpNetwork>, + // Optional address requesting that a specific multicast IP address be + // allocated or provided + pub explicit_address: Option<IpNetwork>, + pub mvlan: Option<i16>, + pub vni: Vni, + pub tag: Option<String>, +} + +/// Parameters for creating an incomplete external multicast group. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct IncompleteExternalMulticastGroupParams { + pub id: Uuid, + pub name: Name, + pub description: String, + pub ip_pool_id: Uuid, + pub explicit_address: Option<IpAddr>, + pub source_ips: Vec<IpNetwork>, + pub mvlan: Option<i16>, + pub vni: Vni, + pub tag: Option<String>, +} + +impl IncompleteExternalMulticastGroup { + /// Create an incomplete multicast group from parameters. + pub fn new(params: IncompleteExternalMulticastGroupParams) -> Self { + Self { + id: params.id, + name: params.name, + description: params.description, + time_created: Utc::now(), + ip_pool_id: params.ip_pool_id, + source_ips: params.source_ips, + explicit_address: params.explicit_address.map(|ip| ip.into()), + mvlan: params.mvlan, + vni: params.vni, + tag: params.tag, + } + } +} + +impl MulticastGroupMember { + /// Generate a new multicast group member. + /// + /// Note: version_added will be set by the database sequence when inserted. + pub fn new( + id: Uuid, + external_group_id: Uuid, + parent_id: Uuid, + sled_id: Option<DbTypedUuid<SledKind>>, + ) -> Self { + Self { + id, + time_created: Utc::now(), + time_modified: Utc::now(), + time_deleted: None, + external_group_id, + parent_id, + sled_id, + state: MulticastGroupMemberState::Joining, + // Placeholder - will be overwritten by database sequence on insert + version_added: Generation::new(), + version_removed: None, + } + } +} + +/// Database representation of an underlay multicast group. +/// +/// Underlay groups are system-generated admin-scoped IPv6 multicast addresses +/// used as a NAT target for internal multicast traffic. Underlay groups are +/// VNI-agnostic; the VNI is an overlay identifier carried by [ExternalMulticastGroup]. +/// +/// These are distinct from [ExternalMulticastGroup] which are external-facing +/// addresses allocated from IP pools, specified by users or applications. +#[derive( + Queryable, + Insertable, + Selectable, + Clone, + Debug, + PartialEq, + Eq, + Serialize, + Deserialize, +)] +#[diesel(table_name = underlay_multicast_group)] +pub struct UnderlayMulticastGroup { + /// Unique identifier for this underlay multicast group. + pub id: Uuid, + /// Timestamp for creation of this underlay multicast group. + pub time_created: DateTime<Utc>, + /// Timestamp for last modification of this underlay multicast group. + pub time_modified: DateTime<Utc>, + /// Timestamp for deletion of this underlay multicast group, if applicable. + pub time_deleted: Option<DateTime<Utc>>, + /// Admin-scoped IPv6 multicast address (NAT target). + pub multicast_ip: IpNetwork, + /// Dendrite tag used to couple external/underlay state for this group. + /// + /// Matches the tag on the paired [ExternalMulticastGroup] so Dendrite can treat + /// the overlay and underlay entries as a logical unit. Since tags are derived + /// from unique group names, each active group has a unique tag, ensuring + /// tag-based operations (like cleanup) affect only this group's configuration. + /// See [ExternalMulticastGroup::tag] for complete semantics. + pub tag: Option<String>, + /// Version when this group was added.
+ pub version_added: Generation, + /// Version when this group was removed. + pub version_removed: Option<Generation>, +} + +/// Update data for a multicast group. +#[derive(AsChangeset, Debug, PartialEq, Eq)] +#[diesel(table_name = multicast_group)] +pub struct ExternalMulticastGroupUpdate { + pub name: Option<Name>, + pub description: Option<String>, + pub source_ips: Option<Vec<IpNetwork>>, + // Needs to be double Option so we can set a value of null in the DB by + // passing Some(None). None by itself is ignored by Diesel. + pub mvlan: Option<Option<i16>>, + pub time_modified: DateTime<Utc>, +} + +impl From<nexus_types::external_api::params::MulticastGroupUpdate> + for ExternalMulticastGroupUpdate +{ + fn from( + params: nexus_types::external_api::params::MulticastGroupUpdate, + ) -> Self { + Self { + name: params.identity.name.map(Name), + description: params.identity.description, + source_ips: params + .source_ips + .map(|ips| ips.into_iter().map(IpNetwork::from).collect()), + // mvlan is always None here - handled manually in datastore + mvlan: None, + time_modified: Utc::now(), + } + } +} diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index d8305b885f1..1ee9a2a936a 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(208, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(209, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(209, "multicast-group-support"), KnownVersion::new(208, "disable-tuf-repo-pruner"), KnownVersion::new(207, "disk-types"), KnownVersion::new(206, "fm-sitreps-by-parent-id-index"), diff --git a/nexus/db-model/src/vni.rs b/nexus/db-model/src/vni.rs index 649694bfb24..ee2ec141bcb 100644 --- a/nexus/db-model/src/vni.rs +++ b/nexus/db-model/src/vni.rs @@ -10,6 +10,7 @@ use diesel::serialize; use diesel::serialize::ToSql; use diesel::sql_types; use omicron_common::api::external; +use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -23,6 +24,7 @@ use serde::Serialize; Deserialize, Eq, PartialEq, + JsonSchema, )] #[diesel(sql_type = sql_types::Int4)] pub struct Vni(pub external::Vni); diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index de38d2a4ab6..9b8f93c7a72 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -728,8 +728,8 @@ impl DataStore { .map(|res| res.map(|(ip, _do_saga)| ip)) } - /// Delete all non-floating IP addresses associated with the provided instance - /// ID. + /// Delete all non-floating IP addresses associated with the provided + /// instance ID. /// /// This method returns the number of records deleted, rather than the usual /// `DeleteResult`. That's mostly useful for tests, but could be important @@ -841,7 +841,7 @@ .find(|v| v.kind == IpKind::Ephemeral)) } - /// Fetch all external IP addresses of any kind for the provided probe + /// Fetch all external IP addresses of any kind for the provided probe.
pub async fn probe_lookup_external_ips( &self, opctx: &OpContext, diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 941c2e0e75a..37e25dda4ec 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -67,6 +67,7 @@ use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::SledUuid; use ref_cast::RefCast; +use std::collections::HashMap; use uuid::Uuid; /// Returns the operator-visible [external API @@ -738,6 +739,62 @@ impl DataStore { Ok(InstanceGestalt { instance, migration, active_vmm, target_vmm }) } + /// Batch-fetch instance and VMM records for multiple instances to avoid N+1 queries. + /// + /// This method efficiently retrieves multiple instances and their active VMMs + /// in a single database round-trip using a LEFT JOIN. It is used by the + /// multicast reconciler to check the state of many instances simultaneously. + /// + /// # Returns + /// + /// A HashMap mapping instance_id -> `(Instance, Option)` where: + /// - The VMM is `None` for stopped instances (no `active_propolis_id`) + /// - Deleted instances are excluded from the result + /// - Non-existent instance IDs are silently omitted from the map + pub async fn instance_and_vmm_batch_fetch( + &self, + opctx: &OpContext, + instance_ids: &[omicron_uuid_kinds::InstanceUuid], + ) -> Result)>, Error> { + use nexus_db_schema::schema::instance::dsl as instance_dsl; + use nexus_db_schema::schema::vmm::dsl as vmm_dsl; + + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let conn = self.pool_connection_authorized(opctx).await?; + + if instance_ids.is_empty() { + return Ok(HashMap::new()); + } + + let results: Vec<(Instance, Option)> = instance_dsl::instance + .filter( + instance_dsl::id.eq_any( + instance_ids + .iter() + .map(|id| id.into_untyped_uuid()) + .collect::>(), + ), + ) + .filter(instance_dsl::time_deleted.is_null()) + .left_join( + vmm_dsl::vmm.on(vmm_dsl::id + .nullable() + .eq(instance_dsl::active_propolis_id) + .and(vmm_dsl::time_deleted.is_null())), + ) + .select((Instance::as_select(), Option::::as_select())) + .load_async::<(Instance, Option)>(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + let map = results + .into_iter() + .map(|(instance, vmm)| (instance.id(), (instance, vmm))) + .collect(); + + Ok(map) + } + // TODO-design It's tempting to return the updated state of the Instance // here because it's convenient for consumers and by using a RETURNING // clause, we could ensure that the "update" and "fetch" are atomic. @@ -2180,6 +2237,64 @@ impl DataStore { )) } } + + /// Get the runtime state of an instance by ID. + /// + /// Returns the instance's current runtime state, or None if the instance + /// doesn't exist or has been deleted. + pub async fn instance_get_state( + &self, + opctx: &OpContext, + instance_id: &InstanceUuid, + ) -> Result, external::Error> { + use nexus_db_schema::schema::instance::dsl; + let id = instance_id.into_untyped_uuid(); + + let instance = dsl::instance + .filter(dsl::id.eq(id)) + .filter(dsl::time_deleted.is_null()) + .select(Instance::as_select()) + .first_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(instance.map(|i| i.runtime_state)) + } + + /// Look up the sled hosting an instance via its active VMM. 
+ /// + /// Returns None if the instance exists but has no active VMM (stopped + /// instance). + pub async fn instance_get_sled_id( + &self, + opctx: &OpContext, + instance_id: Uuid, + ) -> Result, external::Error> { + use nexus_db_schema::schema::{instance, vmm}; + let maybe_row: Option> = instance::table + .left_join( + vmm::table + .on(instance::active_propolis_id.eq(vmm::id.nullable())), + ) + .filter(instance::id.eq(instance_id)) + .filter(instance::time_deleted.is_null()) + .select(vmm::sled_id.nullable()) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + match maybe_row { + None => Err(external::Error::not_found_by_id( + ResourceType::Instance, + &instance_id, + )), + Some(sled) => Ok(sled), + } + } } #[cfg(test)] @@ -2260,6 +2375,7 @@ mod tests { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ), ) diff --git a/nexus/db-queries/src/db/datastore/migration.rs b/nexus/db-queries/src/db/datastore/migration.rs index 8981ab9bf35..f1d562dfa2d 100644 --- a/nexus/db-queries/src/db/datastore/migration.rs +++ b/nexus/db-queries/src/db/datastore/migration.rs @@ -240,6 +240,7 @@ mod tests { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ), ) diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 7c39024bf72..354c58a0300 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -81,6 +81,7 @@ mod lldp; mod local_storage; mod lookup_interface; mod migration; +pub mod multicast; mod nat_entry; mod network_interface; mod oximeter; diff --git a/nexus/db-queries/src/db/datastore/multicast/groups.rs b/nexus/db-queries/src/db/datastore/multicast/groups.rs new file mode 100644 index 00000000000..6364d45b6a3 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/multicast/groups.rs @@ -0,0 +1,2883 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Multicast group management and IP allocation. +//! +//! Database operations for multicast groups following the bifurcated design +//! from [RFD 488](https://rfd.shared.oxide.computer/rfd/488): +//! +//! - External groups: customer-facing, allocated from IP pools +//! 
- Underlay groups: system-generated admin-scoped IPv6 multicast groups + +use std::net::IpAddr; + +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::Utc; +use diesel::prelude::*; +use diesel::result::{ + DatabaseErrorKind::UniqueViolation, + Error::{DatabaseError, NotFound}, +}; +use ipnetwork::IpNetwork; +use ref_cast::RefCast; +use slog::{error, info}; +use uuid::Uuid; + +use nexus_db_errors::{ErrorHandler, public_error_from_diesel}; +use nexus_db_lookup::DbConnection; +use nexus_types::external_api::params; +use nexus_types::identity::Resource; +use omicron_common::api::external::http_pagination::PaginatedBy; +use omicron_common::api::external::{ + self, CreateResult, DataPageParams, DeleteResult, + IdentityMetadataCreateParams, ListResultVec, LookupResult, LookupType, + ResourceType, UpdateResult, +}; +use omicron_common::vlan::VlanID; +use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid}; + +use crate::authz; +use crate::context::OpContext; +use crate::db::datastore::DataStore; +use crate::db::model::{ + ExternalMulticastGroup, ExternalMulticastGroupUpdate, + IncompleteExternalMulticastGroup, IncompleteExternalMulticastGroupParams, + IpPoolType, MulticastGroup, MulticastGroupState, Name, + UnderlayMulticastGroup, Vni, +}; +use crate::db::pagination::paginated; +use crate::db::queries::external_multicast_group::NextExternalMulticastGroup; +use crate::db::update_and_check::{UpdateAndCheck, UpdateStatus}; + +/// Parameters for multicast group allocation. +#[derive(Debug, Clone)] +pub(crate) struct MulticastGroupAllocationParams { + pub identity: IdentityMetadataCreateParams, + pub ip: Option, + pub pool: Option, + pub source_ips: Option>, + pub mvlan: Option, +} + +impl DataStore { + /// List multicast groups by state. + /// + /// Used by RPW reconciler. + pub async fn multicast_groups_list_by_state( + &self, + opctx: &OpContext, + state: MulticastGroupState, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use nexus_db_schema::schema::multicast_group::dsl; + + paginated(dsl::multicast_group, dsl::id, pagparams) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(state)) + .select(MulticastGroup::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Set multicast group state. + pub async fn multicast_group_set_state( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + new_state: MulticastGroupState, + ) -> UpdateResult<()> { + use nexus_db_schema::schema::multicast_group::dsl; + + let rows_updated = diesel::update(dsl::multicast_group) + .filter(dsl::id.eq(group_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .set(( + dsl::state.eq(new_state), + dsl::time_modified.eq(diesel::dsl::now), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + if rows_updated == 0 { + return Err(external::Error::not_found_by_id( + ResourceType::MulticastGroup, + &group_id.into_untyped_uuid(), + )); + } + + Ok(()) + } + + /// Allocate a new external multicast group. + /// + /// The external multicast IP is allocated from the specified pool or the + /// default multicast pool. 
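A caller-side sketch of the entry point below, mirroring the shape used by this patch's own tests (it assumes a multicast IP pool has already been created and linked to the caller's silo; `opctx` and `datastore` stand in for the usual Nexus handles):

    let group = datastore
        .multicast_group_create(
            &opctx,
            &params::MulticastGroupCreate {
                identity: IdentityMetadataCreateParams {
                    name: "example-group".parse().unwrap(),
                    description: "illustrative group".to_string(),
                },
                multicast_ip: None, // allocate an address from the pool
                source_ips: None,   // ASM; Some(vec![...]) requests SSM
                pool: None,         // fall back to the default multicast pool
                mvlan: None,
            },
            None, // no pre-resolved authz::IpPool; the default pool is used
        )
        .await?;
    // Groups start in `Creating`; the RPW reconciler drives them to `Active`.
    assert_eq!(group.state, MulticastGroupState::Creating);
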
+ pub async fn multicast_group_create( + &self, + opctx: &OpContext, + params: ¶ms::MulticastGroupCreate, + authz_pool: Option, + ) -> CreateResult { + self.allocate_external_multicast_group( + opctx, + MulticastGroupAllocationParams { + identity: params.identity.clone(), + ip: params.multicast_ip, + pool: authz_pool, + source_ips: params.source_ips.clone(), + mvlan: params.mvlan, + }, + ) + .await + } + + /// Fetch an external multicast group by ID. + /// + /// See [`Self::multicast_group_fetch_on_conn`] for the connection-reusing + /// variant. + pub async fn multicast_group_fetch( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + ) -> LookupResult { + let conn = self.pool_connection_authorized(opctx).await?; + self.multicast_group_fetch_on_conn(&conn, group_id.into_untyped_uuid()) + .await + } + + /// Fetch an external multicast group using provided connection. + pub async fn multicast_group_fetch_on_conn( + &self, + conn: &async_bb8_diesel::Connection, + group_id: Uuid, + ) -> LookupResult { + use nexus_db_schema::schema::multicast_group::dsl; + + dsl::multicast_group + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(group_id)) + .select(ExternalMulticastGroup::as_select()) + .first_async(conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::MulticastGroup, + LookupType::ById(group_id.into_untyped_uuid()), + ), + ) + }) + } + + /// Lookup an external multicast group by IP address. + pub async fn multicast_group_lookup_by_ip( + &self, + opctx: &OpContext, + ip_addr: IpAddr, + ) -> LookupResult { + use nexus_db_schema::schema::multicast_group::dsl; + + dsl::multicast_group + .filter(dsl::time_deleted.is_null()) + .filter(dsl::multicast_ip.eq(IpNetwork::from(ip_addr))) + .select(ExternalMulticastGroup::as_select()) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::MulticastGroup, + LookupType::ByName(ip_addr.to_string()), + ), + ) + }) + } + + /// List multicast groups (fleet-wide). + pub async fn multicast_groups_list( + &self, + opctx: &OpContext, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + use nexus_db_schema::schema::multicast_group::dsl; + + match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(dsl::multicast_group, dsl::id, pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + dsl::multicast_group, + dsl::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .filter(dsl::time_deleted.is_null()) + .select(ExternalMulticastGroup::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Update a multicast group. 
+ pub async fn multicast_group_update( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + params: ¶ms::MulticastGroupUpdate, + ) -> UpdateResult { + use nexus_db_schema::schema::multicast_group::dsl; + + // Create update struct with mvlan=None (won't update field) + let mut update = ExternalMulticastGroupUpdate::from(params.clone()); + + // Handle mvlan manually like VpcSubnetUpdate handles custom_router_id + // - None: leave as None (don't update field) + // - Some(Nullable(Some(v))): set to update field to value + // - Some(Nullable(None)): set to update field to NULL + if let Some(mvlan) = ¶ms.mvlan { + update.mvlan = Some(mvlan.0.map(|vlan| u16::from(vlan) as i16)); + } + + diesel::update(dsl::multicast_group) + .filter(dsl::id.eq(group_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .set(update) + .returning(ExternalMulticastGroup::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::MulticastGroup, + LookupType::ById(group_id.into_untyped_uuid()), + ), + ) + }) + } + + /// Mark a multicast group for deletion by transitioning to "DELETING" state. + /// + /// Unlike members (which use `time_deleted` to distinguish temporary vs + /// permanent removal), groups use a simpler model: + /// - "DELETING" state = permanent removal in progress + /// - RPW reconciler handles cleanup then removes the row entirely + /// - `time_deleted` is only set as final step before row deletion + /// + /// The group remains visible in queries until the reconciler completes + /// cleanup and hard-deletes the row. + pub async fn mark_multicast_group_for_removal( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + ) -> DeleteResult { + use nexus_db_schema::schema::multicast_group::dsl; + let now = Utc::now(); + + diesel::update(dsl::multicast_group) + .filter(dsl::id.eq(group_id.into_untyped_uuid())) + .filter( + dsl::state + .eq(MulticastGroupState::Active) + .or(dsl::state.eq(MulticastGroupState::Creating)), + ) + .filter(dsl::time_deleted.is_null()) + .set(( + dsl::state.eq(MulticastGroupState::Deleting), + dsl::time_modified.eq(now), + )) + .returning(ExternalMulticastGroup::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::MulticastGroup, + LookupType::ById(group_id.into_untyped_uuid()), + ), + ) + })?; + + Ok(()) + } + + /// Delete a multicast group permanently. + pub async fn multicast_group_delete( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + ) -> DeleteResult { + use nexus_db_schema::schema::multicast_group::dsl; + + diesel::delete(dsl::multicast_group) + .filter(dsl::id.eq(group_id.into_untyped_uuid())) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_| ()) + } + + /// Allocate an external multicast group from an IP Pool. + /// + /// See [`Self::allocate_external_multicast_group_on_conn`] for the connection-reusing variant. 
+ pub(crate) async fn allocate_external_multicast_group( + &self, + opctx: &OpContext, + params: MulticastGroupAllocationParams, + ) -> CreateResult { + let group_id = Uuid::new_v4(); + let authz_pool = self + .resolve_pool_for_allocation( + opctx, + params.pool, + IpPoolType::Multicast, + ) + .await?; + + // Enforce ASM/SSM semantics when allocating from a pool: + // - If sources are provided without an explicit IP (implicit allocation), + // the pool must be SSM so we allocate an SSM address. + // - If the pool is SSM and sources are empty/missing, reject. + let sources_empty = + params.source_ips.as_ref().map(|v| v.is_empty()).unwrap_or(true); + + let pool_is_ssm = + self.multicast_pool_is_ssm(opctx, authz_pool.id()).await?; + + if !sources_empty && params.ip.is_none() && !pool_is_ssm { + let pool_id = authz_pool.id(); + return Err(external::Error::invalid_request(&format!( + "Cannot allocate SSM multicast group from ASM pool {pool_id}. Choose a multicast pool with SSM ranges (IPv4 232/8, IPv6 FF3x::/32) or provide an explicit SSM address." + ))); + } + + if sources_empty && pool_is_ssm { + let pool_id = authz_pool.id(); + return Err(external::Error::invalid_request(&format!( + "SSM multicast pool {pool_id} requires one or more source IPs" + ))); + } + + // Prepare source IPs from params if provided + let source_ip_networks: Vec = params + .source_ips + .as_ref() + .map(|source_ips| { + source_ips.iter().map(|ip| IpNetwork::from(*ip)).collect() + }) + .unwrap_or_default(); + + // Fleet-scoped multicast groups always use DEFAULT_MULTICAST_VNI (77). + // This reserved VNI is below MIN_GUEST_VNI (1024) and provides consistent + // behavior across all multicast groups. VNI is not derived from VPC since + // groups are fleet-wide and can span multiple projects/VPCs. + let vni = Vni(external::Vni::DEFAULT_MULTICAST_VNI); + + // Create the incomplete group + let data = IncompleteExternalMulticastGroup::new( + IncompleteExternalMulticastGroupParams { + id: group_id, + name: Name(params.identity.name.clone()), + description: params.identity.description.clone(), + ip_pool_id: authz_pool.id(), + explicit_address: params.ip, + source_ips: source_ip_networks, + mvlan: params.mvlan.map(|vlan_id| u16::from(vlan_id) as i16), + vni, + // Set DPD tag to the group name to couple overlay/underlay entries + // for this multicast group (kept in sync on rename) + tag: Some(params.identity.name.to_string()), + }, + ); + + let conn = self.pool_connection_authorized(opctx).await?; + Self::allocate_external_multicast_group_on_conn(&conn, data).await + } + + /// Allocate an external multicast group using provided connection. + pub(crate) async fn allocate_external_multicast_group_on_conn( + conn: &async_bb8_diesel::Connection, + data: IncompleteExternalMulticastGroup, + ) -> Result { + let name = data.name.to_string(); + let explicit_ip = data.explicit_address.is_some(); + + NextExternalMulticastGroup::new(data).get_result_async(conn).await.map_err(|e| { + match e { + NotFound => { + if explicit_ip { + external::Error::invalid_request( + "Requested multicast IP address is not available in the specified pool range", + ) + } else { + external::Error::insufficient_capacity( + "No multicast IP addresses available", + "NextExternalMulticastGroup::new returned NotFound", + ) + } + } + // Multicast group: name conflict + DatabaseError(UniqueViolation, ..) 
=> { + public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::MulticastGroup, + &name, + ), + ) + } + _ => { + crate::db::queries::external_multicast_group::from_diesel(e) + } + } + }) + } + + /// Deallocate an external multicast group address for IP pool cleanup. + /// + /// This marks the group's IP address as deallocated by setting `time_deleted`, + /// releasing it back to the pool. This is NOT the user-initiated deletion path. + /// + /// User-initiated deletion uses `mark_multicast_group_for_removal` which + /// transitions to "Deleting" state for RPW cleanup before row removal. + /// + /// Returns `Ok(true)` if the group was deallocated, `Ok(false)` if it was + /// already deleted (i.e., `time_deleted` was already set), `Err(_)` for any + /// other condition including non-existent record. + pub async fn deallocate_external_multicast_group( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + ) -> Result { + let conn = self.pool_connection_authorized(opctx).await?; + self.deallocate_external_multicast_group_on_conn( + &conn, + group_id.into_untyped_uuid(), + ) + .await + } + + /// Transaction-safe variant of deallocate_external_multicast_group. + pub(crate) async fn deallocate_external_multicast_group_on_conn( + &self, + conn: &async_bb8_diesel::Connection, + group_id: Uuid, + ) -> Result { + use nexus_db_schema::schema::multicast_group::dsl; + + let now = Utc::now(); + let result = diesel::update(dsl::multicast_group) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(group_id)) + .set(dsl::time_deleted.eq(now)) + .check_if_exists::(group_id) + .execute_and_check(conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::MulticastGroup, + LookupType::ById(group_id.into_untyped_uuid()), + ), + ) + })?; + + Ok(match result.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false, + }) + } + + /// Ensure an underlay multicast group exists for an external multicast + /// group. + pub async fn ensure_underlay_multicast_group( + &self, + opctx: &OpContext, + external_group: MulticastGroup, + multicast_ip: IpNetwork, + ) -> CreateResult { + use nexus_db_schema::schema::multicast_group::dsl as external_dsl; + use nexus_db_schema::schema::underlay_multicast_group::dsl as underlay_dsl; + + let external_group_id = external_group.id(); + let tag = external_group.tag; + + // Try to create new underlay multicast group, or get existing one if concurrent creation + let underlay_group = match diesel::insert_into( + underlay_dsl::underlay_multicast_group, + ) + .values(( + underlay_dsl::id.eq(Uuid::new_v4()), + underlay_dsl::time_created.eq(Utc::now()), + underlay_dsl::time_modified.eq(Utc::now()), + underlay_dsl::multicast_ip.eq(multicast_ip), + underlay_dsl::tag.eq(tag.clone()), + )) + .returning(UnderlayMulticastGroup::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + { + Ok(created_group) => { + info!( + opctx.log, + "Created new underlay multicast group"; + "group_id" => %created_group.id, + "multicast_ip" => %multicast_ip + ); + created_group + } + Err(e) => match e { + DatabaseError(UniqueViolation, ..) 
=> {
+                    // Concurrent creation - fetch the existing group
+                    // This is expected behavior for idempotent operations
+                    info!(
+                        opctx.log,
+                        "Concurrent underlay multicast group creation detected, fetching existing";
+                        "multicast_ip" => %multicast_ip,
+                    );
+
+                    underlay_dsl::underlay_multicast_group
+                        .filter(underlay_dsl::multicast_ip.eq(multicast_ip))
+                        .filter(underlay_dsl::time_deleted.is_null())
+                        .first_async::<UnderlayMulticastGroup>(
+                            &*self.pool_connection_authorized(opctx).await?,
+                        )
+                        .await
+                        .map_err(|e| {
+                            public_error_from_diesel(e, ErrorHandler::Server)
+                        })?
+                }
+                _ => {
+                    error!(
+                        opctx.log,
+                        "Failed to create underlay multicast group";
+                        "error" => ?e,
+                        "multicast_ip" => %multicast_ip,
+                        "tag" => ?tag
+                    );
+                    return Err(public_error_from_diesel(
+                        e,
+                        ErrorHandler::Server,
+                    ));
+                }
+            },
+        };
+
+        // Link the external group to the underlay group if not already linked
+        // This makes the function truly idempotent
+        if external_group.underlay_group_id != Some(underlay_group.id) {
+            diesel::update(external_dsl::multicast_group)
+                .filter(external_dsl::id.eq(external_group_id))
+                .filter(external_dsl::time_deleted.is_null())
+                .set(external_dsl::underlay_group_id.eq(underlay_group.id))
+                .execute_async(&*self.pool_connection_authorized(opctx).await?)
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?;
+        }
+
+        Ok(underlay_group)
+    }
+
+    /// Fetch an underlay multicast group by ID.
+    pub async fn underlay_multicast_group_fetch(
+        &self,
+        opctx: &OpContext,
+        group_id: Uuid,
+    ) -> LookupResult<UnderlayMulticastGroup> {
+        self.underlay_multicast_group_fetch_on_conn(
+            &*self.pool_connection_authorized(opctx).await?,
+            group_id,
+        )
+        .await
+    }
+
+    /// Fetch underlay multicast group using provided connection.
+    pub async fn underlay_multicast_group_fetch_on_conn(
+        &self,
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+        group_id: Uuid,
+    ) -> LookupResult<UnderlayMulticastGroup> {
+        use nexus_db_schema::schema::underlay_multicast_group::dsl;
+
+        dsl::underlay_multicast_group
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::id.eq(group_id))
+            .select(UnderlayMulticastGroup::as_select())
+            .first_async(conn)
+            .await
+            .map_err(|e| {
+                public_error_from_diesel(
+                    e,
+                    ErrorHandler::NotFoundByLookup(
+                        ResourceType::MulticastGroup,
+                        LookupType::ById(group_id.into_untyped_uuid()),
+                    ),
+                )
+            })
+    }
+
+    /// Delete an underlay multicast group permanently.
+    ///
+    /// This immediately removes the underlay group record from the database. It
+    /// should only be called when the group is already removed from the switch
+    /// or when cleaning up failed operations.
+    pub async fn underlay_multicast_group_delete(
+        &self,
+        opctx: &OpContext,
+        group_id: Uuid,
+    ) -> DeleteResult {
+        use nexus_db_schema::schema::underlay_multicast_group::dsl;
+
+        diesel::delete(dsl::underlay_multicast_group)
+            .filter(dsl::id.eq(group_id))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_| ()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::net::Ipv4Addr; + + use nexus_types::identity::Resource; + use omicron_common::address::{IpRange, Ipv4Range}; + use omicron_common::api::external::{ + IdentityMetadataUpdateParams, NameOrId, + }; + use omicron_test_utils::dev; + use omicron_uuid_kinds::{ + GenericUuid, InstanceUuid, PropolisUuid, SledUuid, + }; + + use crate::db::datastore::Error; + use crate::db::datastore::LookupType; + use crate::db::model::IpPool; + use crate::db::model::{ + Generation, InstanceRuntimeState, IpPoolReservationType, + IpPoolResource, IpPoolResourceType, IpVersion, + MulticastGroupMemberState, + }; + use crate::db::pub_test_utils::helpers::{ + SledUpdateBuilder, create_instance_with_vmm, create_project, + create_stopped_instance_record, + }; + use crate::db::pub_test_utils::{TestDatabase, multicast}; + + async fn create_test_sled(datastore: &DataStore) -> SledUuid { + let sled_id = SledUuid::new_v4(); + let sled_update = SledUpdateBuilder::new().sled_id(sled_id).build(); + datastore.sled_upsert(sled_update).await.unwrap(); + sled_id + } + + #[tokio::test] + async fn test_multicast_group_datastore_pool_exhaustion() { + let logctx = + dev::test_setup_log("test_multicast_group_pool_exhaustion"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let pool_identity = IdentityMetadataCreateParams { + name: "exhaust-pool".parse().unwrap(), + description: "Pool exhaustion test".to_string(), + }; + + // Create multicast IP pool with very small range (2 addresses) + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + // Only 2 addresses + Ipv4Range::new( + Ipv4Addr::new(224, 100, 2, 1), + Ipv4Addr::new(224, 100, 2, 2), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let link = IpPoolResource { + resource_id: opctx.authn.silo_required().unwrap().id(), + resource_type: IpPoolResourceType::Silo, + ip_pool_id: ip_pool.id(), + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Allocate first address + let params1 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "first-group".parse().unwrap(), + description: "First group".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name("exhaust-pool".parse().unwrap())), + mvlan: None, + }; + datastore + .multicast_group_create(&opctx, ¶ms1, Some(authz_pool.clone())) + .await + .expect("Should create first group"); + + // Allocate second address + let params2 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "second-group".parse().unwrap(), + description: "Second group".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name("exhaust-pool".parse().unwrap())), + mvlan: None, + }; + datastore + .multicast_group_create(&opctx, ¶ms2, Some(authz_pool.clone())) + .await + .expect("Should create second group"); + + // Third allocation should 
fail due to exhaustion + let params3 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "third-group".parse().unwrap(), + description: "Should fail".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name("exhaust-pool".parse().unwrap())), + mvlan: None, + }; + let result3 = datastore + .multicast_group_create(&opctx, ¶ms3, Some(authz_pool.clone())) + .await; + assert!( + result3.is_err(), + "Third allocation should fail due to pool exhaustion" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_datastore_default_pool_allocation() { + let logctx = + dev::test_setup_log("test_multicast_group_default_pool_allocation"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let pool_identity = IdentityMetadataCreateParams { + name: "default-multicast-pool".parse().unwrap(), + description: "Default pool allocation test".to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + external::LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 250, 1, 1), + Ipv4Addr::new(224, 250, 1, 10), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let link = IpPoolResource { + resource_id: opctx.authn.silo_required().unwrap().id(), + resource_type: IpPoolResourceType::Silo, + ip_pool_id: ip_pool.id(), + is_default: true, // For default allocation + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Create group without specifying pool (should use default) + let params_default = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "auto-alloc-group".parse().unwrap(), + description: "Group using default pool".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: None, // No pool specified - should use default + mvlan: None, + }; + + let group_default = datastore + .multicast_group_create(&opctx, ¶ms_default, None) + .await + .expect("Should create group from default pool"); + + assert_eq!(group_default.state, MulticastGroupState::Creating); + + // Verify the IP is from our default pool's range + let ip_str = group_default.multicast_ip.ip().to_string(); + assert!( + ip_str.starts_with("224.250.1."), + "IP should be from default pool range" + ); + + // Create group with explicit pool name + let params_explicit = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "explicit-alloc-group".parse().unwrap(), + description: "Group with explicit pool".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name( + "default-multicast-pool".parse().unwrap(), + )), + mvlan: None, + }; + let group_explicit = datastore + .multicast_group_create(&opctx, ¶ms_explicit, None) + .await + .expect("Should create group from explicit pool"); + + assert_eq!(group_explicit.state, MulticastGroupState::Creating); + + // Verify the explicit group also got an IP from the same default pool range + let ip_str_explicit = group_explicit.multicast_ip.ip().to_string(); + assert!( + 
ip_str_explicit.starts_with("224.250.1."), + "Explicit IP should also be from default pool range" + ); + + // Test state transitions on the default pool group + datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group_default.id()), + MulticastGroupState::Active, + ) + .await + .expect("Should transition default group to 'Active'"); + + let updated_group = datastore + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group_default.id()), + ) + .await + .expect("Should fetch updated group"); + assert_eq!(updated_group.state, MulticastGroupState::Active); + + // Test list by state functionality + let pagparams = &DataPageParams { + marker: None, + limit: std::num::NonZeroU32::new(100).unwrap(), + direction: dropshot::PaginationOrder::Ascending, + }; + let active_groups = datastore + .multicast_groups_list_by_state( + &opctx, + MulticastGroupState::Active, + pagparams, + ) + .await + .expect("Should list active groups"); + assert!(active_groups.iter().any(|g| g.id() == group_default.id())); + + let creating_groups = datastore + .multicast_groups_list_by_state( + &opctx, + MulticastGroupState::Creating, + pagparams, + ) + .await + .expect("Should list creating groups"); + // The explicit group should still be "Creating" + assert!(creating_groups.iter().any(|g| g.id() == group_explicit.id())); + // The default group should not be in "Creating" anymore + assert!(!creating_groups.iter().any(|g| g.id() == group_default.id())); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_datastore_underlay_linkage() { + let logctx = + dev::test_setup_log("test_multicast_group_with_underlay_linkage"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let pool_identity = IdentityMetadataCreateParams { + name: "test-multicast-pool".parse().unwrap(), + description: "Comprehensive test pool".to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + external::LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 1, 3, 1), + Ipv4Addr::new(224, 1, 3, 5), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: ip_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Create external multicast group with explicit address + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "test-group".parse().unwrap(), + description: "Comprehensive test group".to_string(), + }, + multicast_ip: Some("224.1.3.3".parse().unwrap()), + source_ips: None, + pool: Some(NameOrId::Name("test-multicast-pool".parse().unwrap())), + mvlan: None, + }; + + let external_group = datastore + .multicast_group_create(&opctx, ¶ms, Some(authz_pool.clone())) + .await + .expect("Should create external group"); + + // Verify initial state + 
assert_eq!(external_group.multicast_ip.to_string(), "224.1.3.3/32"); + assert_eq!(external_group.state, MulticastGroupState::Creating); + // With RPW pattern, underlay_group_id is initially None in "Creating" state + assert_eq!(external_group.underlay_group_id, None); + + // Create underlay group using ensure method (this would normally be done by reconciler) + let underlay_group = datastore + .ensure_underlay_multicast_group( + &opctx, + external_group.clone(), + "ff04::1".parse().unwrap(), + ) + .await + .expect("Should create underlay group"); + + // Verify underlay group properties + assert!(underlay_group.multicast_ip.ip().is_ipv6()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_member_operations_with_parent_id() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_operations_with_parent_id", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Set up multicast IP pool and group + let pool_identity = IdentityMetadataCreateParams { + name: "parent-id-test-pool".parse().unwrap(), + description: "Pool for parent_id testing".to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + external::LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 3, 1, 1), + Ipv4Addr::new(224, 3, 1, 10), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: ip_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Create test project for parent_id operations + let (authz_project, _project) = + create_project(&opctx, &datastore, "test-project").await; + + // Create a multicast group using the real project + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "parent-id-test-group".parse().unwrap(), + description: "Group for parent_id testing".to_string(), + }, + multicast_ip: Some("224.3.1.5".parse().unwrap()), + source_ips: None, + pool: Some(NameOrId::Name("parent-id-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group = datastore + .multicast_group_create(&opctx, ¶ms, Some(authz_pool.clone())) + .await + .expect("Should create multicast group"); + + // Create test sled and instances + let sled_id = create_test_sled(&datastore).await; + let instance_record_1 = create_stopped_instance_record( + &opctx, + &datastore, + &authz_project, + "test-instance-1", + ) + .await; + let parent_id_1 = instance_record_1.as_untyped_uuid(); + let instance_record_2 = create_stopped_instance_record( + &opctx, + &datastore, + &authz_project, + "test-instance-2", + ) + .await; + let parent_id_2 = instance_record_2.as_untyped_uuid(); + let instance_record_3 = create_stopped_instance_record( + &opctx, + &datastore, + &authz_project, + "test-instance-3", + ) + .await; + let parent_id_3 = instance_record_3.as_untyped_uuid(); + + // Create VMMs and associate 
instances with sled (required for multicast membership) + let vmm1_id = PropolisUuid::new_v4(); + let vmm1 = crate::db::model::Vmm::new( + vmm1_id, + InstanceUuid::from_untyped_uuid(*parent_id_1), + sled_id, + "127.0.0.1".parse().unwrap(), + 12400, + crate::db::model::VmmCpuPlatform::SledDefault, + ); + datastore.vmm_insert(&opctx, vmm1).await.expect("Should create VMM1"); + + let vmm2_id = PropolisUuid::new_v4(); + let vmm2 = crate::db::model::Vmm::new( + vmm2_id, + InstanceUuid::from_untyped_uuid(*parent_id_2), + sled_id, + "127.0.0.1".parse().unwrap(), + 12401, + crate::db::model::VmmCpuPlatform::SledDefault, + ); + datastore.vmm_insert(&opctx, vmm2).await.expect("Should create VMM2"); + + let vmm3_id = PropolisUuid::new_v4(); + let vmm3 = crate::db::model::Vmm::new( + vmm3_id, + InstanceUuid::from_untyped_uuid(*parent_id_3), + sled_id, + "127.0.0.1".parse().unwrap(), + 12402, + crate::db::model::VmmCpuPlatform::SledDefault, + ); + datastore.vmm_insert(&opctx, vmm3).await.expect("Should create VMM3"); + + // Update instances to point to their VMMs + let instance1 = datastore + .instance_refetch( + &opctx, + &authz::Instance::new( + authz_project.clone(), + instance_record_1.into_untyped_uuid(), + LookupType::by_id(instance_record_1), + ), + ) + .await + .expect("Should fetch instance1"); + datastore + .instance_update_runtime( + &instance_record_1, + &InstanceRuntimeState { + nexus_state: crate::db::model::InstanceState::Vmm, + propolis_id: Some(vmm1_id.into_untyped_uuid()), + dst_propolis_id: None, + migration_id: None, + gen: Generation::from(instance1.runtime().gen.next()), + time_updated: Utc::now(), + time_last_auto_restarted: None, + }, + ) + .await + .expect("Should set instance1 runtime state"); + + let instance2 = datastore + .instance_refetch( + &opctx, + &authz::Instance::new( + authz_project.clone(), + instance_record_2.into_untyped_uuid(), + LookupType::by_id(instance_record_2), + ), + ) + .await + .expect("Should fetch instance2"); + datastore + .instance_update_runtime( + &instance_record_2, + &InstanceRuntimeState { + nexus_state: crate::db::model::InstanceState::Vmm, + propolis_id: Some(vmm2_id.into_untyped_uuid()), + dst_propolis_id: None, + migration_id: None, + gen: Generation::from(instance2.runtime().gen.next()), + time_updated: Utc::now(), + time_last_auto_restarted: None, + }, + ) + .await + .expect("Should set instance2 runtime state"); + + let instance3 = datastore + .instance_refetch( + &opctx, + &authz::Instance::new( + authz_project.clone(), + instance_record_3.into_untyped_uuid(), + LookupType::by_id(instance_record_3), + ), + ) + .await + .expect("Should fetch instance3"); + datastore + .instance_update_runtime( + &instance_record_3, + &InstanceRuntimeState { + nexus_state: crate::db::model::InstanceState::Vmm, + propolis_id: Some(vmm3_id.into_untyped_uuid()), + dst_propolis_id: None, + migration_id: None, + gen: Generation::from(instance3.runtime().gen.next()), + time_updated: Utc::now(), + time_last_auto_restarted: None, + }, + ) + .await + .expect("Should set instance3 runtime state"); + + // Transition group to "Active" state before adding members + datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + MulticastGroupState::Active, + ) + .await + .expect("Should transition group to 'Active' state"); + + // Add members using parent_id + let member1 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_1), + ) 
+ .await + .expect("Should add first member"); + + let member2 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_2), + ) + .await + .expect("Should add second member"); + + // Try to add the same parent_id again - should succeed idempotently + let duplicate_result = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_1), + ) + .await + .expect("Should handle duplicate add idempotently"); + + // Should return the same member (idempotent) + assert_eq!(duplicate_result.id, member1.id); + assert_eq!(duplicate_result.parent_id, member1.parent_id); + + // Verify member structure uses parent_id correctly + assert_eq!(member1.external_group_id, group.id()); + assert_eq!(member1.parent_id, *parent_id_1); + assert_eq!(member2.external_group_id, group.id()); + assert_eq!(member2.parent_id, *parent_id_2); + + // Verify generation sequence is working correctly + // (database assigns sequential values) + let gen1 = member1.version_added; + let gen2 = member2.version_added; + assert!( + i64::from(&*gen1) > 0, + "First member should have positive generation number" + ); + assert!( + gen2 > gen1, + "Second member should have higher generation than first" + ); + + // List members + let pagparams = &DataPageParams { + marker: None, + limit: std::num::NonZeroU32::new(100).unwrap(), + direction: dropshot::PaginationOrder::Ascending, + }; + + let members = datastore + .multicast_group_members_list( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list members"); + + assert_eq!(members.len(), 2); + assert!(members.iter().any(|m| m.parent_id == *parent_id_1)); + assert!(members.iter().any(|m| m.parent_id == *parent_id_2)); + + // Remove member by parent_id + datastore + .multicast_group_member_detach_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_1), + ) + .await + .expect("Should remove first member"); + + // Verify only one active member remains + let all_members = datastore + .multicast_group_members_list( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list remaining members"); + + // Filter for active members (non-"Left" state) + let active_members: Vec<_> = all_members + .into_iter() + .filter(|m| m.state != MulticastGroupMemberState::Left) + .collect(); + + assert_eq!(active_members.len(), 1); + assert_eq!(active_members[0].parent_id, *parent_id_2); + + // Verify member removal doesn't affect the group + let updated_group = datastore + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .expect("Should fetch group after member removal"); + assert_eq!(updated_group.id(), group.id()); + assert_eq!(updated_group.multicast_ip, group.multicast_ip); + + // Add member back and remove all + datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_1), + ) + .await + .expect("Should re-add first member"); + + datastore + .multicast_group_member_detach_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_1), + ) + .await + .expect("Should remove first member again"); + + datastore + 
.multicast_group_member_detach_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_2), + ) + .await + .expect("Should remove second member"); + + // Verify no active members remain + let all_final_members = datastore + .multicast_group_members_list( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list final members"); + + // Filter for active members (non-"Left" state) + let active_final_members: Vec<_> = all_final_members + .into_iter() + .filter(|m| m.state != MulticastGroupMemberState::Left) + .collect(); + + assert_eq!(active_final_members.len(), 0); + + // Add a member with the third parent_id to verify different parent + // types work + let member3 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id_3), + ) + .await + .expect("Should add third member with different parent_id"); + + assert_eq!(member3.external_group_id, group.id()); + assert_eq!(member3.parent_id, *parent_id_3); + + // Verify generation continues to increment properly + let gen3 = member3.version_added; + assert!( + gen3 > gen2, + "Third member should have higher generation than second" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_member_duplicate_prevention() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_duplicate_prevention", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Set up multicast IP pool and group + let pool_identity = IdentityMetadataCreateParams { + name: "duplicate-test-pool".parse().unwrap(), + description: "Pool for duplicate testing".to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + external::LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 3, 1, 1), + Ipv4Addr::new(224, 3, 1, 10), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: ip_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Create test project, sled and instance for duplicate testing + let (authz_project, _project) = + create_project(&opctx, &datastore, "dup-test-proj").await; + let sled_id = create_test_sled(&datastore).await; + let instance_record = create_stopped_instance_record( + &opctx, + &datastore, + &authz_project, + "dup-test-instance", + ) + .await; + let parent_id = instance_record.as_untyped_uuid(); + + // Create VMM and associate instance with sled (required for multicast membership) + let vmm_id = PropolisUuid::new_v4(); + let vmm = crate::db::model::Vmm::new( + vmm_id, + InstanceUuid::from_untyped_uuid(*parent_id), + sled_id, + "127.0.0.1".parse().unwrap(), + 12400, + crate::db::model::VmmCpuPlatform::SledDefault, + ); + 
datastore.vmm_insert(&opctx, vmm).await.expect("Should create VMM"); + + // Update instance to point to the VMM (increment generation for update to succeed) + let instance = datastore + .instance_refetch( + &opctx, + &authz::Instance::new( + authz_project.clone(), + instance_record.into_untyped_uuid(), + LookupType::by_id(instance_record), + ), + ) + .await + .expect("Should fetch instance"); + datastore + .instance_update_runtime( + &instance_record, + &InstanceRuntimeState { + nexus_state: crate::db::model::InstanceState::Vmm, + propolis_id: Some(vmm_id.into_untyped_uuid()), + dst_propolis_id: None, + migration_id: None, + gen: Generation::from(instance.runtime().gen.next()), + time_updated: Utc::now(), + time_last_auto_restarted: None, + }, + ) + .await + .expect("Should set instance runtime state"); + + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "duplicate-test-group".parse().unwrap(), + description: "Group for duplicate testing".to_string(), + }, + multicast_ip: Some("224.3.1.5".parse().unwrap()), + source_ips: None, + pool: Some(NameOrId::Name("duplicate-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group = datastore + .multicast_group_create(&opctx, ¶ms, Some(authz_pool.clone())) + .await + .expect("Should create multicast group"); + + // Transition group to "Active" state before adding members + datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + MulticastGroupState::Active, + ) + .await + .expect("Should transition group to 'Active' state"); + + // Add member first time - should succeed + let member1 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id), + ) + .await + .expect("Should add member first time"); + + // Try to add same parent_id again - this should either: + // 1. Fail with a conflict error, or + // 2. 
Succeed if the system allows multiple entries (which we can test) + let result2 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*parent_id), + ) + .await; + + // Second attempt should succeed idempotently (return existing member) + let member2 = + result2.expect("Should handle duplicate add idempotently"); + + // Should return the same member (idempotent) + assert_eq!(member2.id, member1.id); + assert_eq!(member2.parent_id, *parent_id); + + // Verify only one member exists + let pagparams = &DataPageParams { + marker: None, + limit: std::num::NonZeroU32::new(100).unwrap(), + direction: dropshot::PaginationOrder::Ascending, + }; + + let members = datastore + .multicast_group_members_list( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list members"); + + assert_eq!(members.len(), 1); + assert_eq!(members[0].parent_id, *parent_id); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_member_state_transitions_datastore() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_state_transitions_datastore", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Set up multicast IP pool and group + let pool_identity = IdentityMetadataCreateParams { + name: "state-test-pool".parse().unwrap(), + description: "Pool for state transition testing".to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 4, 1, 1), + Ipv4Addr::new(224, 4, 1, 10), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: ip_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link pool to silo"); + + // Create multicast group (datastore-only; not exercising reconciler) + let group_params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "state-test-group".parse().unwrap(), + description: "Group for testing member state transitions" + .to_string(), + }, + multicast_ip: None, // Let it allocate from pool + source_ips: None, + pool: Some(NameOrId::Name("state-test-pool".parse().unwrap())), + mvlan: None, + }; + let group = datastore + .multicast_group_create( + &opctx, + &group_params, + Some(authz_pool.clone()), + ) + .await + .expect("Should create multicast group"); + + // Create test project and instance (datastore-only) + let (authz_project, _project) = + create_project(&opctx, &datastore, "state-test-proj").await; + let sled_id = create_test_sled(&datastore).await; + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &authz_project, + "state-test-instance", + sled_id, + ) + .await; + let test_instance_id = instance.into_untyped_uuid(); + + // Transition group to "Active" state before adding 
members + datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + MulticastGroupState::Active, + ) + .await + .expect("Should transition group to 'Active' state"); + + // Create member record in "Joining" state using datastore API + let member = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should create member record"); + + assert_eq!(member.state, MulticastGroupMemberState::Joining); + assert_eq!(member.parent_id, test_instance_id); + + // Test: Transition from "Joining" → "Joined" (simulating what the reconciler would do) + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + MulticastGroupMemberState::Joined, + ) + .await + .expect("Should transition to 'Joined'"); + + // Verify member is now "Active" + let pagparams = &DataPageParams { + marker: None, + limit: std::num::NonZeroU32::new(100).unwrap(), + direction: dropshot::PaginationOrder::Ascending, + }; + + let members = datastore + .multicast_group_members_list( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list members"); + + assert_eq!(members.len(), 1); + assert_eq!(members[0].state, MulticastGroupMemberState::Joined); + + // Test: Transition member to "Left" state (without permanent deletion) + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + MulticastGroupMemberState::Left, + ) + .await + .expect("Should transition to 'Left' state"); + + // Verify member is now in "Left" state + let all_members = datastore + .multicast_group_members_list_by_id( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list all members"); + + assert_eq!(all_members.len(), 1); + + // Verify only "Active" members are shown (filter out Left members) + let all_members = datastore + .multicast_group_members_list( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list all members"); + + // Filter for "Active" members (non-"Left" state) + let active_members: Vec<_> = all_members + .into_iter() + .filter(|m| m.state != MulticastGroupMemberState::Left) + .collect(); + + assert_eq!( + active_members.len(), + 0, + "Active member list should filter out Left members" + ); + + // Complete removal (→ "Left") + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + MulticastGroupMemberState::Left, + ) + .await + .expect("Should transition to Left"); + + // Member should still exist in database and be in "Left" state + let members = datastore + .multicast_group_members_list_by_id( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + pagparams, + ) + .await + .expect("Should list members"); + + assert_eq!(members.len(), 1); + assert_eq!(members[0].state, MulticastGroupMemberState::Left); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_ip_reuse_after_deletion() { + let logctx = + dev::test_setup_log("test_multicast_group_ip_reuse_after_deletion"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + 
let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Set up multicast IP pool + let pool_identity = IdentityMetadataCreateParams { + name: "reuse-test-pool".parse().unwrap(), + description: "Pool for IP reuse testing".to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + external::LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 10, 1, 100), + Ipv4Addr::new(224, 10, 1, 102), // Only 3 addresses + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: ip_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link pool to silo"); + + // Create group with specific IP + let target_ip = "224.10.1.101".parse().unwrap(); + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "reuse-test".parse().unwrap(), + description: "Group for IP reuse test".to_string(), + }, + multicast_ip: Some(target_ip), + source_ips: None, + pool: Some(NameOrId::Name("reuse-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group1 = datastore + .multicast_group_create(&opctx, ¶ms, Some(authz_pool.clone())) + .await + .expect("Should create first group"); + assert_eq!(group1.multicast_ip.ip(), target_ip); + + // Delete the group completely (time_deleted set) + let deleted = datastore + .deallocate_external_multicast_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + ) + .await + .expect("Should deallocate group"); + assert_eq!(deleted, true, "Should successfully deallocate the group"); + + // Create another group with the same IP - should succeed due to time_deleted filtering + let params2 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "reuse-test-2".parse().unwrap(), + description: "Second group reusing same IP".to_string(), + }, + multicast_ip: Some(target_ip), + source_ips: None, + pool: Some(NameOrId::Name("reuse-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group2 = datastore + .multicast_group_create( + &opctx, + ¶ms2, + Some(authz_pool.clone()), + ) + .await + .expect("Should create second group with same IP after first was deleted"); + assert_eq!(group2.multicast_ip.ip(), target_ip); + assert_ne!( + group1.id(), + group2.id(), + "Should be different group instances" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_pool_exhaustion_delete_create_cycle() { + let logctx = dev::test_setup_log( + "test_multicast_group_pool_exhaustion_delete_create_cycle", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Set up small pool (only 1 address) + let pool_identity = IdentityMetadataCreateParams { + name: "cycle-test-pool".parse().unwrap(), + description: "Pool for exhaustion-delete-create cycle testing" + .to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + 
IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + external::LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 20, 1, 50), // Only 1 address + Ipv4Addr::new(224, 20, 1, 50), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: ip_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link pool to silo"); + + // Exhaust the pool + let params1 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "cycle-test-1".parse().unwrap(), + description: "First group to exhaust pool".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name("cycle-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group1 = datastore + .multicast_group_create(&opctx, ¶ms1, Some(authz_pool.clone())) + .await + .expect("Should create first group"); + let allocated_ip = group1.multicast_ip.ip(); + + // Try to create another group - should fail due to exhaustion + let params2 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "cycle-test-2".parse().unwrap(), + description: "Second group should fail".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name("cycle-test-pool".parse().unwrap())), + mvlan: None, + }; + + let result2 = datastore + .multicast_group_create(&opctx, ¶ms2, Some(authz_pool.clone())) + .await; + assert!( + result2.is_err(), + "Second group creation should fail due to pool exhaustion" + ); + + // Delete the first group to free up the IP + let deleted = datastore + .deallocate_external_multicast_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + ) + .await + .expect("Should deallocate first group"); + assert_eq!(deleted, true, "Should successfully deallocate the group"); + + // Now creating a new group should succeed + let params3 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "cycle-test-3".parse().unwrap(), + description: "Third group should succeed after deletion" + .to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name("cycle-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group3 = datastore + .multicast_group_create(&opctx, ¶ms3, Some(authz_pool.clone())) + .await + .expect("Should create third group after first was deleted"); + + // Should reuse the same IP address + assert_eq!( + group3.multicast_ip.ip(), + allocated_ip, + "Should reuse the same IP address" + ); + assert_ne!( + group1.id(), + group3.id(), + "Should be different group instances" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_deallocation_return_values() { + let logctx = dev::test_setup_log( + "test_multicast_group_deallocation_return_values", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Set up multicast IP pool + let pool_identity = IdentityMetadataCreateParams { + name: "dealloc-test-pool".parse().unwrap(), + description: "Pool for 
deallocation testing".to_string(), + }; + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + external::LookupType::ById(ip_pool.id()), + ); + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 30, 1, 1), + Ipv4Addr::new(224, 30, 1, 5), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + let silo_id = opctx.authn.silo_required().unwrap().id(); + let link = IpPoolResource { + ip_pool_id: ip_pool.id(), + resource_type: IpPoolResourceType::Silo, + resource_id: silo_id, + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link pool to silo"); + + // Create a group + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "dealloc-test".parse().unwrap(), + description: "Group for deallocation testing".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name("dealloc-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group = datastore + .multicast_group_create(&opctx, ¶ms, Some(authz_pool.clone())) + .await + .expect("Should create multicast group"); + + // Deallocate existing group - should return true + let result1 = datastore + .deallocate_external_multicast_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .expect("Deallocation should succeed"); + assert_eq!( + result1, true, + "Deallocating existing group should return true" + ); + + // Deallocate the same group again - should return false (already deleted) + let result2 = datastore + .deallocate_external_multicast_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .expect("Second deallocation should succeed but return false"); + assert_eq!( + result2, false, + "Deallocating already-deleted group should return false" + ); + + // Try to deallocate non-existent group - should return error + let fake_id = Uuid::new_v4(); + let result3 = datastore + .deallocate_external_multicast_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_id), + ) + .await; + assert!( + result3.is_err(), + "Deallocating non-existent group should return an error" + ); + + // Verify it's the expected NotFound error + match result3.unwrap_err() { + external::Error::ObjectNotFound { .. 
} => { + // This is expected + } + other => panic!("Expected ObjectNotFound error, got: {:?}", other), + } + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_create_and_fetch() { + let logctx = + dev::test_setup_log("test_multicast_group_create_and_fetch"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create project for multicast groups + + // Create IP pool + let pool_identity = IdentityMetadataCreateParams { + name: "fetch-test-pool".parse().unwrap(), + description: "Test pool for fetch operations".to_string(), + }; + + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + LookupType::ById(ip_pool.id()), + ); + + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 100, 10, 1), + Ipv4Addr::new(224, 100, 10, 100), + ) + .unwrap(), + ); + + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add range to pool"); + + let link = IpPoolResource { + resource_id: opctx.authn.silo_required().unwrap().id(), + resource_type: IpPoolResourceType::Silo, + ip_pool_id: ip_pool.id(), + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Test creating a multicast group + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "fetch-test-group".parse().unwrap(), + description: "Test group for fetch operations".to_string(), + }, + multicast_ip: Some("224.100.10.5".parse().unwrap()), + source_ips: Some(vec![ + "10.0.0.1".parse().unwrap(), + "10.0.0.2".parse().unwrap(), + ]), + pool: Some(NameOrId::Name("fetch-test-pool".parse().unwrap())), + mvlan: None, + }; + + let group = datastore + .multicast_group_create(&opctx, ¶ms, Some(authz_pool)) + .await + .expect("Should create multicast group"); + + // Test fetching the created group + let fetched_group = datastore + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .expect("Should fetch created group"); + + assert_eq!(group.id(), fetched_group.id()); + assert_eq!(group.name(), fetched_group.name()); + assert_eq!(group.description(), fetched_group.description()); + assert_eq!(group.multicast_ip, fetched_group.multicast_ip); + assert_eq!(group.source_ips, fetched_group.source_ips); + assert_eq!(group.state, MulticastGroupState::Creating); + + // Test fetching non-existent group + let fake_id = Uuid::new_v4(); + let result = datastore + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_id), + ) + .await; + assert!(result.is_err()); + match result.unwrap_err() { + external::Error::ObjectNotFound { .. 
} => { + // Expected + } + other => panic!("Expected ObjectNotFound, got: {:?}", other), + } + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_list_fleet_wide() { + let logctx = + dev::test_setup_log("test_multicast_group_list_fleet_wide"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create IP pool + let pool_identity = IdentityMetadataCreateParams { + name: "list-test-pool".parse().unwrap(), + description: "Test pool for list operations".to_string(), + }; + + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + LookupType::ById(ip_pool.id()), + ); + + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 100, 20, 1), + Ipv4Addr::new(224, 100, 20, 100), + ) + .unwrap(), + ); + + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add range to pool"); + + let link = IpPoolResource { + resource_id: opctx.authn.silo_required().unwrap().id(), + resource_type: IpPoolResourceType::Silo, + ip_pool_id: ip_pool.id(), + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Create fleet-wide multicast groups + let params_1 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "fleet-group-1".parse().unwrap(), + description: "Fleet-wide group 1".to_string(), + }, + multicast_ip: Some("224.100.20.10".parse().unwrap()), + source_ips: None, + pool: Some(NameOrId::Name("list-test-pool".parse().unwrap())), + mvlan: None, + }; + + let params_2 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "fleet-group-2".parse().unwrap(), + description: "Fleet-wide group 2".to_string(), + }, + multicast_ip: Some("224.100.20.11".parse().unwrap()), + source_ips: None, + pool: Some(NameOrId::Name("list-test-pool".parse().unwrap())), + mvlan: None, + }; + + let params_3 = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "fleet-group-3".parse().unwrap(), + description: "Fleet-wide group 3".to_string(), + }, + multicast_ip: Some("224.100.20.12".parse().unwrap()), + source_ips: None, + pool: Some(NameOrId::Name("list-test-pool".parse().unwrap())), + mvlan: None, + }; + + // Create groups (all are fleet-wide) + datastore + .multicast_group_create(&opctx, ¶ms_1, Some(authz_pool.clone())) + .await + .expect("Should create fleet-group-1"); + + datastore + .multicast_group_create(&opctx, ¶ms_2, Some(authz_pool.clone())) + .await + .expect("Should create fleet-group-2"); + + datastore + .multicast_group_create(&opctx, ¶ms_3, Some(authz_pool)) + .await + .expect("Should create fleet-group-3"); + + // List all groups fleet-wide - should get 3 groups + let pagparams = DataPageParams { + marker: None, + direction: external::PaginationOrder::Ascending, + limit: std::num::NonZeroU32::new(10).unwrap(), + }; + + let paginated_by = + external::http_pagination::PaginatedBy::Id(pagparams); + let groups = datastore + .multicast_groups_list(&opctx, &paginated_by) + .await + .expect("Should list all fleet-wide groups"); + + assert_eq!(groups.len(), 3, "Should have 3 fleet-wide groups"); + + // Verify the groups have the correct names + let 
group_names: Vec<_> = + groups.iter().map(|g| g.name().to_string()).collect(); + assert!(group_names.contains(&"fleet-group-1".to_string())); + assert!(group_names.contains(&"fleet-group-2".to_string())); + assert!(group_names.contains(&"fleet-group-3".to_string())); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_state_transitions() { + let logctx = + dev::test_setup_log("test_multicast_group_state_transitions"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create IP pool + let pool_identity = IdentityMetadataCreateParams { + name: "state-test-pool".parse().unwrap(), + description: "Test pool for state transitions".to_string(), + }; + + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + LookupType::ById(ip_pool.id()), + ); + + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 100, 30, 1), + Ipv4Addr::new(224, 100, 30, 100), + ) + .unwrap(), + ); + + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add range to pool"); + + let link = IpPoolResource { + resource_id: opctx.authn.silo_required().unwrap().id(), + resource_type: IpPoolResourceType::Silo, + ip_pool_id: ip_pool.id(), + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "state-test-group".parse().unwrap(), + description: "Test group for state transitions".to_string(), + }, + multicast_ip: Some("224.100.30.5".parse().unwrap()), + source_ips: None, + pool: Some(NameOrId::Name("state-test-pool".parse().unwrap())), + mvlan: None, + }; + + // Create group - starts in "Creating" state + let group = datastore + .multicast_group_create(&opctx, ¶ms, Some(authz_pool)) + .await + .expect("Should create multicast group"); + + assert_eq!(group.state, MulticastGroupState::Creating); + + // Test transition to "Active" + datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + MulticastGroupState::Active, + ) + .await + .expect("Should transition to 'Active'"); + + let updated_group = datastore + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .expect("Should fetch updated group"); + + assert_eq!(updated_group.state, MulticastGroupState::Active); + + // Test transition to "Deleting" + datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + MulticastGroupState::Deleting, + ) + .await + .expect("Should transition to 'Deleting'"); + + let deleting_group = datastore + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .expect("Should fetch deleting group"); + + assert_eq!(deleting_group.state, MulticastGroupState::Deleting); + + // Test trying to update non-existent group + let fake_id = Uuid::new_v4(); + let result = datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_id), + MulticastGroupState::Active, + ) + .await; + assert!(result.is_err()); + + db.terminate().await; + 
logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_lookup_by_ip() { + let logctx = dev::test_setup_log("test_multicast_group_lookup_by_ip"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create test setup + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "test-pool", + "test-project", + ) + .await; + + // Create first multicast group with IP 224.10.1.100 + let group1 = multicast::create_test_group( + &opctx, + &datastore, + &setup, + "group1", + "224.10.1.100", + ) + .await; + + // Create second multicast group with IP 224.10.1.101 + let group2 = multicast::create_test_group( + &opctx, + &datastore, + &setup, + "group2", + "224.10.1.101", + ) + .await; + + // Test successful lookup for first group + let found_group1 = datastore + .multicast_group_lookup_by_ip( + &opctx, + "224.10.1.100".parse().unwrap(), + ) + .await + .expect("Should find group by IP"); + + assert_eq!(found_group1.id(), group1.id()); + assert_eq!( + found_group1.multicast_ip.ip(), + "224.10.1.100".parse::().unwrap() + ); + + // Test successful lookup for second group + let found_group2 = datastore + .multicast_group_lookup_by_ip( + &opctx, + "224.10.1.101".parse().unwrap(), + ) + .await + .expect("Should find group by IP"); + + assert_eq!(found_group2.id(), group2.id()); + assert_eq!( + found_group2.multicast_ip.ip(), + "224.10.1.101".parse::().unwrap() + ); + + // Test lookup for nonexistent IP - should fail + let not_found_result = datastore + .multicast_group_lookup_by_ip( + &opctx, + "224.10.1.199".parse().unwrap(), + ) + .await; + + assert!(not_found_result.is_err()); + match not_found_result.err().unwrap() { + Error::ObjectNotFound { .. } => { + // Expected error type for missing multicast group + } + other => panic!("Expected ObjectNotFound error, got: {:?}", other), + } + + // Test that soft-deleted groups are not returned + // Soft-delete group1 (sets time_deleted) + datastore + .deallocate_external_multicast_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + ) + .await + .expect("Should soft-delete group"); + + // Now lookup should fail for deleted group + let deleted_lookup_result = datastore + .multicast_group_lookup_by_ip( + &opctx, + "224.10.1.100".parse().unwrap(), + ) + .await; + + assert!(deleted_lookup_result.is_err()); + match deleted_lookup_result.err().unwrap() { + Error::ObjectNotFound { .. 
} => { + // Expected - deleted groups should not be found + } + other => panic!( + "Expected ObjectNotFound error for deleted group, got: {:?}", + other + ), + } + + // Second group should still be findable + let still_found_group2 = datastore + .multicast_group_lookup_by_ip( + &opctx, + "224.10.1.101".parse().unwrap(), + ) + .await + .expect("Should still find non-deleted group"); + + assert_eq!(still_found_group2.id(), group2.id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_update() { + let logctx = dev::test_setup_log("test_multicast_group_update"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create test setup + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "test-pool", + "test-project", + ) + .await; + + // Create initial multicast group + let group = multicast::create_test_group( + &opctx, + &datastore, + &setup, + "original-group", + "224.10.1.100", + ) + .await; + + // Verify original values + assert_eq!(group.name().as_str(), "original-group"); + assert_eq!(group.description(), "Test group: original-group"); + assert_eq!(group.source_ips.len(), 0); // Empty array initially + + // Test updating name and description + let update_params = params::MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: Some("updated-group".parse().unwrap()), + description: Some("Updated group description".to_string()), + }, + source_ips: None, + mvlan: None, + }; + + let updated_group = datastore + .multicast_group_update( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + &update_params, + ) + .await + .expect("Should update multicast group"); + + // Verify updated identity fields + assert_eq!(updated_group.name().as_str(), "updated-group"); + assert_eq!(updated_group.description(), "Updated group description"); + assert_eq!(updated_group.id(), group.id()); // ID should not change + assert_eq!(updated_group.multicast_ip, group.multicast_ip); // IP should not change + assert!(updated_group.time_modified() > group.time_modified()); // Modified time should advance + + // Test updating source IPs (Source-Specific Multicast) + let source_ip_update = params::MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: Some(vec![ + "10.1.1.10".parse().unwrap(), + "10.1.1.20".parse().unwrap(), + ]), + mvlan: None, + }; + + let group_with_sources = datastore + .multicast_group_update( + &opctx, + MulticastGroupUuid::from_untyped_uuid(updated_group.id()), + &source_ip_update, + ) + .await + .expect("Should update source IPs"); + + // Verify source IPs were updated + assert_eq!(group_with_sources.source_ips.len(), 2); + let source_addrs: Vec<_> = + group_with_sources.source_ips.iter().map(|ip| ip.ip()).collect(); + assert!(source_addrs.contains(&"10.1.1.10".parse().unwrap())); + assert!(source_addrs.contains(&"10.1.1.20".parse().unwrap())); + + // Test updating all fields at once + let complete_update = params::MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: Some("final-group".parse().unwrap()), + description: Some("Final group description".to_string()), + }, + source_ips: Some(vec!["192.168.1.1".parse().unwrap()]), + mvlan: None, + }; + + let final_group = datastore + .multicast_group_update( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group_with_sources.id()), + &complete_update, + ) + .await + .expect("Should update 
all fields"); + + assert_eq!(final_group.name().as_str(), "final-group"); + assert_eq!(final_group.description(), "Final group description"); + assert_eq!(final_group.source_ips.len(), 1); + assert_eq!( + final_group.source_ips[0].ip(), + "192.168.1.1".parse::().unwrap() + ); + + // Test updating nonexistent group - should fail + let nonexistent_id = MulticastGroupUuid::new_v4(); + let failed_update = datastore + .multicast_group_update(&opctx, nonexistent_id, &update_params) + .await; + + assert!(failed_update.is_err()); + match failed_update.err().unwrap() { + Error::ObjectNotFound { .. } => { + // Expected error for nonexistent group + } + other => panic!("Expected ObjectNotFound error, got: {:?}", other), + } + + // Test updating deleted group - should fail + // First soft-delete the group (sets time_deleted) + datastore + .deallocate_external_multicast_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(final_group.id()), + ) + .await + .expect("Should soft-delete group"); + + let deleted_update = datastore + .multicast_group_update( + &opctx, + MulticastGroupUuid::from_untyped_uuid(final_group.id()), + &update_params, + ) + .await; + + assert!(deleted_update.is_err()); + match deleted_update.err().unwrap() { + Error::ObjectNotFound { .. } => { + // Expected - soft-deleted groups should not be updatable + } + other => panic!( + "Expected ObjectNotFound error for deleted group, got: {:?}", + other + ), + } + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/multicast/members.rs b/nexus/db-queries/src/db/datastore/multicast/members.rs new file mode 100644 index 00000000000..21a80446732 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/multicast/members.rs @@ -0,0 +1,3302 @@ +//! Multicast group member management operations. +//! +//! Database operations for managing multicast group memberships - adding/ +//! removing members and lifecycle coordination. + +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::Utc; +use diesel::prelude::*; +use slog::debug; +use uuid::Uuid; + +use nexus_db_errors::{ErrorHandler, public_error_from_diesel}; +use omicron_common::api::external::{ + self, CreateResult, DataPageParams, DeleteResult, ListResultVec, + LookupType, ResourceType, UpdateResult, +}; +use omicron_uuid_kinds::{ + GenericUuid, InstanceUuid, MulticastGroupUuid, SledKind, +}; + +use crate::context::OpContext; +use crate::db::datastore::DataStore; +use crate::db::datastore::multicast::ops; +use crate::db::model::{ + DbTypedUuid, MulticastGroupMember, MulticastGroupMemberState, + MulticastGroupMemberValues, +}; +use crate::db::on_conflict_ext::IncompleteOnConflictExt; +use crate::db::pagination::paginated; + +impl DataStore { + /// List members of a multicast group. + pub async fn multicast_group_members_list( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + self.multicast_group_members_list_by_id(opctx, group_id, pagparams) + .await + } + + /// Create a new multicast group member for an instance. + /// + /// Used by the HTTP API endpoint for explicit member attachment. + /// Creates a member record in "Joining" state. Uses a Diesel + /// upsert (not the CTE) since the HTTP endpoint validates separately. + /// + /// RPW reconciler programs the dataplane when the instance starts. + /// + /// Handles reactivation of "Left" members and preserves "Joined" state for + /// idempotency. 
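+    ///
+    /// A minimal usage sketch (hedged; everything around the call is
+    /// illustrative) showing the expected states on first and repeat attach:
+    ///
+    /// ```rust,ignore
+    /// let member = datastore
+    ///     .multicast_group_member_add(&opctx, group_id, instance_id)
+    ///     .await?;
+    /// assert_eq!(member.state, MulticastGroupMemberState::Joining);
+    ///
+    /// // Repeating the call is idempotent: the same row comes back, and a
+    /// // member that has already reached "Joined" keeps that state.
+    /// let again = datastore
+    ///     .multicast_group_member_add(&opctx, group_id, instance_id)
+    ///     .await?;
+    /// assert_eq!(member.id, again.id);
+    /// ```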
+ pub async fn multicast_group_member_add( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + instance_id: InstanceUuid, + ) -> CreateResult { + let conn = self.pool_connection_authorized(opctx).await?; + self.multicast_group_member_add_with_conn( + opctx, + &conn, + group_id.into_untyped_uuid(), + instance_id.into_untyped_uuid(), + ) + .await + } + + /// Add an instance to a multicast group using provided connection. + /// + /// Internal helper that performs member attachment with state preservation. + /// This only transitions "Left" members (with time_deleted=NULL) to "Joining" + /// for reactivation, preserving "Joined" state if already active. + /// + /// State handling: + /// - Member in "Left" with time_deleted=NULL → UPDATE to "Joining" (reactivation) + /// - Member in "Left" with time_deleted set → not matched (soft-deleted, INSERT new) + /// - Member in "Joining" → return existing (idempotent) + /// - Member in "Joined" → return existing (preserve active state) + /// - Member doesn't exist → INSERT as "Joining" + async fn multicast_group_member_add_with_conn( + &self, + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + group_id: Uuid, + instance_id: Uuid, + ) -> CreateResult { + use nexus_db_schema::schema::multicast_group_member::dsl; + + // Look up the sled_id for this instance (may be None for stopped instances) + let sled_id = self + .instance_get_sled_id(opctx, instance_id) + .await? + .map(DbTypedUuid::from_untyped_uuid); + + // Try UPDATE on "Left" members only (reactivation) + let reactivation_result = diesel::update(dsl::multicast_group_member) + .filter(dsl::external_group_id.eq(group_id)) + .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(MulticastGroupMemberState::Left)) + .set(( + dsl::state.eq(MulticastGroupMemberState::Joining), + dsl::sled_id.eq(sled_id), + dsl::time_modified.eq(Utc::now()), + )) + .returning(MulticastGroupMember::as_returning()) + .get_result_async(conn) + .await; + + // Early return on member or error + match reactivation_result { + // Successfully reactivated Left → Joining + Ok(member) => return Ok(member), + Err(diesel::result::Error::NotFound) => {} + Err(e) => { + return Err(public_error_from_diesel(e, ErrorHandler::Server)); + } + } + + // Try INSERT, but preserve existing state on conflict + let new_member = MulticastGroupMemberValues { + id: Uuid::new_v4(), + parent_id: instance_id, + external_group_id: group_id, + sled_id, + state: MulticastGroupMemberState::Joining, + time_created: Utc::now(), + time_modified: Utc::now(), + time_deleted: None, + }; + + // On conflict, perform a no-op update to return existing member. + // This preserves "Joined"/"Joining" state while avoiding an extra SELECT. + // CockroachDB requires `.as_partial_index()` for partial unique indexes. + diesel::insert_into(dsl::multicast_group_member) + .values(new_member) + .on_conflict((dsl::external_group_id, dsl::parent_id)) + .as_partial_index() + .do_update() + .set(dsl::time_modified.eq(dsl::time_modified)) + .returning(MulticastGroupMember::as_returning()) + .get_result_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Delete a multicast group member by group ID. + /// + /// This performs a hard delete of all members (both active and soft-deleted) + /// for the specified group. Used during group cleanup operations. 
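+    ///
+    /// Hypothetical call-site sketch (group teardown; names other than this
+    /// method are assumptions):
+    ///
+    /// ```rust,ignore
+    /// // Once the group itself is being torn down, remove every member row,
+    /// // including soft-deleted ones, in a single pass.
+    /// datastore
+    ///     .multicast_group_members_delete_by_group(&opctx, group_id)
+    ///     .await?;
+    /// ```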
+ pub async fn multicast_group_members_delete_by_group( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + ) -> DeleteResult { + use nexus_db_schema::schema::multicast_group_member::dsl; + + // Delete all members for this group, including soft-deleted ones + // We use a targeted query to leverage existing indexes + diesel::delete(dsl::multicast_group_member) + .filter(dsl::external_group_id.eq(group_id.into_untyped_uuid())) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_x| ()) + } + + /// Set the state of a multicast group member. + pub async fn multicast_group_member_set_state( + &self, + opctx: &OpContext, + external_group_id: MulticastGroupUuid, + parent_id: InstanceUuid, + new_state: MulticastGroupMemberState, + ) -> UpdateResult<()> { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let rows_updated = diesel::update(dsl::multicast_group_member) + .filter( + dsl::external_group_id + .eq(external_group_id.into_untyped_uuid()), + ) + .filter(dsl::parent_id.eq(parent_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .set((dsl::state.eq(new_state), dsl::time_modified.eq(Utc::now()))) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::MulticastGroupMember, + LookupType::ById(external_group_id.into_untyped_uuid()), + ), + ) + })?; + + if rows_updated == 0 { + return Err(external::Error::not_found_by_id( + ResourceType::MulticastGroupMember, + &external_group_id.into_untyped_uuid(), + )); + } + + Ok(()) + } + + /// Conditionally set the state of a multicast group member if the current + /// state matches `expected_state`. + /// + /// Used by RPW reconciler. + /// + /// Returns `Ok(true)` if updated, `Ok(false)` if no row matched the filters + /// (member not found, soft-deleted, or state mismatch). + pub async fn multicast_group_member_set_state_if_current( + &self, + opctx: &OpContext, + external_group_id: MulticastGroupUuid, + parent_id: InstanceUuid, + expected_state: MulticastGroupMemberState, + new_state: MulticastGroupMemberState, + ) -> UpdateResult { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let rows_updated = diesel::update(dsl::multicast_group_member) + .filter( + dsl::external_group_id + .eq(external_group_id.into_untyped_uuid()), + ) + .filter(dsl::parent_id.eq(parent_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(expected_state)) + .set((dsl::state.eq(new_state), dsl::time_modified.eq(Utc::now()))) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(rows_updated > 0) + } + + /// Atomically transition from "Left" → "Joining" and set sled_id. + /// + /// Used by RPW reconciler. + /// + /// Returns Ok(true) if updated, Ok(false) if state was not "Left" or row missing. 
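+    ///
+    /// A hedged sketch of how a reconciler pass might consume the result
+    /// (surrounding names are illustrative):
+    ///
+    /// ```rust,ignore
+    /// let reactivated = datastore
+    ///     .multicast_group_member_left_to_joining_if_current(
+    ///         &opctx, group_id, instance_id, sled_id,
+    ///     )
+    ///     .await?;
+    /// if !reactivated {
+    ///     // Another Nexus already moved the member out of "Left" (or the
+    ///     // row is gone); skip dataplane programming on this pass.
+    /// }
+    /// ```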
+ pub async fn multicast_group_member_left_to_joining_if_current( + &self, + opctx: &OpContext, + external_group_id: MulticastGroupUuid, + parent_id: InstanceUuid, + sled_id: DbTypedUuid, + ) -> UpdateResult { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let rows_updated = diesel::update(dsl::multicast_group_member) + .filter( + dsl::external_group_id + .eq(external_group_id.into_untyped_uuid()), + ) + .filter(dsl::parent_id.eq(parent_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(MulticastGroupMemberState::Left)) + .set(( + dsl::state.eq(MulticastGroupMemberState::Joining), + dsl::sled_id.eq(Some(sled_id)), + dsl::time_modified.eq(Utc::now()), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(rows_updated > 0) + } + + /// Atomically transition to "Left" and clear sled_id if current state + /// matches `expected_state`. + /// + /// Used by RPW reconciler. + /// + /// Returns Ok(true) if updated, Ok(false) if state did not match or row missing. + pub async fn multicast_group_member_to_left_if_current( + &self, + opctx: &OpContext, + external_group_id: MulticastGroupUuid, + parent_id: InstanceUuid, + expected_state: MulticastGroupMemberState, + ) -> UpdateResult { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let rows_updated = diesel::update(dsl::multicast_group_member) + .filter( + dsl::external_group_id + .eq(external_group_id.into_untyped_uuid()), + ) + .filter(dsl::parent_id.eq(parent_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(expected_state)) + .set(( + dsl::state.eq(MulticastGroupMemberState::Left), + dsl::sled_id.eq(Option::>::None), + dsl::time_modified.eq(Utc::now()), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(rows_updated > 0) + } + + /// List members of a multicast group by ID. + pub async fn multicast_group_members_list_by_id( + &self, + opctx: &OpContext, + external_group_id: MulticastGroupUuid, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use nexus_db_schema::schema::multicast_group_member::dsl; + + paginated(dsl::multicast_group_member, dsl::id, pagparams) + .filter(dsl::time_deleted.is_null()) + .filter( + dsl::external_group_id + .eq(external_group_id.into_untyped_uuid()), + ) + .select(MulticastGroupMember::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List multicast group memberships for a specific instance. + /// + /// If `include_removed` is true, includes memberships that have been + /// marked removed (i.e., rows with `time_deleted` set). Otherwise only + /// returns active memberships. + pub async fn multicast_group_members_list_by_instance( + &self, + opctx: &OpContext, + instance_id: InstanceUuid, + include_removed: bool, + ) -> ListResultVec { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let mut query = dsl::multicast_group_member.into_boxed(); + + if !include_removed { + query = query.filter(dsl::time_deleted.is_null()); + } + + query + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .order(dsl::id.asc()) + .select(MulticastGroupMember::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Attach an instance to a multicast group atomically. + /// + /// Used by instance create saga and instance reconfiguration to ensure + /// atomic validation and member creation. This CTE: + /// - Verifies the group is "Active" + /// - Validates instance exists + /// - Retrieves instance's current sled_id from VMM table + /// - Inserts "Joining" if no row exists + /// - Reactivates "Left" → "Joining" (updates sled_id) + /// - No-ops for "Joining"/"Joined" (idempotent) + /// + /// Returns the `member_id` for this `(group, instance)` pair. + /// + /// See `crate::db::datastore::multicast::ops::member_attach::AttachMemberToGroupStatement` for CTE implementation. + pub async fn multicast_group_member_attach_to_instance( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + instance_id: InstanceUuid, + ) -> Result { + let conn = self.pool_connection_authorized(opctx).await?; + + // Use the CTE to atomically validate group state, instance existence, + // retrieve sled_id, and attach member - all in a single database operation. + // This eliminates TOCTOU issues from separate instance validation. + let statement = ops::member_attach::AttachMemberToGroupStatement::new( + group_id.into_untyped_uuid(), + instance_id.into_untyped_uuid(), + Uuid::new_v4(), // new_member_id if we need to insert + ); + + let result = statement.execute(&conn).await?; + Ok(result.member_id) + } + + /// Atomically reconcile a member in "Joining" state. + /// + /// This combines sled_id updates and state transitions into a single atomic + /// database operation to handle concurrent reconciliation by multiple Nexus + /// instances. + /// + /// # Arguments + /// + /// - `group_id`: The multicast group + /// - `instance_id`: The instance being reconciled + /// - `instance_valid`: Whether the instance is in a valid state for multicast + /// - `current_sled_id`: The instance's current sled_id from VMM lookup + /// + /// # Returns + /// + /// Returns the reconciliation result indicating what action was taken. + /// + /// # Example Usage (from RPW reconciler) + /// + /// ```rust,ignore + /// // Fetch cached instance state and sled_id from reconciler's state map + /// let (instance_valid, sled_id) = instance_states + /// .get(&member.parent_id) + /// .copied() + /// .unwrap_or((false, None)); + /// let current_sled_id = sled_id.map(|id| id.into()); + /// + /// let result = self + /// .datastore + /// .multicast_group_member_reconcile_joining( + /// opctx, + /// MulticastGroupUuid::from_untyped_uuid(group.id()), + /// InstanceUuid::from_untyped_uuid(member.parent_id), + /// instance_valid, + /// current_sled_id, + /// ) + /// .await?; + /// + /// match result.action { + /// ReconcileAction::TransitionedToLeft => { /* program dataplane to remove */ } + /// ReconcileAction::UpdatedSledId { .. } => { /* sled changed, stay "Joining" */ } + /// ReconcileAction::NoChange => { /* ready to transition to "Joined" */ } + /// ReconcileAction::NotFound => { /* member not in "Joining" state */ } + /// } + /// ``` + /// + /// See [`ops::member_reconcile::reconcile_joining_member`] for atomic CTE implementation. 
+ pub async fn multicast_group_member_reconcile_joining( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + instance_id: InstanceUuid, + instance_valid: bool, + current_sled_id: Option>, + ) -> Result + { + let conn = self.pool_connection_authorized(opctx).await?; + + ops::member_reconcile::reconcile_joining_member( + &conn, + group_id.into_untyped_uuid(), + instance_id.into_untyped_uuid(), + instance_valid, + current_sled_id, + ) + .await + .map_err(external::Error::from) + } + + /// Detach all multicast group memberships for an instance. + /// + /// Transitions all non-Left members to "Left" state and clears sled_id. + /// Used by instance lifecycle operations (stop, delete) to signal RPW + /// that dataplane cleanup is needed. + /// + /// Note: This does not set `time_deleted`. For soft deletion of memberships, + /// use [`Self::multicast_group_members_mark_for_removal`]. + /// + /// See also [`Self::multicast_group_member_detach_by_group_and_instance`] + /// for detaching a specific group membership. + pub async fn multicast_group_members_detach_by_instance( + &self, + opctx: &OpContext, + instance_id: InstanceUuid, + ) -> Result<(), external::Error> { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let now = Utc::now(); + + // Transition members from "Joined/Joining" to "Left" state and clear + // `sled_id` + diesel::update(dsl::multicast_group_member) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.ne(MulticastGroupMemberState::Left)) // Only update non-Left members + .set(( + dsl::state.eq(MulticastGroupMemberState::Left), + dsl::sled_id.eq(Option::>::None), + dsl::time_modified.eq(now), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_| ()) + } + + /// Get a specific multicast group member by group ID and instance ID. + pub async fn multicast_group_member_get_by_group_and_instance( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + instance_id: InstanceUuid, + ) -> Result, external::Error> { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let member = dsl::multicast_group_member + .filter(dsl::external_group_id.eq(group_id.into_untyped_uuid())) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .select(MulticastGroupMember::as_select()) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(member) + } + + /// Get a multicast group member by its unique ID. + /// + /// If `include_removed` is true, returns the member even if it has been + /// soft-deleted (i.e., `time_deleted` is set). Otherwise filters out + /// soft-deleted rows. + pub async fn multicast_group_member_get_by_id( + &self, + opctx: &OpContext, + member_id: Uuid, + include_removed: bool, + ) -> Result, external::Error> { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let mut query = dsl::multicast_group_member.into_boxed(); + if !include_removed { + query = query.filter(dsl::time_deleted.is_null()); + } + + let member = query + .filter(dsl::id.eq(member_id)) + .select(MulticastGroupMember::as_select()) + .first_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(member) + } + + /// Detach a specific multicast group member by group ID and instance ID. + /// + /// This transitions member to "Left" state, clears `sled_id`, and sets `time_deleted` + /// (marking for permanent removal). Used by the HTTP API for explicit detach operations. + /// Distinct from instance stop which only transitions to "Left" without `time_deleted`. + /// + /// See [`Self::multicast_group_members_detach_by_instance`] for detaching all + /// memberships of an instance (used during instance stop). + pub async fn multicast_group_member_detach_by_group_and_instance( + &self, + opctx: &OpContext, + group_id: MulticastGroupUuid, + instance_id: InstanceUuid, + ) -> Result { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let now = Utc::now(); + + // Mark member for removal (set time_deleted and state to "Left"), similar + // to soft instance deletion + let updated_rows = diesel::update(dsl::multicast_group_member) + .filter(dsl::external_group_id.eq(group_id.into_untyped_uuid())) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .set(( + dsl::state.eq(MulticastGroupMemberState::Left), + dsl::sled_id.eq(Option::>::None), + dsl::time_deleted.eq(Some(now)), // Mark for deletion + dsl::time_modified.eq(now), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(updated_rows > 0) + } + + /// Update sled_id for all multicast group memberships of an instance. + /// + /// Used by instance sagas to update sled_id during lifecycle transitions: + /// - Start: NULL → actual sled UUID + /// - Stop: actual sled UUID → NULL + /// - Migrate: old sled UUID → new sled UUID + /// + /// Only updates non-"Left" members. RPW detects the change and reprograms + /// the dataplane accordingly. + /// + /// Note: This does not update members already in "Left" state. For instance + /// stops, first transition memberships to "Left" and clear their `sled_id` + /// via [`Self::multicast_group_members_detach_by_instance`]. + pub async fn multicast_group_member_update_sled_id( + &self, + opctx: &OpContext, + instance_id: InstanceUuid, + new_sled_id: Option>, + ) -> Result<(), external::Error> { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let operation_type = match new_sled_id { + Some(_) => "instance_start_or_migrate", + None => "instance_stop", + }; + + debug!( + opctx.log, + "multicast member lifecycle transition: updating sled_id"; + "instance_id" => %instance_id, + "operation" => operation_type, + "new_sled_id" => ?new_sled_id + ); + + diesel::update(dsl::multicast_group_member) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + // Only update members not in "Left" state + .filter(dsl::state.ne(MulticastGroupMemberState::Left)) + .set(( + dsl::sled_id.eq(new_sled_id), + dsl::time_modified.eq(Utc::now()), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_| ()) + } + + /// Conditionally update sled_id only if it currently has the expected value. + /// + /// Used by RPW reconciler. + /// + /// Returns `Ok(true)` if updated, `Ok(false)` if the expected value didn't + /// match (indicating concurrent modification). 
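+    ///
+    /// For illustration only (a hedged sketch; surrounding names are
+    /// assumptions), `Ok(false)` can be treated as a lost compare-and-swap:
+    ///
+    /// ```rust,ignore
+    /// let moved = datastore
+    ///     .multicast_group_member_update_sled_id_if_current(
+    ///         &opctx, instance_id, Some(old_sled_id), Some(new_sled_id),
+    ///     )
+    ///     .await?;
+    /// if !moved {
+    ///     // Someone else updated sled_id first; re-read the row or let the
+    ///     // next reconciler pass converge.
+    /// }
+    /// ```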
+ /// + /// This prevents race conditions where multiple Nexus instances try to update + /// the same member's sled_id concurrently. The update only proceeds if the + /// current sled_id matches `expected_sled_id`, implementing a compare-and-swap + /// (CAS) pattern. + pub async fn multicast_group_member_update_sled_id_if_current( + &self, + opctx: &OpContext, + instance_id: InstanceUuid, + expected_sled_id: Option>, + new_sled_id: Option>, + ) -> UpdateResult { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let rows_updated = diesel::update(dsl::multicast_group_member) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.ne(MulticastGroupMemberState::Left)) + .filter(dsl::sled_id.eq(expected_sled_id)) // CAS condition + .set(( + dsl::sled_id.eq(new_sled_id), + dsl::time_modified.eq(Utc::now()), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(rows_updated > 0) + } + + /// Set the sled_id for multicast members when an instance starts. + /// + /// This handles two scenarios: + /// 1. **First-time start**: "Joining" (sled_id=NULL) → "Joining" (sled_id=actual) + /// 2. **Restart after stop**: "Left" (sled_id=NULL) → "Joining" (sled_id=actual) + /// + /// After this operation, the RPW reconciler will detect the sled_id and + /// transition "Joining" → "Joined" by programming the switch. + /// + /// # State Transitions + /// + /// - "Left" (sled_id=NULL) → "Joining" (sled_id=actual) - Instance restart + /// - "Joining" (sled_id=NULL) → "Joining" (sled_id=actual) - First-time start + /// - "Joined" - No change (already has sled_id, ignored) + /// + /// See also: + /// - CAS-based reconciliation helpers for concurrent updates in + /// `nexus/db-queries/src/db/datastore/multicast/ops/member_reconcile.rs`. + /// - Background reconciler docs discussing the CAS pattern in + /// `nexus/src/app/background/tasks/multicast/members.rs`. + pub async fn multicast_group_member_set_instance_sled( + &self, + opctx: &OpContext, + instance_id: InstanceUuid, + sled_id: DbTypedUuid, + ) -> Result<(), external::Error> { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let now = Utc::now(); + + // Update members in "Left" state (restart) or "Joining" state with NULL + // sled_id (first start) + // - "Left" → "Joining" + set sled_id (instance restart) + // - "Joining" (sled_id=NULL) → "Joining" + set sled_id (first-time start) + diesel::update(dsl::multicast_group_member) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .filter( + dsl::state.eq(MulticastGroupMemberState::Left).or(dsl::state + .eq(MulticastGroupMemberState::Joining) + .and(dsl::sled_id.is_null())), + ) + .set(( + dsl::state.eq(MulticastGroupMemberState::Joining), + dsl::sled_id.eq(Some(sled_id)), + dsl::time_modified.eq(now), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_| ()) + } + + /// Permanently mark all multicast memberships for deletion when instance is deleted. + /// + /// Sets members to "Left" state with `time_deleted` timestamp, indicating + /// permanent removal (not temporary like instance stop). This distinguishes + /// permanent deletion from instance stop which only sets state="Left" + /// without `time_deleted`, allowing later reactivation. 
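+    ///
+    /// A hedged sketch of the stop-versus-delete distinction (call sites are
+    /// illustrative, not verbatim saga code):
+    ///
+    /// ```rust,ignore
+    /// // Instance stop: memberships go to "Left" but stay reactivatable.
+    /// datastore
+    ///     .multicast_group_members_detach_by_instance(&opctx, instance_id)
+    ///     .await?;
+    ///
+    /// // Instance delete: memberships are tombstoned ("Left" + time_deleted)
+    /// // and will eventually be hard-deleted by the cleanup task.
+    /// datastore
+    ///     .multicast_group_members_mark_for_removal(&opctx, instance_id)
+    ///     .await?;
+    /// ```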
+ /// + /// After this operation: + /// - Members cannot be reactivated (new attach creates new member record) + /// - RPW reconciler will remove DPD configuration + /// - Cleanup task will eventually hard-delete the database rows + /// + /// Compare with [`Self::multicast_group_members_detach_by_instance`] which leaves + /// `time_deleted=NULL` for reactivation on instance restart. + pub async fn multicast_group_members_mark_for_removal( + &self, + opctx: &OpContext, + instance_id: InstanceUuid, + ) -> Result<(), external::Error> { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let now = Utc::now(); + + diesel::update(dsl::multicast_group_member) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .set(( + dsl::state.eq(MulticastGroupMemberState::Left), // Transition to Left state + dsl::sled_id.eq(Option::>::None), // Clear sled reference + dsl::time_deleted.eq(Some(now)), // Mark for deletion + dsl::time_modified.eq(now), + )) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_| ()) + } + + /// Permanently delete a multicast group member by ID. + pub async fn multicast_group_member_delete_by_id( + &self, + opctx: &OpContext, + member_id: Uuid, + ) -> DeleteResult { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let deleted_rows = diesel::delete(dsl::multicast_group_member) + .filter(dsl::id.eq(member_id)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + if deleted_rows == 0 { + return Err(external::Error::not_found_by_id( + ResourceType::MulticastGroupMember, + &member_id, + )); + } + + debug!( + opctx.log, + "multicast group member deletion completed"; + "member_id" => %member_id, + "rows_deleted" => deleted_rows + ); + + Ok(()) + } + + /// Complete deletion of multicast group members that are in + /// ["Left"](MulticastGroupMemberState::Left) state and `time_deleted` is + /// set. + /// + /// Returns the number of members physically deleted. + pub async fn multicast_group_members_complete_delete( + &self, + opctx: &OpContext, + ) -> Result { + use nexus_db_schema::schema::multicast_group_member::dsl; + + let deleted_rows = diesel::delete(dsl::multicast_group_member) + .filter(dsl::state.eq(MulticastGroupMemberState::Left)) + .filter(dsl::time_deleted.is_not_null()) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + debug!( + opctx.log, + "multicast group member complete deletion finished"; + "left_and_time_deleted_members_deleted" => deleted_rows + ); + + Ok(deleted_rows) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use nexus_types::external_api::params; + use nexus_types::identity::Resource; + use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_test_utils::dev; + use omicron_uuid_kinds::SledUuid; + + use crate::db::pub_test_utils::helpers::{ + SledUpdateBuilder, attach_instance_to_vmm, create_instance_with_vmm, + create_stopped_instance_record, create_vmm_for_instance, + }; + use crate::db::pub_test_utils::{TestDatabase, multicast}; + + // NOTE: These are datastore-level tests. They validate database state + // transitions, validations, and query behavior for multicast members. + // They purposefully do not exercise the reconciler (RPW) or dataplane (DPD) + // components. 
End-to-end RPW/DPD behavior is covered by integration tests + // under `nexus/tests/integration_tests/multicast`. + + // Lists all active multicast group members. + impl DataStore { + async fn multicast_group_members_list_active_test( + &self, + opctx: &OpContext, + ) -> ListResultVec { + use nexus_db_schema::schema::multicast_group_member::dsl; + + dsl::multicast_group_member + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.ne(MulticastGroupMemberState::Left)) + .order(dsl::id.asc()) + .select(MulticastGroupMember::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + } + + #[tokio::test] + async fn test_multicast_group_member_attach_to_instance() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_attach_to_instance", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "attach-test-pool", + "test-project-attach", + ) + .await; + + // Create active group using helper + let active_group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "active-group", + "224.10.1.5", + true, // make_active + ) + .await; + + // Create creating group manually (needs to stay in "Creating" state) + let creating_group_params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "creating-group".parse().unwrap(), + description: "Creating test group".to_string(), + }, + multicast_ip: Some("224.10.1.6".parse().unwrap()), + source_ips: None, + // Pool resolved via authz_pool argument to datastore call + pool: None, + mvlan: None, + }; + + let creating_group = datastore + .multicast_group_create( + &opctx, + &creating_group_params, + Some(setup.authz_pool.clone()), + ) + .await + .expect("Should create creating multicast group"); + + // Create test instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "attach-test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Cannot attach to group in "Creating" state (not "Active") + let result = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(creating_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await; + assert!(result.is_err()); + match result.unwrap_err() { + external::Error::InvalidRequest { .. 
} => (), + other => panic!( + "Expected InvalidRequest for 'Creating' group, got: {:?}", + other + ), + } + + // First attach to active group should succeed and create new member + let member_id = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should attach instance to active group"); + + // Verify member was created in "Joining" state + let member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member") + .expect("Member should exist"); + + assert_eq!(member.id, member_id); + assert_eq!(member.state, MulticastGroupMemberState::Joining); + assert_eq!(member.sled_id, Some(setup.sled_id.into())); + let time_after_first_attach = member.time_modified; + + // Second attach to same group with member in "Joining" state should be + // idempotent + let member_id2 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should handle duplicate attach to 'Joining' member"); + + assert_eq!(member_id, member_id2, "Should return same member ID"); + // Verify idempotency: time_modified unchanged + let member_after_second = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member after second attach") + .expect("Member should exist"); + assert_eq!( + member_after_second.time_modified, time_after_first_attach, + "Idempotent attach must not update time_modified" + ); + + // Transition member to "Joined" state and capture time_modified + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Joined, + ) + .await + .expect("Should transition member to 'Joined'"); + let member_joined = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should refetch member after Joined") + .expect("Member should exist"); + let time_after_joined = member_joined.time_modified; + + // Attach to member in "Joined" state should be idempotent + let member_id3 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should handle attach to 'Joined' member"); + + assert_eq!(member_id, member_id3, "Should return same member ID"); + // Verify idempotency in "Joined": time_modified unchanged + let member_after_third = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member after third attach") + .expect("Member should exist"); + assert_eq!( + member_after_third.time_modified, time_after_joined, + "Idempotent attach while Joined must not update time_modified" + ); + + // Transition member to "Left" state (simulating instance 
stop) + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Left, + ) + .await + .expect("Should transition member to 'Left'"); + + // Update member to have no sled_id (simulating stopped instance) + datastore + .multicast_group_member_update_sled_id( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + None, + ) + .await + .expect("Should clear sled_id for stopped instance"); + let member_left = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member after Left") + .expect("Member should exist"); + let time_after_left = member_left.time_modified; + + // Attach to member in "Left" state should reactivate it + let member_id4 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should reactivate 'Left' member"); + + assert_eq!(member_id, member_id4, "Should return same member ID"); + + // Verify member was reactivated to "Joining" state with updated sled_id + let reactivated_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(active_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get reactivated member") + .expect("Reactivated member should exist"); + + assert_eq!( + reactivated_member.state, + MulticastGroupMemberState::Joining + ); + assert_eq!(reactivated_member.sled_id, Some(setup.sled_id.into())); + assert!( + reactivated_member.time_modified >= time_after_left, + "Reactivation should advance time_modified" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_members_detach_by_instance() { + let logctx = dev::test_setup_log( + "test_multicast_group_members_detach_by_instance", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "test-pool", + "test-project", + ) + .await; + + // Create multiple multicast groups + let group1 = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "group1", + "224.10.1.5", + true, // make_active + ) + .await; + let group2 = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "group2", + "224.10.1.6", + true, // make_active + ) + .await; + + // Create test instances + let instance1_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "test-instance-1", + ) + .await; + let instance1_id = instance1_record.as_untyped_uuid(); + let instance2_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "test-instance-2", + ) + .await; + let instance2_id = instance2_record.as_untyped_uuid(); + + // Create VMMs and associate instances with sled (required for multicast membership) + let vmm1_id = create_vmm_for_instance( + &opctx, + &datastore, + instance1_record, + setup.sled_id, + ) + .await; + attach_instance_to_vmm( + &opctx, + &datastore, + &setup.authz_project, + instance1_record, + vmm1_id, + ) + .await; + + let vmm2_id = create_vmm_for_instance( + &opctx, + &datastore, + 
instance2_record,
+ setup.sled_id,
+ )
+ .await;
+ attach_instance_to_vmm(
+ &opctx,
+ &datastore,
+ &setup.authz_project,
+ instance2_record,
+ vmm2_id,
+ )
+ .await;
+
+ // Add instance1 to both groups and instance2 to only group1
+ let member1_1 = datastore
+ .multicast_group_member_add(
+ &opctx,
+ MulticastGroupUuid::from_untyped_uuid(group1.id()),
+ InstanceUuid::from_untyped_uuid(*instance1_id),
+ )
+ .await
+ .expect("Should add instance1 to group1");
+
+ let member1_2 = datastore
+ .multicast_group_member_add(
+ &opctx,
+ MulticastGroupUuid::from_untyped_uuid(group2.id()),
+ InstanceUuid::from_untyped_uuid(*instance1_id),
+ )
+ .await
+ .expect("Should add instance1 to group2");
+
+ let member2_1 = datastore
+ .multicast_group_member_add(
+ &opctx,
+ MulticastGroupUuid::from_untyped_uuid(group1.id()),
+ InstanceUuid::from_untyped_uuid(*instance2_id),
+ )
+ .await
+ .expect("Should add instance2 to group1");
+
+ // Verify all memberships exist
+ assert_eq!(member1_1.parent_id, *instance1_id);
+ assert_eq!(member1_2.parent_id, *instance1_id);
+ assert_eq!(member2_1.parent_id, *instance2_id);
+
+ // Detach all memberships for instance1 (transitions to Left, does NOT set time_deleted)
+ datastore
+ .multicast_group_members_detach_by_instance(
+ &opctx,
+ InstanceUuid::from_untyped_uuid(*instance1_id),
+ )
+ .await
+ .expect("Should detach all memberships for instance1");
+
+ // Verify time_deleted was NOT set (members still exist, just in Left state)
+ let detached_member1 = datastore
+ .multicast_group_member_get_by_id(&opctx, member1_1.id, false)
+ .await
+ .expect("Should fetch member")
+ .expect("Member should still exist");
+ assert_eq!(detached_member1.state, MulticastGroupMemberState::Left);
+ assert!(
+ detached_member1.time_deleted.is_none(),
+ "detach_by_instance should NOT set time_deleted"
+ );
+ assert!(
+ detached_member1.sled_id.is_none(),
+ "sled_id should be cleared"
+ );
+
+ // Verify instance1 memberships transitioned to Left state
+ datastore
+ .multicast_group_members_list_by_id(
+ &opctx,
+ MulticastGroupUuid::from_untyped_uuid(group1.id()),
+ &external::DataPageParams::max_page(),
+ )
+ .await
+ .expect("Should list group1 members");
+
+ datastore
+ .multicast_group_members_list_by_id(
+ &opctx,
+ MulticastGroupUuid::from_untyped_uuid(group2.id()),
+ &external::DataPageParams::max_page(),
+ )
+ .await
+ .expect("Should list group2 members");
+
+ // Use list_active_test to get only active members (excludes "Left" state)
+ let active_group1_members = datastore
+ .multicast_group_members_list_active_test(&opctx)
+ .await
+ .expect("Should list active members")
+ .into_iter()
+ .filter(|m| m.external_group_id == group1.id())
+ .collect::<Vec<_>>();
+ assert_eq!(active_group1_members.len(), 1);
+ assert_eq!(active_group1_members[0].parent_id, *instance2_id);
+
+ let active_group2_members = datastore
+ .multicast_group_members_list_active_test(&opctx)
+ .await
+ .expect("Should list active members")
+ .into_iter()
+ .filter(|m| m.external_group_id == group2.id())
+ .collect::<Vec<_>>();
+ assert_eq!(active_group2_members.len(), 0);
+
+ // Test idempotency - detaching again should be idempotent
+ datastore
+ .multicast_group_members_detach_by_instance(
+ &opctx,
+ InstanceUuid::from_untyped_uuid(*instance1_id),
+ )
+ .await
+ .expect("Should handle detaching instance1 again");
+
+ db.terminate().await;
+ logctx.cleanup_successful();
+ }
+
+ #[tokio::test]
+ async fn test_multicast_group_member_operations_with_parent_id() {
+ let logctx = dev::test_setup_log(
"test_multicast_group_member_operations_with_parent_id", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup_with_range( + &opctx, + &datastore, + "parent-id-test-pool", + "test-project2", + (224, 0, 2, 1), + (224, 0, 2, 254), + ) + .await; + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "parent-id-test-group", + "224.0.2.5", + true, + ) + .await; + + // Create test instance + let instance_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "test-instance-parent", + ) + .await; + let instance_id = instance_record.as_untyped_uuid(); + + // Create VMM and associate instance with sled (required for multicast membership) + let vmm_id = create_vmm_for_instance( + &opctx, + &datastore, + instance_record, + setup.sled_id, + ) + .await; + attach_instance_to_vmm( + &opctx, + &datastore, + &setup.authz_project, + instance_record, + vmm_id, + ) + .await; + + // Add member using parent_id (instance_id) + let member = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(*instance_id), + ) + .await + .expect("Should add instance as member"); + + // Verify member has correct parent_id + assert_eq!(member.parent_id, *instance_id); + assert_eq!(member.external_group_id, group.id()); + assert_eq!(member.state, MulticastGroupMemberState::Joining); + + // Test member lookup by parent_id + let member_memberships = datastore + .multicast_group_members_list_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(*instance_id), + false, + ) + .await + .expect("Should list memberships for instance"); + + assert_eq!(member_memberships.len(), 1); + assert_eq!(member_memberships[0].parent_id, *instance_id); + assert_eq!(member_memberships[0].external_group_id, group.id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_member_duplicate_prevention() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_duplicate_prevention", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "duplicate-test-pool", + "test-project3", + ) + .await; + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "duplicate-test-group", + "224.10.1.5", + true, + ) + .await; + + // Create test instance + let instance_id = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "test-instance-dup", + ) + .await; + + // Create VMM and associate instance with sled (required for multicast membership) + let vmm_id = create_vmm_for_instance( + &opctx, + &datastore, + instance_id, + setup.sled_id, + ) + .await; + attach_instance_to_vmm( + &opctx, + &datastore, + &setup.authz_project, + instance_id, + vmm_id, + ) + .await; + + // Add member first time - should succeed + let member1 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should add instance as member first time"); + + // Try to add same instance again - should return existing member + let member2 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + 
.expect("Should handle duplicate add idempotently"); + + // Should return the same member + assert_eq!(member1.id, member2.id); + assert_eq!(member1.parent_id, member2.parent_id); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_member_sled_id_lifecycle() { + let logctx = + dev::test_setup_log("test_multicast_member_sled_id_lifecycle"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "lifecycle-test-pool", + "test-project-lifecycle", + ) + .await; + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "lifecycle-test-group", + "224.10.1.5", + true, + ) + .await; + + // Create additional test sleds for migration testing + let sled1_id = SledUuid::new_v4(); + let sled1_update = SledUpdateBuilder::new().sled_id(sled1_id).build(); + datastore.sled_upsert(sled1_update).await.unwrap(); + + let sled2_id = SledUuid::new_v4(); + let sled2_update = SledUpdateBuilder::new().sled_id(sled2_id).build(); + datastore.sled_upsert(sled2_update).await.unwrap(); + + // Create test instance + let instance_id = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "lifecycle-test-instance", + ) + .await; + let test_instance_id = instance_id.into_untyped_uuid(); + + // Create member record in "Joining" state (no sled_id initially) + let member = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should create member record"); + + // Member initially has no sled_id (created in "Joining" state) + assert_eq!(member.sled_id, None); + + // Instance start - Update sled_id from NULL to actual sled + datastore + .multicast_group_member_update_sled_id( + &opctx, + InstanceUuid::from_untyped_uuid(test_instance_id), + Some(sled1_id.into()), + ) + .await + .expect("Should update sled_id for instance start"); + + // Verify sled_id was updated + let updated_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should fetch updated member") + .expect("Member should exist"); + + assert_eq!(updated_member.sled_id, Some(sled1_id.into())); + + // Instance migration - Update sled_id from sled1 to sled2 + datastore + .multicast_group_member_update_sled_id( + &opctx, + InstanceUuid::from_untyped_uuid(test_instance_id), + Some(sled2_id.into()), + ) + .await + .expect("Should update sled_id for instance migration"); + + // Verify sled_id was updated to new sled + let migrated_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should fetch migrated member") + .expect("Member should exist"); + + assert_eq!(migrated_member.sled_id, Some(sled2_id.into())); + + // Instance stop - Clear sled_id (set to NULL) + datastore + .multicast_group_members_detach_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should clear sled_id for instance stop"); + + // Verify sled_id was cleared + let stopped_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + 
MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should fetch stopped member") + .expect("Member should exist"); + + assert_eq!(stopped_member.sled_id, None); + + // Idempotency - Clearing again should be idempotent + datastore + .multicast_group_members_detach_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should handle clearing sled_id again"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + /// Datastore-only verification of member state transitions. + async fn test_multicast_group_member_state_transitions_datastore() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_state_transitions_datastore", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup_with_range( + &opctx, + &datastore, + "state-test-pool", + "test-project4", + (224, 2, 1, 1), + (224, 2, 1, 254), + ) + .await; + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "state-test-group", + "224.2.1.5", + true, + ) + .await; + + // Create test instance (datastore-only) + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "state-test-instance", + setup.sled_id, + ) + .await; + let test_instance_id = instance.into_untyped_uuid(); + + // Create member record directly in "Joining" state + datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + ) + .await + .expect("Should create member record"); + + // Complete the attach operation + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + MulticastGroupMemberState::Joined, + ) + .await + .expect("Should complete attach operation"); + + // Complete the operation and leave + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(test_instance_id), + MulticastGroupMemberState::Left, + ) + .await + .expect("Should complete detach operation"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_members_complete_delete() { + let logctx = + dev::test_setup_log("test_multicast_group_members_complete_delete"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "complete-delete-test-pool", + "test-project-cleanup", + ) + .await; + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "cleanup-test-group", + "224.10.1.5", + true, + ) + .await; + + // Create real instances for the test + let (instance1, _vmm1) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "delete-test-instance1", + setup.sled_id, + ) + .await; + let instance1_id = instance1.into_untyped_uuid(); + + let (instance2, _vmm2) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "delete-test-instance2", + setup.sled_id, + ) + .await; + let instance2_id = instance2.into_untyped_uuid(); + + let (instance3, _vmm3) = create_instance_with_vmm( + &opctx, + &datastore, + 
&setup.authz_project, + "delete-test-instance3", + setup.sled_id, + ) + .await; + let instance3_id = instance3.into_untyped_uuid(); + + // Create member records in different states + let conn = datastore + .pool_connection_authorized(&opctx) + .await + .expect("Get connection"); + use nexus_db_schema::schema::multicast_group_member::dsl; + + // Member 1: "Left" + `time_deleted` (should be deleted) + let member1: MulticastGroupMember = + diesel::insert_into(dsl::multicast_group_member) + .values(MulticastGroupMemberValues { + id: Uuid::new_v4(), + time_created: Utc::now(), + time_modified: Utc::now(), + time_deleted: Some(Utc::now()), + external_group_id: group.id(), + parent_id: instance1_id, + sled_id: Some(setup.sled_id.into()), + state: MulticastGroupMemberState::Left, + }) + .returning(MulticastGroupMember::as_returning()) + .get_result_async(&*conn) + .await + .expect("Should create member1 record"); + + // Member 2: "Left" but no `time_deleted` (should NOT be deleted) + let member2: MulticastGroupMember = + diesel::insert_into(dsl::multicast_group_member) + .values(MulticastGroupMemberValues { + id: Uuid::new_v4(), + time_created: Utc::now(), + time_modified: Utc::now(), + time_deleted: None, + external_group_id: group.id(), + parent_id: instance2_id, + sled_id: Some(setup.sled_id.into()), + state: MulticastGroupMemberState::Left, + }) + .returning(MulticastGroupMember::as_returning()) + .get_result_async(&*conn) + .await + .expect("Should create member2 record"); + + // Member 3: "Joined" state (should NOT be deleted, even if it had time_deleted) + let member3: MulticastGroupMember = + diesel::insert_into(dsl::multicast_group_member) + .values(MulticastGroupMemberValues { + id: Uuid::new_v4(), + time_created: Utc::now(), + time_modified: Utc::now(), + time_deleted: Some(Utc::now()), // Has time_deleted but is Joined, so won't be cleaned up + external_group_id: group.id(), + parent_id: instance3_id, + sled_id: Some(setup.sled_id.into()), + state: MulticastGroupMemberState::Joined, + }) + .returning(MulticastGroupMember::as_returning()) + .get_result_async(&*conn) + .await + .expect("Should create member3 record"); + + // Since we created exactly 3 member records above, we can verify by + // checking that each member was created successfully (no need for a + // full table scan) member1: "Left" + `time_deleted`, member2: "Left" + + // no `time_deleted`, member3: "Joined" + `time_deleted` + + // Run complete delete + let deleted_count = datastore + .multicast_group_members_complete_delete(&opctx) + .await + .expect("Should run complete delete"); + + // Should only delete member1 ("Left" + `time_deleted`) + assert_eq!(deleted_count, 1); + + // Verify member1 was deleted by trying to find it directly + let member1_result = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member1.parent_id), + ) + .await + .expect("Should query for member1"); + assert!(member1_result.is_none(), "member1 should be deleted"); + + // Verify member2 still exists + let member2_result = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member2.parent_id), + ) + .await + .expect("Should query for member2"); + assert!(member2_result.is_some(), "member2 should still exist"); + + // Verify member3 still exists (time_deleted set but not cleaned up yet) + let member3_result = datastore + 
.multicast_group_member_get_by_id(&opctx, member3.id, true) + .await + .expect("Should query for member3"); + assert!( + member3_result.is_some(), + "member3 should still exist in database (not cleaned up due to 'Joined' state)" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_instance_get_sled_id() { + let logctx = dev::test_setup_log("test_instance_get_sled_id"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "sled-test-pool", + "test-project-sled", + ) + .await; + + // Non-existent instance should return NotFound error + let fake_instance_id = Uuid::new_v4(); + let result = + datastore.instance_get_sled_id(&opctx, fake_instance_id).await; + assert!(result.is_err()); + match result.unwrap_err() { + external::Error::ObjectNotFound { .. } => (), + other => panic!("Expected ObjectNotFound, got: {:?}", other), + } + + // Stopped instance (no active VMM) should return None + let stopped_instance = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "stopped-instance", + ) + .await; + let stopped_instance_id = stopped_instance.as_untyped_uuid(); + + let result = datastore + .instance_get_sled_id(&opctx, *stopped_instance_id) + .await + .expect("Should get sled_id for stopped instance"); + assert_eq!(result, None); + + // Running instance (with active VMM) should return the sled_id + let (running_instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "running-instance", + setup.sled_id, + ) + .await; + let running_instance_id = running_instance.as_untyped_uuid(); + + let result = datastore + .instance_get_sled_id(&opctx, *running_instance_id) + .await + .expect("Should get sled_id for running instance"); + assert_eq!(result, Some(setup.sled_id.into_untyped_uuid())); + + // Instance with VMM but no active_propolis_id should return None + let inactive_instance = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "inactive-instance", + ) + .await; + let inactive_instance_id = inactive_instance.as_untyped_uuid(); + + // Create VMM but don't attach it (no active_propolis_id) + create_vmm_for_instance( + &opctx, + &datastore, + inactive_instance, + setup.sled_id, + ) + .await; + + let result = datastore + .instance_get_sled_id(&opctx, *inactive_instance_id) + .await + .expect("Should get sled_id for inactive instance"); + assert_eq!(result, None); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_member_database_error_handling() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_database_error_handling", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "error-test-pool", + "test-project-errors", + ) + .await; + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "error-test-group", + "224.10.1.6", + true, + ) + .await; + + // Create test instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "error-test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Operations on non-existent groups should return appropriate errors + let fake_group_id = 
Uuid::new_v4(); + + // Try to add member to non-existent group + let result = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_group_id), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await; + assert!(result.is_err(), "Attach to non-existent group should fail"); + + // Try to set state for non-existent member + let result = datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_group_id), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Joined, + ) + .await; + assert!( + result.is_err(), + "Set state for non-existent member should fail" + ); + + // Try to get member from non-existent group + let result = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_group_id), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Query should succeed"); + assert!(result.is_none(), "Non-existent member should return None"); + + // Operations on non-existent instances should handle errors appropriately + let fake_instance_id = Uuid::new_v4(); + + // Try to get sled_id for non-existent instance + let result = + datastore.instance_get_sled_id(&opctx, fake_instance_id).await; + assert!( + result.is_err(), + "Get sled_id for non-existent instance should fail" + ); + + // Try to attach non-existent instance to group + let result = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(fake_instance_id), + ) + .await; + assert!(result.is_err(), "Attach non-existent instance should fail"); + + // Successfully create a member for further testing + datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should create member"); + + // Invalid state transitions should be handled gracefully + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Left, + ) + .await + .expect("Should allow transition to 'Left'"); + + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Joined, + ) + .await + .expect("Should allow transition back to 'Joined'"); + + // Test idempotent operations work correctly + datastore + .multicast_group_members_detach_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("First detach should succeed"); + + datastore + .multicast_group_members_detach_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Second detach should be idempotent"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_member_set_instance_sled() { + let logctx = dev::test_setup_log( + "test_multicast_group_member_set_instance_sled", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create test setup + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "start-test-pool", + "test-project", + ) + .await; + + // Create multicast group + let group = 
multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "start-test-group", + "224.10.1.100", + true, + ) + .await; + + let initial_sled = SledUuid::new_v4(); + let new_sled = SledUuid::new_v4(); + + // Create sled records + datastore + .sled_upsert(SledUpdateBuilder::new().sled_id(initial_sled).build()) + .await + .unwrap(); + datastore + .sled_upsert(SledUpdateBuilder::new().sled_id(new_sled).build()) + .await + .unwrap(); + + // Create test instance + let instance_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "start-test-instance", + ) + .await; + let instance_id = + InstanceUuid::from_untyped_uuid(*instance_record.as_untyped_uuid()); + + // Add member in "Joining" state (typical after instance create) + let member = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should add member"); + + // Verify initial state: "Joining" with no sled_id + assert_eq!(member.state, MulticastGroupMemberState::Joining); + assert!(member.sled_id.is_none()); + + // Simulate first-time instance start - use update_sled_id for "Joining" members + datastore + .multicast_group_member_update_sled_id( + &opctx, + instance_id, + Some(initial_sled.into()), + ) + .await + .expect("Should update sled_id on first start"); + + // Verify member is still "Joining" but now has sled_id + let updated_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should find updated member") + .expect("Member should exist"); + + assert_eq!(updated_member.state, MulticastGroupMemberState::Joining); + assert_eq!(updated_member.sled_id, Some(initial_sled.into())); + assert!(updated_member.time_modified > member.time_modified); + + // Simulate instance stop by transitioning to "Left" state + datastore + .multicast_group_members_detach_by_instance(&opctx, instance_id) + .await + .expect("Should stop instance"); + + // Verify member is "Left" with no sled_id + let stopped_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should find stopped member") + .expect("Member should exist"); + + assert_eq!(stopped_member.state, MulticastGroupMemberState::Left); + assert!(stopped_member.sled_id.is_none()); + + // Simulate instance restart on new sled - should transition "Left" → "Joining" + datastore + .multicast_group_member_set_instance_sled( + &opctx, + instance_id, + new_sled.into(), + ) + .await + .expect("Should restart instance on new sled"); + + // Verify member is back to "Joining" with new sled_id + let restarted_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should find restarted member") + .expect("Member should exist"); + + assert_eq!(restarted_member.state, MulticastGroupMemberState::Joining); + assert_eq!(restarted_member.sled_id, Some(new_sled.into())); + assert!(restarted_member.time_modified > stopped_member.time_modified); + + // Test that starting instance with "Joined" members works correctly + // First transition to "Joined" state (simulate RPW reconciler) + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + 
MulticastGroupMemberState::Joined, + ) + .await + .expect("Should transition to 'Joined'"); + + // Verify member is now "Joined" + let joined_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should find joined member") + .expect("Member should exist"); + + assert_eq!(joined_member.state, MulticastGroupMemberState::Joined); + + // Start instance again - "Joined" members should remain unchanged + let before_modification = joined_member.time_modified; + datastore + .multicast_group_member_set_instance_sled( + &opctx, + instance_id, + new_sled.into(), + ) + .await + .expect("Should handle start on already-running instance"); + + // Verify "Joined" member remains unchanged (no state transition) + let unchanged_member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should find unchanged member") + .expect("Member should exist"); + + assert_eq!(unchanged_member.state, MulticastGroupMemberState::Joined); + assert_eq!(unchanged_member.time_modified, before_modification); + + // Test starting instance that has no multicast memberships (should be no-op) + let non_member_instance = InstanceUuid::new_v4(); + datastore + .multicast_group_member_set_instance_sled( + &opctx, + non_member_instance, + new_sled.into(), + ) + .await + .expect("Should handle start on instance with no memberships"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_members_mark_for_removal() { + let logctx = dev::test_setup_log( + "test_multicast_group_members_mark_for_removal", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create test setup + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "removal-test-pool", + "test-project", + ) + .await; + + // Create multicast groups + let group1 = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "removal-group1", + "224.10.1.100", + true, + ) + .await; + + let group2 = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "removal-group2", + "224.10.1.101", + true, + ) + .await; + + // Create test instances + let instance1_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "removal-test-instance1", + ) + .await; + let instance1_id = InstanceUuid::from_untyped_uuid( + *instance1_record.as_untyped_uuid(), + ); + + let instance2_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "removal-test-instance2", + ) + .await; + let instance2_id = InstanceUuid::from_untyped_uuid( + *instance2_record.as_untyped_uuid(), + ); + + // Add instance1 to both groups + let member1_1 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + instance1_id, + ) + .await + .expect("Should add instance1 to group1"); + + let member1_2 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group2.id()), + instance1_id, + ) + .await + .expect("Should add instance1 to group2"); + + // Add instance2 to only group1 + let member2_1 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + instance2_id, + ) + .await + .expect("Should add instance2 to 
group1"); + + // Verify all members exist and are not marked for removal + assert!(member1_1.time_deleted.is_none()); + assert!(member1_2.time_deleted.is_none()); + assert!(member2_1.time_deleted.is_none()); + + // Mark all memberships for instance1 for removal + datastore + .multicast_group_members_mark_for_removal(&opctx, instance1_id) + .await + .expect("Should mark instance1 memberships for removal"); + + // Verify instance1 memberships are marked for removal + let marked_member1_1 = datastore + .multicast_group_member_get_by_id(&opctx, member1_1.id, true) + .await + .expect("Should query member1_1") + .expect("Member1_1 should exist"); + assert!(marked_member1_1.time_deleted.is_some()); + + let marked_member1_2 = datastore + .multicast_group_member_get_by_id(&opctx, member1_2.id, true) + .await + .expect("Should query member1_2") + .expect("Member1_2 should exist"); + assert!(marked_member1_2.time_deleted.is_some()); + + // Verify instance2 membership is NOT marked for removal + let unmarked_member2_1 = datastore + .multicast_group_member_get_by_id(&opctx, member2_1.id, true) + .await + .expect("Should query member2_1") + .expect("Member2_1 should exist"); + assert!(unmarked_member2_1.time_deleted.is_none()); + + // Verify marked members are not returned by normal queries (time_deleted filter) + let visible_member1_1 = datastore + .multicast_group_member_get_by_id(&opctx, member1_1.id, false) + .await + .expect("Should query member1_1"); + assert!( + visible_member1_1.is_none(), + "Marked member should not be visible" + ); + + let visible_member2_1 = datastore + .multicast_group_member_get_by_id(&opctx, member2_1.id, false) + .await + .expect("Should query member2_1"); + assert!( + visible_member2_1.is_some(), + "Unmarked member should be visible" + ); + + // Test idempotency - marking again should be safe + datastore + .multicast_group_members_mark_for_removal(&opctx, instance1_id) + .await + .expect("Should handle duplicate mark for removal"); + + // Test marking instance with no memberships (should be no-op) + let non_member_instance = InstanceUuid::new_v4(); + datastore + .multicast_group_members_mark_for_removal( + &opctx, + non_member_instance, + ) + .await + .expect("Should handle marking instance with no memberships"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_multicast_group_members_delete_by_group() { + let logctx = + dev::test_setup_log("test_multicast_group_members_delete_by_group"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + // Create test setup + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "delete-group-test-pool", + "test-project", + ) + .await; + + // Create multicast groups + let group1 = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "delete-group1", + "224.10.1.100", + true, + ) + .await; + + let group2 = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "delete-group2", + "224.10.1.101", + true, + ) + .await; + + // Create test instances + let instance1_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "delete-test-instance1", + ) + .await; + let instance1_id = InstanceUuid::from_untyped_uuid( + *instance1_record.as_untyped_uuid(), + ); + + let instance2_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "delete-test-instance2", + ) + .await; + let instance2_id = 
InstanceUuid::from_untyped_uuid( + *instance2_record.as_untyped_uuid(), + ); + + let instance3_record = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "delete-test-instance3", + ) + .await; + let instance3_id = InstanceUuid::from_untyped_uuid( + *instance3_record.as_untyped_uuid(), + ); + + // Add members to group1 + let member1_1 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + instance1_id, + ) + .await + .expect("Should add instance1 to group1"); + + let member1_2 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + instance2_id, + ) + .await + .expect("Should add instance2 to group1"); + + // Add members to group2 + let member2_1 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group2.id()), + instance1_id, + ) + .await + .expect("Should add instance1 to group2"); + + let member2_2 = datastore + .multicast_group_member_add( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group2.id()), + instance3_id, + ) + .await + .expect("Should add instance3 to group2"); + + // Verify all members exist + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member1_1.id, false) + .await + .unwrap() + .is_some() + ); + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member1_2.id, false) + .await + .unwrap() + .is_some() + ); + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member2_1.id, false) + .await + .unwrap() + .is_some() + ); + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member2_2.id, false) + .await + .unwrap() + .is_some() + ); + + // Delete all members of group1 + datastore + .multicast_group_members_delete_by_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + ) + .await + .expect("Should delete all group1 members"); + + // Verify group1 members are gone + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member1_1.id, true) + .await + .unwrap() + .is_none() + ); + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member1_2.id, true) + .await + .unwrap() + .is_none() + ); + + // Verify group2 members still exist + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member2_1.id, false) + .await + .unwrap() + .is_some() + ); + assert!( + datastore + .multicast_group_member_get_by_id(&opctx, member2_2.id, false) + .await + .unwrap() + .is_some() + ); + + // Verify group1 member list is empty + let group1_members = datastore + .multicast_group_members_list_by_id( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + &external::DataPageParams::max_page(), + ) + .await + .expect("Should list group1 members"); + assert_eq!(group1_members.len(), 0); + + // Verify group2 still has its members + let group2_members = datastore + .multicast_group_members_list_by_id( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group2.id()), + &external::DataPageParams::max_page(), + ) + .await + .expect("Should list group2 members"); + assert_eq!(group2_members.len(), 2); + + // Test deleting from group with no members (should be no-op) + datastore + .multicast_group_members_delete_by_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group1.id()), + ) + .await + .expect("Should handle deleting from empty group"); + + // Test deleting from nonexistent group (should be no-op) + let fake_group_id = Uuid::new_v4(); + datastore + .multicast_group_members_delete_by_group( + 
&opctx, + MulticastGroupUuid::from_untyped_uuid(fake_group_id), + ) + .await + .expect("Should handle deleting from nonexistent group"); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_concurrent_same_member() { + let logctx = + dev::test_setup_log("test_member_attach_concurrent_same_member"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "concurrent-test-pool", + "concurrent-test-project", + ) + .await; + + // Create active group + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.5", + true, // make_active + ) + .await; + + // Create instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Simulate two Nexus instances concurrently attaching the same member + let group_id = group.id(); + let datastore1 = datastore.clone(); + let datastore2 = datastore.clone(); + let opctx1 = opctx.child(std::collections::BTreeMap::new()); + let opctx2 = opctx.child(std::collections::BTreeMap::new()); + + let handle1 = tokio::spawn(async move { + datastore1 + .multicast_group_member_attach_to_instance( + &opctx1, + MulticastGroupUuid::from_untyped_uuid(group_id), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + }); + + let handle2 = tokio::spawn(async move { + datastore2 + .multicast_group_member_attach_to_instance( + &opctx2, + MulticastGroupUuid::from_untyped_uuid(group_id), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + }); + + // Both operations should succeed + let (result1, result2) = tokio::join!(handle1, handle2); + let member_id1 = result1 + .expect("Task 1 should complete") + .expect("Attach 1 should succeed"); + let member_id2 = result2 + .expect("Task 2 should complete") + .expect("Attach 2 should succeed"); + + // Both should return the same member_id + assert_eq!(member_id1, member_id2); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_invalid_group_or_instance() { + let logctx = + dev::test_setup_log("test_member_attach_invalid_group_or_instance"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "invalid-test-pool", + "invalid-test-project", + ) + .await; + + // Create a valid instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Attach to non-existent group + let fake_group_id = Uuid::new_v4(); + let result = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_group_id), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await; + + // Should fail with GroupNotActive (group doesn't exist) + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(matches!(err, external::Error::InvalidRequest { .. 
})); + + // Create a valid active group + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.6", + true, // make_active + ) + .await; + + // Attach non-existent instance + let fake_instance_id = Uuid::new_v4(); + let result = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(fake_instance_id), + ) + .await; + + // Should fail because CTE validates instance exists atomically + assert!(result.is_err()); + let err = result.unwrap_err(); + // The error will be InvalidRequest from the CTE (instance not found) + assert!(matches!(err, external::Error::InvalidRequest { .. })); + assert!(err.to_string().contains("does not exist")); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_requires_active_group() { + let logctx = + dev::test_setup_log("test_member_attach_requires_active_group"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "active-check-pool", + "active-check-project", + ) + .await; + + // Create group that stays in Creating state (don't activate) + let creating_group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "creating-group", + "224.10.1.7", + false, // leave in Creating state + ) + .await; + + // Create instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Attempt to attach to non-active group should fail + let result = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(creating_group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await; + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(matches!(err, external::Error::InvalidRequest { .. 
})); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_idempotency() { + let logctx = dev::test_setup_log("test_member_attach_idempotency"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "idempotent-test-pool", + "idempotent-test-project", + ) + .await; + + // Create active group + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.8", + true, // make_active + ) + .await; + + // Create instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // First attach + let member_id1 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("First attach should succeed"); + // Capture time_modified after first attach + let member_after_first = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should fetch member after first attach") + .expect("Member should exist"); + let time_after_first = member_after_first.time_modified; + + // Second attach + let member_id2 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Second attach should succeed"); + + assert_eq!(member_id1, member_id2, "Should return same member ID"); + let member_after_second = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should fetch member after second attach") + .expect("Member should exist"); + assert_eq!( + member_after_second.time_modified, time_after_first, + "Idempotent attach must not update time_modified" + ); + + // Third attach (still idempotent) + let member_id3 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Third attach should succeed"); + + assert_eq!(member_id1, member_id3, "Should return same member ID"); + let member_after_third = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should fetch member after third attach") + .expect("Member should exist"); + assert_eq!( + member_after_third.time_modified, time_after_first, + "Idempotent attach must not update time_modified (third call)" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_reactivation_from_left() { + let logctx = + dev::test_setup_log("test_member_attach_reactivation_from_left"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "reactivation-test-pool", + 
"reactivation-test-project", + ) + .await; + + // Create active group + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.9", + true, // make_active + ) + .await; + + // Create instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // First attach + let member_id1 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("First attach should succeed"); + + // Transition member to "Left" state and clear sled_id (simulating instance stop) + // This does NOT set time_deleted - only stopped instances can be reactivated + datastore + .multicast_group_members_detach_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should transition member to 'Left' and clear sled_id"); + + // Verify member is now in Left state WITHOUT time_deleted + let member_stopped = datastore + .multicast_group_member_get_by_id(&opctx, member_id1, false) + .await + .expect("Should get member") + .expect("Member should still exist (not soft-deleted)"); + assert_eq!(member_stopped.state, MulticastGroupMemberState::Left); + assert!( + member_stopped.time_deleted.is_none(), + "time_deleted should NOT be set for stopped instances" + ); + assert!(member_stopped.sled_id.is_none(), "sled_id should be cleared"); + + // Reactivate by attaching again (simulating instance restart) + let member_id2 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Reactivation should succeed"); + + // Should return same member ID (reactivated existing member) + assert_eq!(member_id1, member_id2, "Should reactivate same member"); + + // Verify member is back in "Joining" state with time_deleted still NULL + let member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member") + .expect("Member should exist"); + + assert_eq!(member.state, MulticastGroupMemberState::Joining); + assert_eq!(member.id, member_id1); + assert!( + member.time_deleted.is_none(), + "time_deleted should remain NULL (never set by detach_by_instance)" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_partial_index_behavior() { + let logctx = + dev::test_setup_log("test_member_attach_partial_index_behavior"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "partial-index-test-pool", + "partial-index-test-project", + ) + .await; + + // Create active group + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.10", + true, // make_active + ) + .await; + + // Create instance + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Create member + let member_id1 = datastore + 
.multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Attach should succeed"); + + // Transition through states: "Joining" -> "Joined" -> "Left" + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Joined, + ) + .await + .expect("Transition to Joined should succeed"); + + datastore + .multicast_group_member_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Left, + ) + .await + .expect("Transition to Left should succeed"); + + // The partial unique index with predicate (time_deleted IS NULL) + // works with ON CONFLICT to reactivate an existing row that is in + // state 'Left' with time_deleted=NULL. In this case, ON CONFLICT + // updates the row (Left → Joining) instead of inserting a new one. + let member_id2 = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should allow reattach of Left member"); + + // Should reactivate the same member (not create a new one) + assert_eq!(member_id1, member_id2); + + // Verify only one member exists for this (group, instance) pair + let members = datastore + .multicast_group_members_list_by_instance( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + false, // include_removed = false + ) + .await + .expect("List members should succeed"); + + // Filter to our group + let our_members: Vec<_> = members + .iter() + .filter(|m| m.external_group_id == group.id()) + .collect(); + + assert_eq!(our_members.len(), 1, "Should have exactly one member"); + assert_eq!(our_members[0].id, member_id1); + assert_eq!(our_members[0].state, MulticastGroupMemberState::Joining); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_error_priority_both_invalid() { + let logctx = dev::test_setup_log( + "test_member_attach_error_priority_both_invalid", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let fake_group_id = Uuid::new_v4(); + let fake_instance_id = Uuid::new_v4(); + + // Attempt to attach non-existent instance to non-existent group + let result = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(fake_group_id), + InstanceUuid::from_untyped_uuid(fake_instance_id), + ) + .await; + + // Should fail with InstanceNotFound (checked first), not GroupNotActive + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(matches!(err, external::Error::InvalidRequest { .. 
})); + assert!( + err.to_string().contains("Instance does not exist"), + "Expected InstanceNotFound error, got: {err}" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_member_attach_stopped_instance() { + let logctx = dev::test_setup_log("test_member_attach_stopped_instance"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "stopped-test-pool", + "stopped-test-project", + ) + .await; + + // Create active group + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.11", + true, // make_active + ) + .await; + + // Create stopped instance (no VMM) + let instance_id = create_stopped_instance_record( + &opctx, + &datastore, + &setup.authz_project, + "stopped-instance", + ) + .await; + + // Attach stopped instance should succeed + let member_id = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should attach stopped instance"); + + // Verify member created with sled_id = NULL (no active VMM) + let member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + instance_id, + ) + .await + .expect("Should get member") + .expect("Member should exist"); + + assert_eq!(member.id, member_id); + assert_eq!(member.state, MulticastGroupMemberState::Joining); + assert_eq!( + member.sled_id, None, + "Stopped instance should have sled_id = NULL" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/multicast/mod.rs b/nexus/db-queries/src/db/datastore/multicast/mod.rs new file mode 100644 index 00000000000..c7d41774805 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/multicast/mod.rs @@ -0,0 +1,24 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Multicast group management and IP allocation. +//! +//! This module provides database operations for multicast groups following +//! the bifurcated design from [RFD 488](https://rfd.shared.oxide.computer/rfd/488): +//! +//! - External groups: External-facing, allocated from IP pools +//! - Underlay groups: System-generated admin-scoped IPv6 multicast groups +//! +//! ## Typed UUID Usage +//! +//! Public datastore functions in this module use typed UUIDs for type safety: +//! +//! - **Public functions** use `MulticastGroupUuid` and `InstanceUuid` for: +//! - Type safety at API boundaries +//! - Clear documentation of expected ID types +//! - Preventing UUID type confusion + +pub mod groups; +pub mod members; +pub mod ops; diff --git a/nexus/db-queries/src/db/datastore/multicast/ops/member_attach.rs b/nexus/db-queries/src/db/datastore/multicast/ops/member_attach.rs new file mode 100644 index 00000000000..886a223d1db --- /dev/null +++ b/nexus/db-queries/src/db/datastore/multicast/ops/member_attach.rs @@ -0,0 +1,365 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Atomic CTE for attaching instances to multicast groups. +//! +//! 
Uses three CTEs to atomically validate group is "Active" and instance exists,
+//! then inserts or updates the member row. Idempotent operation handles:
+//!
+//! - **No existing member**: Insert new row in "Joining" state
+//! - **Member in "Left" (time_deleted=NULL)**: Transition to "Joining", update sled_id
+//! - **Member in "Left" (time_deleted set)**: Insert new row (soft-delete ignored / not reactivated)
+//! - **Member in "Joining"/"Joined"**: No-op (already attached)
+//!
+//! Upsert only runs if group is "Active" and instance exists (validated by
+//! `active_group` and `instance_sled` CTEs). Returns the member ID.
+//!
+//! Prevents TOCTOU races: group validation, instance sled_id lookup, and member
+//! upsert all happen in one atomic database operation.
+
+use std::fmt::Debug;
+
+use async_bb8_diesel::AsyncRunQueryDsl;
+use chrono::{DateTime, Utc};
+use diesel::pg::Pg;
+use diesel::prelude::*;
+use diesel::query_builder::*;
+use diesel::result::Error as DieselError;
+use diesel::sql_types::{Bool, Nullable, Timestamptz, Uuid as SqlUuid};
+use uuid::Uuid;
+
+use nexus_db_lookup::DbConnection;
+use nexus_db_model::MulticastGroupMemberState;
+use omicron_common::api::external::Error as ExternalError;
+
+/// True if the group exists and is in "Active" state.
+type GroupIsActive = Option<bool>;
+
+/// True if the instance exists and has not been deleted.
+type InstanceExists = Option<bool>;
+
+/// UUID of the member row (new or existing).
+type MemberId = Option<Uuid>;
+
+/// Raw result tuple from the CTE query before parsing.
+///
+/// All fields are `Option` because CTEs return zero rows when validation fails
+/// (group not active, instance not found, etc.).
+type RawAttachMemberResult = (GroupIsActive, InstanceExists, MemberId);
+
+/// Result of attaching an instance to a multicast group.
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct AttachMemberResult {
+    /// Member UUID for this (group, instance) pair. New on first attach,
+    /// existing ID on subsequent calls.
+    pub member_id: Uuid,
+}
+
+/// Errors from attaching an instance to a multicast group.
+#[derive(Debug)]
+pub(crate) enum AttachMemberError {
+    /// Multicast group doesn't exist or isn't "Active"
+    GroupNotActive,
+    /// Instance doesn't exist or has been deleted
+    InstanceNotFound,
+    /// Database constraint violation (unique index, etc.)
+    ConstraintViolation(String),
+    /// Other database error
+    DatabaseError(DieselError),
+}
+
+impl From<AttachMemberError> for ExternalError {
+    fn from(err: AttachMemberError) -> Self {
+        match err {
+            AttachMemberError::GroupNotActive => {
+                ExternalError::invalid_request(
+                    "Multicast group is not active (may be creating, deleting, or deleted)",
+                )
+            }
+            AttachMemberError::InstanceNotFound => {
+                ExternalError::invalid_request(
+                    "Instance does not exist or has been deleted",
+                )
+            }
+            AttachMemberError::ConstraintViolation(msg) => {
+                ExternalError::invalid_request(&format!(
+                    "Constraint violation: {msg}"
+                ))
+            }
+            AttachMemberError::DatabaseError(e) => {
+                ExternalError::internal_error(&format!("Database error: {e:?}"))
+            }
+        }
+    }
+}
+
+/// Atomically attach an instance to a multicast group.
+///
+/// Single database round-trip performs unconditional upsert:
+///
+/// - **Insert**: No member exists → create in "Joining" state
+/// - **Reactivate**: Member in "Left" (time_deleted=NULL) → transition to "Joining", update sled_id
+/// - **Insert new**: Member in "Left" (time_deleted set) → create new row
+/// - **Idempotent**: Member already "Joining" or "Joined" → no-op
+///
+/// Atomically validates group and instance exist, retrieves instance's current
+/// sled_id, and performs member upsert. Returns member ID.
+#[must_use = "Queries must be executed"]
+pub(crate) struct AttachMemberToGroupStatement {
+    group_id: Uuid,
+    instance_id: Uuid,
+    new_member_id: Uuid,
+    time_created: DateTime<Utc>,
+    time_modified: DateTime<Utc>,
+}
+
+impl AttachMemberToGroupStatement {
+    /// Create an attach statement.
+    ///
+    /// # Arguments
+    ///
+    /// - `group_id`: Multicast group to attach to
+    /// - `instance_id`: Instance being attached as member
+    /// - `new_member_id`: UUID for new member row (if creating)
+    ///
+    /// Three CTEs atomically validate group is "Active", instance exists, and
+    /// retrieve current sled_id from VMM table, then perform upsert.
+    pub fn new(group_id: Uuid, instance_id: Uuid, new_member_id: Uuid) -> Self {
+        let now = Utc::now();
+        Self {
+            group_id,
+            instance_id,
+            new_member_id,
+            time_created: now,
+            time_modified: now,
+        }
+    }
+
+    /// Execute the statement and parse the result.
+    pub async fn execute(
+        self,
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+    ) -> Result<AttachMemberResult, AttachMemberError> {
+        self.get_result_async::<RawAttachMemberResult>(conn)
+            .await
+            .map_err(|e| match &e {
+                DieselError::DatabaseError(kind, info) => match kind {
+                    diesel::result::DatabaseErrorKind::UniqueViolation => {
+                        AttachMemberError::ConstraintViolation(
+                            info.message().to_string(),
+                        )
+                    }
+                    _ => AttachMemberError::DatabaseError(e),
+                },
+                _ => AttachMemberError::DatabaseError(e),
+            })
+            .and_then(Self::parse_result)
+    }
+
+    fn parse_result(
+        result: RawAttachMemberResult,
+    ) -> Result<AttachMemberResult, AttachMemberError> {
+        let (group_is_active, instance_exists, member_id) = result;
+
+        // Check validations in priority order for most helpful error messages.
+        // Instance errors first since users attach their own instances to groups,
+        // making instance-not-found more actionable than group-state errors.
+        if instance_exists != Some(true) {
+            return Err(AttachMemberError::InstanceNotFound);
+        }
+
+        // Group must be active
+        if group_is_active != Some(true) {
+            return Err(AttachMemberError::GroupNotActive);
+        }
+
+        // If validations passed, we must have a member_id
+        let member_id = member_id
+            .ok_or(AttachMemberError::DatabaseError(DieselError::NotFound))?;
+        Ok(AttachMemberResult { member_id })
+    }
+}
+
+impl QueryId for AttachMemberToGroupStatement {
+    type QueryId = ();
+    const HAS_STATIC_QUERY_ID: bool = false;
+}
+
+impl Query for AttachMemberToGroupStatement {
+    type SqlType = (
+        // group_is_active: true if group exists and is Active
+        Nullable<Bool>,
+        // instance_exists: true if instance exists and not deleted
+        Nullable<Bool>,
+        // member_id: UUID of member row
+        Nullable<SqlUuid>,
+    );
+}
+
+impl RunQueryDsl<DbConnection> for AttachMemberToGroupStatement {}
+
+/// Generates SQL for atomic member attachment via three CTEs.
+///
+/// CTEs validate group and instance exist, retrieve instance's current sled_id,
+/// then perform unconditional upsert (handles insert, reactivation, and
+/// idempotent cases). ON CONFLICT DO UPDATE only modifies rows in "Left" state.
+///
+/// Prevents TOCTOU races by performing all validation and updates in one atomic
+/// database operation.
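+///
+/// A rough usage sketch from the calling side; the surrounding wrapper is
+/// illustrative and not part of this change, and only `new()`, `execute()`,
+/// and the `member_id` field come from the statement defined here:
+///
+/// ```ignore
+/// let stmt = AttachMemberToGroupStatement::new(
+///     group_id,
+///     instance_id,
+///     Uuid::new_v4(), // used only if a brand-new member row is inserted
+/// );
+/// // One round trip: validate the group is "Active", check the instance,
+/// // and upsert the member row; the returned ID is stable across retries.
+/// let member_id =
+///     stmt.execute(&conn).await.map_err(ExternalError::from)?.member_id;
+/// ```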
+impl AttachMemberToGroupStatement {
+    /// Generates the `active_group` CTE (checks if group exists and is active).
+    fn push_active_group_cte<'a>(
+        &'a self,
+        mut out: AstPass<'_, 'a, Pg>,
+    ) -> QueryResult<()> {
+        use nexus_db_model::MulticastGroupState;
+        out.push_sql("SELECT id FROM multicast_group WHERE id = ");
+        out.push_bind_param::<SqlUuid, Uuid>(&self.group_id)?;
+        out.push_sql(" AND state = ");
+        out.push_sql(super::group_state_as_sql_literal(
+            MulticastGroupState::Active,
+        ));
+        out.push_sql(" AND time_deleted IS NULL");
+        Ok(())
+    }
+
+    /// Generates the `instance_sled` CTE (validates instance and gets sled_id).
+    ///
+    /// Joins instance and VMM tables via active_propolis_id to get current sled_id.
+    /// Returns one row with (instance_id, sled_id) if instance exists and not deleted.
+    fn push_instance_sled_cte<'a>(
+        &'a self,
+        mut out: AstPass<'_, 'a, Pg>,
+    ) -> QueryResult<()> {
+        out.push_sql(
+            "SELECT instance.id, vmm.sled_id \
+            FROM instance \
+            LEFT JOIN vmm ON instance.active_propolis_id = vmm.id \
+            WHERE instance.id = ",
+        );
+        out.push_bind_param::<SqlUuid, Uuid>(&self.instance_id)?;
+        out.push_sql(" AND instance.time_deleted IS NULL");
+        Ok(())
+    }
+
+    /// Generates the `upserted_member` CTE (performs unconditional upsert).
+    ///
+    /// SELECT joins with both `active_group` and `instance_sled` CTEs to:
+    /// 1. Ensure group is active (FROM active_group)
+    /// 2. Retrieve instance's current sled_id (CROSS JOIN instance_sled)
+    ///
+    /// ON CONFLICT clause uses partial unique index (only rows with time_deleted IS NULL):
+    /// - Conflict only for members with time_deleted=NULL (active or stopped)
+    /// - Members with time_deleted set ignored by constraint (INSERT new row)
+    /// - UPDATE path preserves time_deleted=NULL for reactivated members
+    fn push_upserted_member_cte<'a>(
+        &'a self,
+        mut out: AstPass<'_, 'a, Pg>,
+    ) -> QueryResult<()> {
+        out.push_sql(
+            "INSERT INTO multicast_group_member (\
+            id, time_created, time_modified, external_group_id, \
+            parent_id, sled_id, state) SELECT ",
+        );
+        out.push_bind_param::<SqlUuid, Uuid>(&self.new_member_id)?;
+        out.push_sql(", ");
+        out.push_bind_param::<Timestamptz, DateTime<Utc>>(&self.time_created)?;
+        out.push_sql(", ");
+        out.push_bind_param::<Timestamptz, DateTime<Utc>>(&self.time_modified)?;
+        out.push_sql(", ");
+        out.push_bind_param::<SqlUuid, Uuid>(&self.group_id)?;
+        out.push_sql(", ");
+        out.push_bind_param::<SqlUuid, Uuid>(&self.instance_id)?;
+        out.push_sql(", instance_sled.sled_id, ");
+        out.push_sql(super::member_state_as_sql_literal(
+            MulticastGroupMemberState::Joining,
+        ));
+        out.push_sql(" FROM active_group CROSS JOIN instance_sled ");
+        out.push_sql("ON CONFLICT (external_group_id, parent_id) WHERE time_deleted IS NULL DO UPDATE SET state = CASE WHEN multicast_group_member.state = ");
+        out.push_sql(super::member_state_as_sql_literal(
+            MulticastGroupMemberState::Left,
+        ));
+        out.push_sql(" THEN ");
+        out.push_sql(super::member_state_as_sql_literal(
+            MulticastGroupMemberState::Joining,
+        ));
+        out.push_sql(" ELSE multicast_group_member.state END, sled_id = CASE WHEN multicast_group_member.state = ");
+        out.push_sql(super::member_state_as_sql_literal(
+            MulticastGroupMemberState::Left,
+        ));
+        out.push_sql(" THEN EXCLUDED.sled_id ELSE multicast_group_member.sled_id END, time_modified = CASE WHEN multicast_group_member.state = ");
+        out.push_sql(super::member_state_as_sql_literal(
+            MulticastGroupMemberState::Left,
+        ));
+        out.push_sql(" THEN EXCLUDED.time_modified ELSE multicast_group_member.time_modified END, time_deleted = CASE WHEN multicast_group_member.state = ");
+        out.push_sql(super::member_state_as_sql_literal(
+            MulticastGroupMemberState::Left,
+        ));
+        out.push_sql(" THEN NULL ELSE multicast_group_member.time_deleted END RETURNING id");
+        Ok(())
+    }
+
+    /// Generates the final SELECT (always returns exactly one row).
+    ///
+    /// LEFT JOIN pattern ensures we return a row even when group isn't active
+    /// or instance doesn't exist (which causes `upserted_member` CTE to return
+    /// zero rows).
+    ///
+    fn push_final_select<'a>(
+        &'a self,
+        mut out: AstPass<'_, 'a, Pg>,
+    ) -> QueryResult<()> {
+        out.push_sql(
+            "SELECT \
+            EXISTS(SELECT 1 FROM active_group) AS group_is_active, \
+            EXISTS(SELECT 1 FROM instance_sled) AS instance_exists, \
+            u.id AS member_id \
+            FROM (SELECT 1) AS dummy \
+            LEFT JOIN upserted_member u ON TRUE",
+        );
+        Ok(())
+    }
+}
+
+impl QueryFragment<Pg> for AttachMemberToGroupStatement {
+    fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> {
+        out.unsafe_to_cache_prepared();
+
+        // CTE: Check if group exists and is active
+        out.push_sql("WITH active_group AS (");
+        self.push_active_group_cte(out.reborrow())?;
+        out.push_sql("), ");
+
+        // CTE: Validate instance exists and get sled_id
+        out.push_sql("instance_sled AS (");
+        self.push_instance_sled_cte(out.reborrow())?;
+        out.push_sql("), ");
+
+        // CTE: Unconditional upsert (INSERT or UPDATE)
+        out.push_sql("upserted_member AS (");
+        self.push_upserted_member_cte(out.reborrow())?;
+        out.push_sql(") ");
+
+        // Final SELECT: always return a row with group validity check.
+        //
+        // We ensure that we are always returning a constant number of columns.
+        //
+        // In our case, the `upserted_member` CTE returns zero rows if the group
+        // is not active (because `FROM active_group` returns nothing). Without
+        // the LEFT JOIN, the final SELECT would return zero rows, which would be
+        // unparseable by Diesel (it expects exactly one row).
+        //
+        // The pattern we use is:
+        // - Start with a dummy scalar query `(SELECT 1)` to anchor the result
+        // - LEFT JOIN the `upserted_member` CTE, which may have zero or one row
+        // - Use `EXISTS(SELECT 1 FROM active_group)` to check group validity
+        //
+        // This ensures we always return exactly one row with a constant number
+        // of columns, even when the group doesn't exist or the upsert CTE returns
+        // nothing.
+        self.push_final_select(out.reborrow())?;
+
+        Ok(())
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/multicast/ops/member_reconcile.rs b/nexus/db-queries/src/db/datastore/multicast/ops/member_reconcile.rs
new file mode 100644
index 00000000000..5c837c2396f
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/multicast/ops/member_reconcile.rs
@@ -0,0 +1,758 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! CAS operations for reconciling "Joining" state members.
+//!
+//! Compare-And-Swap operations for the "Joining" member state. Unlike the atomic
+//! CTE in member_attach (handles initial attachment), these simpler CAS operations
+//! work for reconciliation since:
+//!
+//! - Instance state is fetched before calling
+//! - Multiple reconcilers on the same member are safe (idempotent)
+//!
+//! "Joining" is the handoff point from control plane to RPW, with the most
+//! complex state transitions:
+//!
+//! - Multiple possible next states (→ "Joined" or → "Left")
+//! - Multi-field updates (state + sled_id) must be atomic
+//! - Conditional logic based on instance_valid and sled_id changes
+//!
+//! Other states ("Joined", "Left") have simpler transitions using direct datastore
+//! methods (e.g., `multicast_group_member_to_left_if_current`).
+//!
+//! ## Operations
+//!
+//! 1. Instance invalid → transition to "Left", clear sled_id
+//! 2. sled_id changed → update to new sled (migration)
+//! 3. No change → return current state
+//!
+//! ## Usage
+//!
+//! Callers maintain member state from batch fetches and use returned `ReconcileAction`
+//! to decide what happened. The `current_state` and `current_sled_id` fields may be
+//! stale after failed CAS, so callers should use their own state view for decisions.
+
+use async_bb8_diesel::AsyncRunQueryDsl;
+use chrono::Utc;
+use diesel::prelude::*;
+use diesel::result::Error as DieselError;
+use uuid::Uuid;
+
+use nexus_db_lookup::DbConnection;
+use nexus_db_model::{
+    DbTypedUuid, MulticastGroupMember, MulticastGroupMemberState,
+};
+use nexus_db_schema::schema::multicast_group_member::dsl;
+use omicron_common::api::external::Error as ExternalError;
+use omicron_uuid_kinds::SledKind;
+
+/// Result of reconciling a "Joining" state member.
+#[derive(Debug, Clone, PartialEq)]
+pub struct ReconcileJoiningResult {
+    /// Action taken during reconciliation
+    pub action: ReconcileAction,
+    /// Current state after operation (None if member not found)
+    pub current_state: Option<MulticastGroupMemberState>,
+    /// Current sled_id after operation (None if member not found or has no sled)
+    pub current_sled_id: Option<DbTypedUuid<SledKind>>,
+}
+
+/// Actions taken when reconciling a "Joining" member.
+#[derive(Debug, Clone, PartialEq)]
+pub enum ReconcileAction {
+    /// Transitioned to "Left" because instance became invalid
+    TransitionedToLeft,
+    /// Updated sled_id to new value (stayed in "Joining")
+    UpdatedSledId {
+        old: Option<DbTypedUuid<SledKind>>,
+        new: Option<DbTypedUuid<SledKind>>,
+    },
+    /// No change made (member not in "Joining", or already correct)
+    NoChange,
+    /// Member not found or not in "Joining" state
+    NotFound,
+}
+
+/// Errors from reconciling a multicast group member.
+#[derive(Debug)]
+pub enum ReconcileMemberError {
+    /// Database constraint violation (unique index, etc.)
+    ConstraintViolation(String),
+    /// Other database error
+    DatabaseError(DieselError),
+}
+
+impl From<ReconcileMemberError> for ExternalError {
+    fn from(err: ReconcileMemberError) -> Self {
+        match err {
+            ReconcileMemberError::ConstraintViolation(msg) => {
+                ExternalError::invalid_request(&format!(
+                    "Constraint violation: {msg}"
+                ))
+            }
+            ReconcileMemberError::DatabaseError(e) => {
+                ExternalError::internal_error(&format!("Database error: {e:?}"))
+            }
+        }
+    }
+}
+
+/// Reconcile a "Joining" state member using simple CAS operations.
+///
+/// Takes instance validity and desired sled_id as inputs (from separate
+/// instance/VMM lookups) and performs appropriate CAS operation to update
+/// member state.
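+///
+/// A rough usage sketch (the connection handle and IDs shown here are
+/// illustrative, not taken from this change; the action variants are real):
+///
+/// ```ignore
+/// let result = reconcile_joining_member(
+///     &conn,
+///     group_id,
+///     instance_id,
+///     instance_valid,
+///     current_sled_id,
+/// )
+/// .await?;
+/// match result.action {
+///     ReconcileAction::TransitionedToLeft => { /* member left; clean up dataplane */ }
+///     ReconcileAction::UpdatedSledId { .. } => { /* instance moved sleds; reprogram */ }
+///     ReconcileAction::NoChange | ReconcileAction::NotFound => { /* nothing to do */ }
+/// }
+/// ```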
+///
+/// # Arguments
+///
+/// - `conn`: Database connection
+/// - `group_id`: Multicast group
+/// - `instance_id`: Instance being reconciled
+/// - `instance_valid`: Whether instance is in valid state for multicast
+/// - `current_sled_id`: Instance's current sled_id (from VMM lookup)
+pub async fn reconcile_joining_member(
+    conn: &async_bb8_diesel::Connection<DbConnection>,
+    group_id: Uuid,
+    instance_id: Uuid,
+    instance_valid: bool,
+    current_sled_id: Option<DbTypedUuid<SledKind>>,
+) -> Result<ReconcileJoiningResult, ReconcileMemberError> {
+    // First, read the current member state
+    let member_opt: Option<MulticastGroupMember> = dsl::multicast_group_member
+        .filter(dsl::external_group_id.eq(group_id))
+        .filter(dsl::parent_id.eq(instance_id))
+        .filter(dsl::time_deleted.is_null())
+        .filter(dsl::state.eq(MulticastGroupMemberState::Joining))
+        .first_async(conn)
+        .await
+        .optional()
+        .map_err(|e| ReconcileMemberError::DatabaseError(e))?;
+
+    let Some(member) = member_opt else {
+        return Ok(ReconcileJoiningResult {
+            action: ReconcileAction::NotFound,
+            current_state: None,
+            current_sled_id: None,
+        });
+    };
+
+    let prior_sled_id = member.sled_id;
+
+    // Determine what action to take based on instance validity
+    if !instance_valid {
+        // Instance is invalid - transition to "Left"
+        let updated = diesel::update(dsl::multicast_group_member)
+            .filter(dsl::id.eq(member.id))
+            .filter(dsl::state.eq(MulticastGroupMemberState::Joining))
+            .set((
+                dsl::state.eq(MulticastGroupMemberState::Left),
+                dsl::sled_id.eq(None::<DbTypedUuid<SledKind>>),
+                dsl::time_modified.eq(Utc::now()),
+            ))
+            .execute_async(conn)
+            .await
+            .map_err(|e| match &e {
+                DieselError::DatabaseError(kind, info) => match kind {
+                    diesel::result::DatabaseErrorKind::UniqueViolation => {
+                        ReconcileMemberError::ConstraintViolation(
+                            info.message().to_string(),
+                        )
+                    }
+                    _ => ReconcileMemberError::DatabaseError(e),
+                },
+                _ => ReconcileMemberError::DatabaseError(e),
+            })?;
+
+        if updated > 0 {
+            Ok(ReconcileJoiningResult {
+                action: ReconcileAction::TransitionedToLeft,
+                current_state: Some(MulticastGroupMemberState::Left),
+                current_sled_id: None,
+            })
+        } else {
+            // Member changed state between read and update
+            Ok(ReconcileJoiningResult {
+                action: ReconcileAction::NoChange,
+                current_state: Some(member.state),
+                current_sled_id: prior_sled_id,
+            })
+        }
+    } else if prior_sled_id != current_sled_id {
+        // Instance is valid but sled_id needs updating
+        let updated = diesel::update(dsl::multicast_group_member)
+            .filter(dsl::id.eq(member.id))
+            .filter(dsl::state.eq(MulticastGroupMemberState::Joining))
+            .set((
+                dsl::sled_id.eq(current_sled_id),
+                dsl::time_modified.eq(Utc::now()),
+            ))
+            .execute_async(conn)
+            .await
+            .map_err(|e| match &e {
+                DieselError::DatabaseError(kind, info) => match kind {
+                    diesel::result::DatabaseErrorKind::UniqueViolation => {
+                        ReconcileMemberError::ConstraintViolation(
+                            info.message().to_string(),
+                        )
+                    }
+                    _ => ReconcileMemberError::DatabaseError(e),
+                },
+                _ => ReconcileMemberError::DatabaseError(e),
+            })?;
+
+        if updated > 0 {
+            Ok(ReconcileJoiningResult {
+                action: ReconcileAction::UpdatedSledId {
+                    old: prior_sled_id,
+                    new: current_sled_id,
+                },
+                current_state: Some(MulticastGroupMemberState::Joining),
+                current_sled_id,
+            })
+        } else {
+            // Member changed state between read and update
+            Ok(ReconcileJoiningResult {
+                action: ReconcileAction::NoChange,
+                current_state: Some(member.state),
+                current_sled_id: prior_sled_id,
+            })
+        }
+    } else {
+        // No change needed
+        Ok(ReconcileJoiningResult {
+            action: ReconcileAction::NoChange,
+            current_state: Some(MulticastGroupMemberState::Joining),
current_sled_id: prior_sled_id, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use nexus_types::identity::Resource; + use omicron_test_utils::dev; + use omicron_uuid_kinds::{ + GenericUuid, InstanceUuid, MulticastGroupUuid, SledUuid, + }; + + use crate::db::pub_test_utils::helpers::{ + SledUpdateBuilder, create_instance_with_vmm, + }; + use crate::db::pub_test_utils::{TestDatabase, multicast}; + + #[tokio::test] + async fn test_reconcile_joining_instance_invalid() { + let logctx = + dev::test_setup_log("test_reconcile_joining_instance_invalid"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "reconcile-invalid-pool", + "reconcile-invalid-project", + ) + .await; + + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.12", + true, + ) + .await; + + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Attach instance to create member in Joining state + datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should attach instance"); + + // Reconcile with instance_valid=false (instance stopped/deleted) + let conn = datastore.pool_connection_authorized(&opctx).await.unwrap(); + let result = reconcile_joining_member( + &conn, + group.id(), + instance_id, + false, // instance_valid=false + Some(setup.sled_id.into()), + ) + .await + .expect("Should reconcile"); + + assert_eq!(result.action, ReconcileAction::TransitionedToLeft); + assert_eq!(result.current_state, Some(MulticastGroupMemberState::Left)); + assert_eq!(result.current_sled_id, None); + + // Verify database state + let member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member") + .expect("Member should exist"); + + assert_eq!(member.state, MulticastGroupMemberState::Left); + assert_eq!(member.sled_id, None); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_reconcile_joining_sled_id_changed() { + let logctx = + dev::test_setup_log("test_reconcile_joining_sled_id_changed"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "reconcile-migrate-pool", + "reconcile-migrate-project", + ) + .await; + + // Create second sled for migration + let sled_id_new = SledUuid::new_v4(); + let sled_update2 = + SledUpdateBuilder::default().sled_id(sled_id_new).build(); + datastore + .sled_upsert(sled_update2) + .await + .expect("Should insert second sled"); + + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.13", + true, + ) + .await; + + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Attach instance + datastore + .multicast_group_member_attach_to_instance( + &opctx, + 
MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should attach instance"); + + // Reconcile with new sled_id (simulating migration) + let conn = datastore.pool_connection_authorized(&opctx).await.unwrap(); + let result = reconcile_joining_member( + &conn, + group.id(), + instance_id, + true, // instance_valid=true + Some(sled_id_new.into()), + ) + .await + .expect("Should reconcile"); + + match result.action { + ReconcileAction::UpdatedSledId { old, new } => { + assert_eq!(old, Some(setup.sled_id.into())); + assert_eq!(new, Some(sled_id_new.into())); + } + other => panic!("Expected UpdatedSledId, got {other:?}"), + } + assert_eq!( + result.current_state, + Some(MulticastGroupMemberState::Joining) + ); + assert_eq!(result.current_sled_id, Some(sled_id_new.into())); + + // Verify database state + let member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member") + .expect("Member should exist"); + + assert_eq!(member.state, MulticastGroupMemberState::Joining); + assert_eq!(member.sled_id, Some(sled_id_new.into())); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_reconcile_joining_no_change_needed() { + let logctx = + dev::test_setup_log("test_reconcile_joining_no_change_needed"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "reconcile-nochange-pool", + "reconcile-nochange-project", + ) + .await; + + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.14", + true, + ) + .await; + + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Attach instance + datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should attach instance"); + + let member_before = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member") + .expect("Member should exist"); + let time_modified_before = member_before.time_modified; + + // Reconcile with same sled_id and valid instance + let conn = datastore.pool_connection_authorized(&opctx).await.unwrap(); + let result = reconcile_joining_member( + &conn, + group.id(), + instance_id, + true, // instance_valid=true + Some(setup.sled_id.into()), + ) + .await + .expect("Should reconcile"); + + assert_eq!(result.action, ReconcileAction::NoChange); + assert_eq!( + result.current_state, + Some(MulticastGroupMemberState::Joining) + ); + assert_eq!(result.current_sled_id, Some(setup.sled_id.into())); + + // Verify time_modified unchanged (no database update) + let member_after = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member") + .expect("Member should exist"); + + 
assert_eq!(member_after.time_modified, time_modified_before); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_reconcile_joining_member_not_found() { + let logctx = + dev::test_setup_log("test_reconcile_joining_member_not_found"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "reconcile-notfound-pool", + "reconcile-notfound-project", + ) + .await; + + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.15", + true, + ) + .await; + + // Create instance but don't attach it + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Reconcile non-existent member + let conn = datastore.pool_connection_authorized(&opctx).await.unwrap(); + let result = reconcile_joining_member( + &conn, + group.id(), + instance_id, + true, + Some(setup.sled_id.into()), + ) + .await + .expect("Should reconcile"); + + assert_eq!(result.action, ReconcileAction::NotFound); + assert_eq!(result.current_state, None); + assert_eq!(result.current_sled_id, None); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_reconcile_joining_concurrent_state_change() { + let logctx = dev::test_setup_log( + "test_reconcile_joining_concurrent_state_change", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "reconcile-concurrent-pool", + "reconcile-concurrent-project", + ) + .await; + + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.16", + true, + ) + .await; + + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + setup.sled_id, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Attach instance + datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should attach instance"); + + // Transition member to Joined state before reconciliation + datastore + .multicast_group_member_set_state_if_current( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + MulticastGroupMemberState::Joining, + MulticastGroupMemberState::Joined, + ) + .await + .expect("Should transition to Joined"); + + // Attempt to reconcile - should return NotFound since not in Joining + let conn = datastore.pool_connection_authorized(&opctx).await.unwrap(); + let result = reconcile_joining_member( + &conn, + group.id(), + instance_id, + false, // Would transition to Left if still Joining + Some(setup.sled_id.into()), + ) + .await + .expect("Should reconcile"); + + // Should return NotFound because member is not in Joining state + assert_eq!(result.action, ReconcileAction::NotFound); + + // Verify member is still in Joined state + let member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get 
member") + .expect("Member should exist"); + + assert_eq!(member.state, MulticastGroupMemberState::Joined); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_reconcile_joining_migration_scenario() { + let logctx = + dev::test_setup_log("test_reconcile_joining_migration_scenario"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let setup = multicast::create_test_setup( + &opctx, + &datastore, + "reconcile-migration-pool", + "reconcile-migration-project", + ) + .await; + + // Create two sleds for migration scenario + let sled_id_a = setup.sled_id; + + let sled_id_b = SledUuid::new_v4(); + let sled_update_b = + SledUpdateBuilder::default().sled_id(sled_id_b).build(); + datastore + .sled_upsert(sled_update_b) + .await + .expect("Should insert sled B"); + + let group = multicast::create_test_group_with_state( + &opctx, + &datastore, + &setup, + "test-group", + "224.10.1.17", + true, + ) + .await; + + let (instance, _vmm) = create_instance_with_vmm( + &opctx, + &datastore, + &setup.authz_project, + "test-instance", + sled_id_a, + ) + .await; + let instance_id = *instance.as_untyped_uuid(); + + // Attach instance (starts on sled_a) + datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should attach instance"); + + // Simulate migration: reconcile with sled_id_b + let conn = datastore.pool_connection_authorized(&opctx).await.unwrap(); + let result = reconcile_joining_member( + &conn, + group.id(), + instance_id, + true, + Some(sled_id_b.into()), + ) + .await + .expect("Should reconcile migration"); + + // Should update sled_id but remain in Joining + match result.action { + ReconcileAction::UpdatedSledId { old, new } => { + assert_eq!(old, Some(sled_id_a.into())); + assert_eq!(new, Some(sled_id_b.into())); + } + other => panic!("Expected UpdatedSledId, got {:?}", other), + } + assert_eq!( + result.current_state, + Some(MulticastGroupMemberState::Joining) + ); + + // Verify member remains in Joining state with new sled_id + let member = datastore + .multicast_group_member_get_by_group_and_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .expect("Should get member") + .expect("Member should exist"); + + assert_eq!(member.state, MulticastGroupMemberState::Joining); + assert_eq!(member.sled_id, Some(sled_id_b.into())); + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/multicast/ops/mod.rs b/nexus/db-queries/src/db/datastore/multicast/ops/mod.rs new file mode 100644 index 00000000000..3b1c3a48974 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/multicast/ops/mod.rs @@ -0,0 +1,67 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Atomic database operations for multicast group members. +//! +//! Different operations need different concurrency patterns: +//! +//! ## Operations +//! +//! - **member_attach**: Atomic CTE for attaching instances to groups +//! - Used by instance create saga and reconfiguration +//! - Idempotent reactivation from "Left" state +//! - Validates group is "Active" before attaching +//! 
- Single CTE atomically validates group + instance + upserts member +//! +//! - **member_reconcile**: CAS operations for RPW reconciler +//! - Background sled_id updates during migration +//! - Transitions to "Left" when instance stops +//! +//! ## Design +//! +//! **member_attach uses CTE**: Prevents Time-of-Check-to-Time-of-Use (TOCTOU) +//! races where group or instance state changes between validation and member +//! creation. +//! +//! **member_reconcile uses CAS**: Reconciler already has instance state from +//! batch fetches, so simpler CAS is sufficient. +//! +//! ## Common Utils +//! +//! Helper functions convert state enums to SQL literals with compile-time +//! safety (ensures SQL strings match enum definitions). + +use nexus_db_model::{MulticastGroupMemberState, MulticastGroupState}; + +pub mod member_attach; +pub mod member_reconcile; + +/// Returns SQL literal for a group state (e.g., "'active'"). +/// +/// Compile-time safety: state names in SQL must match enum definition. +/// Returned string includes single quotes for direct SQL interpolation. +pub(super) const fn group_state_as_sql_literal( + state: MulticastGroupState, +) -> &'static str { + match state { + MulticastGroupState::Creating => "'creating'", + MulticastGroupState::Active => "'active'", + MulticastGroupState::Deleting => "'deleting'", + MulticastGroupState::Deleted => "'deleted'", + } +} + +/// Returns SQL literal for a member state (e.g., "'joined'"). +/// +/// Compile-time safety: state names in SQL must match enum definition. +/// Returned string includes single quotes for direct SQL interpolation. +pub(super) const fn member_state_as_sql_literal( + state: MulticastGroupMemberState, +) -> &'static str { + match state { + MulticastGroupMemberState::Joining => "'joining'", + MulticastGroupMemberState::Joined => "'joined'", + MulticastGroupMemberState::Left => "'left'", + } +} diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index 0a4f24e0e7c..5cf7f9a9f73 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -460,6 +460,7 @@ mod test { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ), ) diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index b679bd63f6e..8fb61894efa 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -4016,6 +4016,7 @@ mod tests { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ), ) diff --git a/nexus/db-queries/src/db/pub_test_utils/helpers.rs b/nexus/db-queries/src/db/pub_test_utils/helpers.rs index 2fda16ef2a6..cbab648851f 100644 --- a/nexus/db-queries/src/db/pub_test_utils/helpers.rs +++ b/nexus/db-queries/src/db/pub_test_utils/helpers.rs @@ -250,6 +250,7 @@ pub async fn create_stopped_instance_record( start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ); diff --git a/nexus/db-queries/src/db/pub_test_utils/mod.rs b/nexus/db-queries/src/db/pub_test_utils/mod.rs index 6662fe8cc06..be7ef037c8f 100644 --- a/nexus/db-queries/src/db/pub_test_utils/mod.rs +++ b/nexus/db-queries/src/db/pub_test_utils/mod.rs @@ -20,6 +20,7 @@ use uuid::Uuid; pub mod crdb; pub mod helpers; 
+pub mod multicast; enum Populate { Nothing, diff --git a/nexus/db-queries/src/db/pub_test_utils/multicast.rs b/nexus/db-queries/src/db/pub_test_utils/multicast.rs new file mode 100644 index 00000000000..dbbbcd638e0 --- /dev/null +++ b/nexus/db-queries/src/db/pub_test_utils/multicast.rs @@ -0,0 +1,218 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Multicast-specific datastore test helpers. + +use std::net::Ipv4Addr; + +use uuid::Uuid; + +use nexus_db_model::MulticastGroupState; +use nexus_db_model::{ + IncompleteVpc, IpPool, IpPoolReservationType, IpPoolResource, + IpPoolResourceType, IpVersion, +}; +use nexus_types::external_api::params; +use nexus_types::external_api::shared::{IpRange, Ipv4Range}; +use nexus_types::identity::Resource; +use omicron_common::api::external::{IdentityMetadataCreateParams, LookupType}; +use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid, SledUuid}; + +use crate::authz; +use crate::context::OpContext; +use crate::db::DataStore; +use crate::db::pub_test_utils::helpers::{SledUpdateBuilder, create_project}; + +/// Common test setup for multicast datastore tests. +pub struct TestSetup { + pub authz_project: authz::Project, + pub project_id: Uuid, + pub authz_pool: authz::IpPool, + pub authz_vpc: authz::Vpc, + pub vpc_id: Uuid, + pub sled_id: SledUuid, +} + +/// Create a standard test setup with database, project, IP pool, and sled. +pub async fn create_test_setup( + opctx: &OpContext, + datastore: &DataStore, + pool_name: &'static str, + project_name: &'static str, +) -> TestSetup { + create_test_setup_with_range( + opctx, + datastore, + pool_name, + project_name, + (224, 10, 1, 1), + (224, 10, 1, 254), + ) + .await +} + +/// Create a test setup with a custom IPv4 multicast range for the pool. 
+pub async fn create_test_setup_with_range( + opctx: &OpContext, + datastore: &DataStore, + pool_name: &'static str, + project_name: &'static str, + range_start: (u8, u8, u8, u8), + range_end: (u8, u8, u8, u8), +) -> TestSetup { + // Create project using the existing helper + let (authz_project, project) = + create_project(opctx, datastore, project_name).await; + let project_id = project.id(); + + // Create VPC for multicast groups + let vpc_params = params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: format!("{}-vpc", project_name).parse().unwrap(), + description: format!("Test VPC for project {}", project_name), + }, + ipv6_prefix: None, + dns_name: format!("{}-vpc", project_name).parse().unwrap(), + }; + + let vpc = IncompleteVpc::new( + Uuid::new_v4(), + project_id, + Uuid::new_v4(), // system_router_id + vpc_params, + ) + .expect("Should create incomplete VPC"); + + let (authz_vpc, vpc_record) = datastore + .project_create_vpc(&opctx, &authz_project, vpc) + .await + .expect("Should create VPC"); + let vpc_id = vpc_record.id(); + + // Create multicast IP pool + let pool_identity = IdentityMetadataCreateParams { + name: pool_name.parse().unwrap(), + description: format!("Test multicast pool: {}", pool_name), + }; + + let ip_pool = datastore + .ip_pool_create( + &opctx, + IpPool::new_multicast( + &pool_identity, + IpVersion::V4, + IpPoolReservationType::ExternalSilos, + ), + ) + .await + .expect("Should create multicast IP pool"); + + let authz_pool = authz::IpPool::new( + crate::authz::FLEET, + ip_pool.id(), + LookupType::ById(ip_pool.id()), + ); + + // Add range to pool + let range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new( + range_start.0, + range_start.1, + range_start.2, + range_start.3, + ), + Ipv4Addr::new(range_end.0, range_end.1, range_end.2, range_end.3), + ) + .unwrap(), + ); + datastore + .ip_pool_add_range(&opctx, &authz_pool, &ip_pool, &range) + .await + .expect("Should add multicast range to pool"); + + // Link pool to silo + let link = IpPoolResource { + resource_id: opctx.authn.silo_required().unwrap().id(), + resource_type: IpPoolResourceType::Silo, + ip_pool_id: ip_pool.id(), + is_default: false, + }; + datastore + .ip_pool_link_silo(&opctx, link) + .await + .expect("Should link multicast pool to silo"); + + // Create sled + let sled_id = SledUuid::new_v4(); + let sled_update = SledUpdateBuilder::new().sled_id(sled_id).build(); + datastore.sled_upsert(sled_update).await.unwrap(); + + TestSetup { + authz_project, + project_id, + authz_pool, + authz_vpc, + vpc_id, + sled_id, + } +} + +/// Create a test multicast group with the given parameters. +pub async fn create_test_group( + opctx: &OpContext, + datastore: &DataStore, + setup: &TestSetup, + group_name: &str, + multicast_ip: &str, +) -> nexus_db_model::ExternalMulticastGroup { + create_test_group_with_state( + opctx, + datastore, + setup, + group_name, + multicast_ip, + false, + ) + .await +} + +/// Create a test multicast group, optionally transitioning to "Active" state. 
+pub async fn create_test_group_with_state( + opctx: &OpContext, + datastore: &DataStore, + setup: &TestSetup, + group_name: &str, + multicast_ip: &str, + make_active: bool, +) -> nexus_db_model::ExternalMulticastGroup { + let params = params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: format!("Test group: {}", group_name), + }, + multicast_ip: Some(multicast_ip.parse().unwrap()), + source_ips: None, + pool: None, + mvlan: None, + }; + + let group = datastore + .multicast_group_create(&opctx, ¶ms, Some(setup.authz_pool.clone())) + .await + .expect("Should create multicast group"); + + if make_active { + datastore + .multicast_group_set_state( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + MulticastGroupState::Active, + ) + .await + .expect("Should transition group to 'Active' state"); + } + + group +} diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index f8c07314a42..a9353171cc0 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -960,6 +960,7 @@ mod tests { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }); let conn = self diff --git a/nexus/db-queries/src/db/queries/external_multicast_group.rs b/nexus/db-queries/src/db/queries/external_multicast_group.rs new file mode 100644 index 00000000000..2324e3bc4b1 --- /dev/null +++ b/nexus/db-queries/src/db/queries/external_multicast_group.rs @@ -0,0 +1,272 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Queries for allocating external, customer-facing multicast groups from IP +//! pools. +//! +//! Based on [`super::external_ip`] allocation code, adapted for multicast +//! group semantics. + +use chrono::{DateTime, Utc}; +use diesel::pg::Pg; +use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; +use diesel::{Column, QueryResult, RunQueryDsl, sql_types}; +use ipnetwork::IpNetwork; +use uuid::Uuid; + +use nexus_db_lookup::DbConnection; +use nexus_db_schema::schema; + +use crate::db::model::{ + ExternalMulticastGroup, Generation, IncompleteExternalMulticastGroup, + MulticastGroupState, Name, Vni, +}; +use crate::db::true_or_cast_error::matches_sentinel; + +const REALLOCATION_WITH_DIFFERENT_MULTICAST_GROUP_SENTINEL: &'static str = + "Reallocation of multicast group with different configuration"; + +/// Converts multicast group allocation errors to external errors. +pub fn from_diesel( + e: diesel::result::Error, +) -> omicron_common::api::external::Error { + let sentinels = [REALLOCATION_WITH_DIFFERENT_MULTICAST_GROUP_SENTINEL]; + if let Some(sentinel) = matches_sentinel(&e, &sentinels) { + match sentinel { + REALLOCATION_WITH_DIFFERENT_MULTICAST_GROUP_SENTINEL => { + return omicron_common::api::external::Error::invalid_request( + "Re-allocating multicast group with different configuration", + ); + } + // Fall-through to the generic error conversion. + _ => {} + } + } + + nexus_db_errors::public_error_from_diesel( + e, + nexus_db_errors::ErrorHandler::Server, + ) +} + +/// Query to allocate next available external multicast group address from IP pools. +/// +/// Similar pattern to [`super::external_ip::NextExternalIp`] but for multicast +/// addresses. 
Handles pool-based allocation, explicit address requests, and +/// idempotency. +pub struct NextExternalMulticastGroup { + group: IncompleteExternalMulticastGroup, + now: DateTime, +} + +impl NextExternalMulticastGroup { + pub fn new(group: IncompleteExternalMulticastGroup) -> Self { + let now = Utc::now(); + Self { group, now } + } + + fn push_next_multicast_ip_subquery<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> QueryResult<()> { + out.push_sql("SELECT "); + out.push_bind_param::(&self.group.id)?; + out.push_sql(" AS id, "); + + out.push_bind_param::(&self.group.name)?; + out.push_sql(" AS name, "); + out.push_bind_param::( + &self.group.description, + )?; + out.push_sql(" AS description, "); + + out.push_bind_param::>( + &self.now, + )?; + out.push_sql(" AS time_created, "); + out.push_bind_param::>( + &self.now, + )?; + out.push_sql(" AS time_modified, "); + + out.push_bind_param::, Option>>(&None)?; + out.push_sql(" AS time_deleted, "); + + // Pool ID from the candidates subquery (like external IP) + out.push_sql("ip_pool_id, "); + + // Pool range ID from the candidates subquery + out.push_sql("ip_pool_range_id, "); + + // VNI + out.push_bind_param::(&self.group.vni)?; + out.push_sql(" AS vni, "); + + // The multicast IP comes from the candidates subquery + out.push_sql("candidate_ip AS multicast_ip, "); + + // Handle source IPs array + out.push_sql("ARRAY["); + for (i, source_ip) in self.group.source_ips.iter().enumerate() { + if i > 0 { + out.push_sql(", "); + } + out.push_bind_param::( + source_ip, + )?; + } + out.push_sql("]::inet[] AS source_ips, "); + + // MVLAN for external uplink forwarding + out.push_bind_param::, Option>(&self.group.mvlan)?; + out.push_sql(" AS mvlan, "); + + out.push_bind_param::, Option>(&None)?; + out.push_sql(" AS underlay_group_id, "); + + out.push_bind_param::, Option>(&self.group.tag)?; + out.push_sql(" AS tag, "); + + // New multicast groups start in "Creating" state (RPW pattern) + out.push_bind_param::(&MulticastGroupState::Creating)?; + out.push_sql(" AS state, "); + + out.push_sql("nextval('omicron.public.multicast_group_version') AS version_added, "); + out.push_bind_param::, Option>(&None)?; + out.push_sql(" AS version_removed"); + + // FROM the candidates subquery with LEFT JOIN (like external IP) + out.push_sql(" FROM ("); + self.push_address_candidates_subquery(out.reborrow())?; + out.push_sql(") LEFT OUTER JOIN "); + schema::multicast_group::table.walk_ast(out.reborrow())?; + out.push_sql( + " ON (multicast_ip = candidate_ip AND time_deleted IS NULL)", + ); + out.push_sql( + " WHERE candidate_ip IS NOT NULL AND multicast_ip IS NULL LIMIT 1", + ); + + Ok(()) + } + + fn push_address_candidates_subquery<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> QueryResult<()> { + use schema::ip_pool_range::dsl; + + out.push_sql("SELECT "); + out.push_identifier(dsl::ip_pool_id::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(" AS ip_pool_range_id, "); + + // Handle explicit address vs automatic allocation + if let Some(explicit_addr) = &self.group.explicit_address { + out.push_sql("CASE "); + out.push_identifier(dsl::first_address::NAME)?; + out.push_sql(" <= "); + out.push_bind_param::(explicit_addr)?; + out.push_sql(" AND "); + out.push_bind_param::(explicit_addr)?; + out.push_sql(" <= "); + out.push_identifier(dsl::last_address::NAME)?; + out.push_sql(" WHEN TRUE THEN "); + out.push_bind_param::(explicit_addr)?; + out.push_sql(" ELSE NULL END"); + } else { + // Generate series of candidate IPs 
(like external IP does) + out.push_identifier(dsl::first_address::NAME)?; + out.push_sql(" + generate_series(0, "); + out.push_identifier(dsl::last_address::NAME)?; + out.push_sql(" - "); + out.push_identifier(dsl::first_address::NAME)?; + out.push_sql(")"); + } + + out.push_sql(" AS candidate_ip FROM "); + schema::ip_pool_range::table.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(dsl::ip_pool_id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.group.ip_pool_id)?; + out.push_sql(" AND "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(" IS NULL"); + // Filter for multicast address ranges (224.0.0.0/4 for IPv4, + // ff00::/8 for IPv6) + out.push_sql(" AND ("); + out.push_identifier(dsl::first_address::NAME)?; + out.push_sql(" << '224.0.0.0/4'::inet OR "); + out.push_identifier(dsl::first_address::NAME)?; + out.push_sql(" << 'ff00::/8'::inet)"); + + Ok(()) + } + + fn push_prior_allocation_subquery<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> QueryResult<()> { + out.push_sql("SELECT * FROM "); + schema::multicast_group::table.walk_ast(out.reborrow())?; + out.push_sql(" WHERE id = "); + out.push_bind_param::(&self.group.id)?; + out.push_sql(" AND time_deleted IS NULL"); + Ok(()) + } +} + +impl QueryFragment for NextExternalMulticastGroup { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + // Create CTE for candidate multicast group + out.push_sql("WITH next_external_multicast_group AS ("); + self.push_next_multicast_ip_subquery(out.reborrow())?; + out.push_sql("), "); + + // Check for existing allocation (idempotency) + out.push_sql("previously_allocated_group AS ("); + self.push_prior_allocation_subquery(out.reborrow())?; + out.push_sql("), "); + + // Insert new record or return existing one + out.push_sql("multicast_group AS ("); + out.push_sql("INSERT INTO "); + schema::multicast_group::table.walk_ast(out.reborrow())?; + out.push_sql( + " (id, name, description, time_created, time_modified, time_deleted, ip_pool_id, ip_pool_range_id, vni, multicast_ip, source_ips, mvlan, underlay_group_id, tag, state, version_added, version_removed) + SELECT id, name, description, time_created, time_modified, time_deleted, ip_pool_id, ip_pool_range_id, vni, multicast_ip, source_ips, mvlan, underlay_group_id, tag, state, version_added, version_removed FROM next_external_multicast_group + WHERE NOT EXISTS (SELECT 1 FROM previously_allocated_group) + RETURNING id, name, description, time_created, time_modified, time_deleted, ip_pool_id, ip_pool_range_id, vni, multicast_ip, source_ips, mvlan, underlay_group_id, tag, state, version_added, version_removed", + ); + out.push_sql(") "); + + // Return either the newly inserted or previously allocated group + out.push_sql( + "SELECT id, name, description, time_created, time_modified, time_deleted, ip_pool_id, ip_pool_range_id, vni, multicast_ip, source_ips, mvlan, underlay_group_id, tag, state, version_added, version_removed FROM previously_allocated_group + UNION ALL + SELECT id, name, description, time_created, time_modified, time_deleted, ip_pool_id, ip_pool_range_id, vni, multicast_ip, source_ips, mvlan, underlay_group_id, tag, state, version_added, version_removed FROM multicast_group", + ); + + Ok(()) + } +} + +impl QueryId for NextExternalMulticastGroup { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl Query for NextExternalMulticastGroup { + type SqlType = <>::SelectExpression as 
diesel::Expression>::SqlType; +} + +impl RunQueryDsl for NextExternalMulticastGroup {} diff --git a/nexus/db-queries/src/db/queries/mod.rs b/nexus/db-queries/src/db/queries/mod.rs index 78e4dc55955..9c6e0d8db60 100644 --- a/nexus/db-queries/src/db/queries/mod.rs +++ b/nexus/db-queries/src/db/queries/mod.rs @@ -7,6 +7,7 @@ pub mod disk; pub mod external_ip; +pub mod external_multicast_group; pub mod ip_pool; #[macro_use] mod next_item; diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 0881f5d0560..2ebae5d44dc 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -2023,6 +2023,7 @@ mod tests { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let instance = Instance::new(instance_id, project_id, ¶ms); diff --git a/nexus/db-queries/src/policy_test/resource_builder.rs b/nexus/db-queries/src/policy_test/resource_builder.rs index d6dd1f2721b..2605ac32aa7 100644 --- a/nexus/db-queries/src/policy_test/resource_builder.rs +++ b/nexus/db-queries/src/policy_test/resource_builder.rs @@ -283,6 +283,7 @@ impl_dyn_authorized_resource_for_resource!(authz::Alert); impl_dyn_authorized_resource_for_resource!(authz::AlertReceiver); impl_dyn_authorized_resource_for_resource!(authz::WebhookSecret); impl_dyn_authorized_resource_for_resource!(authz::Zpool); +impl_dyn_authorized_resource_for_resource!(authz::MulticastGroup); impl_dyn_authorized_resource_for_global!(authz::AlertClassList); impl_dyn_authorized_resource_for_global!(authz::BlueprintConfig); @@ -291,6 +292,7 @@ impl_dyn_authorized_resource_for_global!(authz::Database); impl_dyn_authorized_resource_for_global!(authz::DeviceAuthRequestList); impl_dyn_authorized_resource_for_global!(authz::DnsConfig); impl_dyn_authorized_resource_for_global!(authz::IpPoolList); +impl_dyn_authorized_resource_for_global!(authz::MulticastGroupList); impl_dyn_authorized_resource_for_global!(authz::AuditLog); impl_dyn_authorized_resource_for_global!(authz::Inventory); impl_dyn_authorized_resource_for_global!(authz::QuiesceState); diff --git a/nexus/db-queries/src/policy_test/resources.rs b/nexus/db-queries/src/policy_test/resources.rs index f8a6f2890e2..3cafe9ba148 100644 --- a/nexus/db-queries/src/policy_test/resources.rs +++ b/nexus/db-queries/src/policy_test/resources.rs @@ -76,6 +76,7 @@ pub async fn make_resources( builder.new_resource(authz::DEVICE_AUTH_REQUEST_LIST); builder.new_resource(authz::INVENTORY); builder.new_resource(authz::IP_POOL_LIST); + builder.new_resource(authz::MULTICAST_GROUP_LIST); builder.new_resource(authz::QUIESCE_STATE); builder.new_resource(authz::UPDATE_TRUST_ROOT_LIST); builder.new_resource(authz::TARGET_RELEASE_CONFIG); @@ -368,6 +369,14 @@ async fn make_project( Uuid::new_v4(), LookupType::ByName(disk_name.clone()), )); + + let multicast_group_name = format!("{project_name}-multicast-group1"); + builder.new_resource(authz::MulticastGroup::new( + authz::FLEET, + Uuid::new_v4(), + LookupType::ByName(multicast_group_name), + )); + builder.new_resource(affinity_group.clone()); builder.new_resource(anti_affinity_group.clone()); builder.new_resource(instance.clone()); diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index c11031b873a..f16753bbc88 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -134,6 
+134,23 @@ resource: authz::IpPoolList unauthenticated ! ! ! ! ! ! ! ! scim ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ +resource: authz::MulticastGroupList + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + fleet-collaborator ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + fleet-viewer ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-admin ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-collaborator ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-limited-collaborator ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-viewer ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-proj1-admin ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-proj1-collaborator ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-proj1-limited-collaborator ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + silo1-proj1-viewer ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + unauthenticated ! ! ! ! ! ! ! ! + scim ✘ ✘ ✔ ✘ ✘ ✘ ✔ ✘ + resource: authz::QuiesceState USER Q R LC RP M MP CC D @@ -508,6 +525,23 @@ resource: Disk "silo1-proj1-disk1" unauthenticated ! ! ! ! ! ! ! ! scim ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ +resource: MulticastGroup "silo1-proj1-multicast-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + fleet-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✘ ✔ + fleet-viewer ✘ ✔ ✔ ✔ ✔ ✔ ✘ ✔ + silo1-admin ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-limited-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-viewer ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-admin ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-limited-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-viewer ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + unauthenticated ! ! ! ! ! ! ! ! + scim ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + resource: AffinityGroup "silo1-proj1-affinity-group1" USER Q R LC RP M MP CC D @@ -763,6 +797,23 @@ resource: Disk "silo1-proj2-disk1" unauthenticated ! ! ! ! ! ! ! ! scim ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ +resource: MulticastGroup "silo1-proj2-multicast-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + fleet-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✘ ✔ + fleet-viewer ✘ ✔ ✔ ✔ ✔ ✔ ✘ ✔ + silo1-admin ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-limited-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-viewer ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-admin ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-limited-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-viewer ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + unauthenticated ! ! ! ! ! ! ! ! + scim ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + resource: AffinityGroup "silo1-proj2-affinity-group1" USER Q R LC RP M MP CC D @@ -1307,6 +1358,23 @@ resource: Disk "silo2-proj1-disk1" unauthenticated ! ! ! ! ! ! ! ! scim ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ +resource: MulticastGroup "silo2-proj1-multicast-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + fleet-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✘ ✔ + fleet-viewer ✘ ✔ ✔ ✔ ✔ ✔ ✘ ✔ + silo1-admin ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-limited-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-viewer ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-admin ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-limited-collaborator ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + silo1-proj1-viewer ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + unauthenticated ! ! ! ! ! ! ! ! + scim ✘ ✔ ✘ ✔ ✔ ✔ ✘ ✔ + resource: AffinityGroup "silo2-proj1-affinity-group1" USER Q R LC RP M MP CC D diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 849c780c985..753e4223d0d 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -63,6 +63,8 @@ define_enums! 
{ IpPoolTypeEnum => "ip_pool_type", IpVersionEnum => "ip_version", MigrationStateEnum => "migration_state", + MulticastGroupStateEnum => "multicast_group_state", + MulticastGroupMemberStateEnum => "multicast_group_member_state", NetworkInterfaceKindEnum => "network_interface_kind", OximeterReadModeEnum => "oximeter_read_mode", PhysicalDiskKindEnum => "physical_disk_kind", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 7804d119af3..8700c739972 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2773,6 +2773,57 @@ table! { volume_id -> Nullable, } } + +table! { + multicast_group (id) { + id -> Uuid, + name -> Text, + description -> Text, + time_created -> Timestamptz, + time_modified -> Timestamptz, + time_deleted -> Nullable, + ip_pool_id -> Uuid, + ip_pool_range_id -> Uuid, + vni -> Int4, + multicast_ip -> Inet, + source_ips -> Array, + mvlan -> Nullable, + underlay_group_id -> Nullable, + tag -> Nullable, + state -> crate::enums::MulticastGroupStateEnum, + version_added -> Int8, + version_removed -> Nullable, + } +} + +table! { + multicast_group_member (id) { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + time_deleted -> Nullable, + external_group_id -> Uuid, + parent_id -> Uuid, + sled_id -> Nullable, + state -> crate::enums::MulticastGroupMemberStateEnum, + version_added -> Int8, + version_removed -> Nullable, + } +} + +table! { + underlay_multicast_group (id) { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + time_deleted -> Nullable, + multicast_ip -> Inet, + tag -> Nullable, + version_added -> Int8, + version_removed -> Nullable, + } +} + allow_tables_to_appear_in_same_query!(user_data_export, snapshot, image); table! { diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml index 7af309d0a56..5a7586e29e1 100644 --- a/nexus/examples/config-second.toml +++ b/nexus/examples/config-second.toml @@ -180,6 +180,13 @@ fm.sitrep_load_period_secs = 15 # only necessary to ensure that it always happens eventually. fm.sitrep_gc_period_secs = 600 probe_distributor.period_secs = 60 +multicast_reconciler.period_secs = 60 +# TTL for sled-to-backplane-port mapping cache +# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes +# multicast_reconciler.sled_cache_ttl_secs = 3600 +# TTL for backplane topology cache (static platform configuration) +# Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails +# multicast_reconciler.backplane_cache_ttl_secs = 86400 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 90f46fedd47..86eaee939ae 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -164,6 +164,13 @@ fm.sitrep_load_period_secs = 15 # only necessary to ensure that it always happens eventually. 
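As an aside, these `multicast_reconciler.*` keys presumably deserialize into a small per-task config section on the Nexus side; the following is only a sketch of a plausible shape (the struct name and field types are assumptions, only the key names come from the config files above and the `config.multicast_reconciler.*` uses later in this diff):

use serde::Deserialize;

/// Sketch only: a config section matching the `multicast_reconciler.*` keys.
#[derive(Clone, Debug, Deserialize)]
struct MulticastReconcilerConfig {
    /// Seconds between reconciler passes.
    period_secs: u64,
    /// TTL (seconds) for the sled-to-backplane-port mapping cache.
    sled_cache_ttl_secs: Option<u64>,
    /// TTL (seconds) for the backplane topology cache.
    backplane_cache_ttl_secs: Option<u64>,
}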
fm.sitrep_gc_period_secs = 600 probe_distributor.period_secs = 60 +multicast_reconciler.period_secs = 60 +# TTL for sled-to-backplane-port mapping cache +# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes +# multicast_reconciler.sled_cache_ttl_secs = 3600 +# TTL for backplane topology cache (static platform configuration) +# Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails +# multicast_reconciler.backplane_cache_ttl_secs = 86400 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index da5f2eee772..929fb6ce25e 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -49,6 +49,18 @@ affinity_group_member_list GET /v1/affinity-groups/{affinity_ affinity_group_update PUT /v1/affinity-groups/{affinity_group} affinity_group_view GET /v1/affinity-groups/{affinity_group} instance_affinity_group_list GET /v1/instances/{instance}/affinity-groups +instance_multicast_group_join PUT /v1/instances/{instance}/multicast-groups/{multicast_group} +instance_multicast_group_leave DELETE /v1/instances/{instance}/multicast-groups/{multicast_group} +instance_multicast_group_list GET /v1/instances/{instance}/multicast-groups +lookup_multicast_group_by_ip GET /v1/system/multicast-groups/by-ip/{address} +multicast_group_create POST /v1/multicast-groups +multicast_group_delete DELETE /v1/multicast-groups/{multicast_group} +multicast_group_list GET /v1/multicast-groups +multicast_group_member_add POST /v1/multicast-groups/{multicast_group}/members +multicast_group_member_list GET /v1/multicast-groups/{multicast_group}/members +multicast_group_member_remove DELETE /v1/multicast-groups/{multicast_group}/members/{instance} +multicast_group_update PUT /v1/multicast-groups/{multicast_group} +multicast_group_view GET /v1/multicast-groups/{multicast_group} probe_create POST /experimental/v1/probes probe_delete DELETE /experimental/v1/probes/{probe} probe_list GET /experimental/v1/probes diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index d8a5f578ea4..2c6161c42f2 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -19,7 +19,10 @@ use http::Response; use ipnetwork::IpNetwork; use nexus_types::{ authn::cookies::Cookies, - external_api::{headers, params, shared, views}, + external_api::{ + headers, params, shared, + views::{self, MulticastGroupMember}, + }, }; use omicron_common::api::external::{ http_pagination::{ @@ -142,6 +145,12 @@ const PUT_UPDATE_REPOSITORY_MAX_BYTES: usize = 4 * GIB; url = "http://docs.oxide.computer/api/metrics" } }, + "multicast-groups" = { + description = "Multicast groups provide efficient one-to-many network communication.", + external_docs = { + url = "http://docs.oxide.computer/api/multicast-groups" + } + }, "policy" = { description = "System-wide IAM policy", external_docs = { @@ -1226,6 +1235,128 @@ pub trait NexusExternalApi { query_params: Query, ) -> Result, HttpError>; + // Multicast Groups + + /// List all multicast groups. + #[endpoint { + method = GET, + path = "/v1/multicast-groups", + tags = ["experimental"], + }] + async fn multicast_group_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create a multicast group. + /// + /// Multicast groups are fleet-scoped resources that can be joined by + /// instances across projects and silos. 
A single multicast IP serves + /// all group members regardless of project or silo boundaries. + #[endpoint { + method = POST, + path = "/v1/multicast-groups", + tags = ["experimental"], + }] + async fn multicast_group_create( + rqctx: RequestContext, + group_params: TypedBody, + ) -> Result, HttpError>; + + /// Fetch a multicast group. + #[endpoint { + method = GET, + path = "/v1/multicast-groups/{multicast_group}", + tags = ["experimental"], + }] + async fn multicast_group_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Update a multicast group. + #[endpoint { + method = PUT, + path = "/v1/multicast-groups/{multicast_group}", + tags = ["experimental"], + }] + async fn multicast_group_update( + rqctx: RequestContext, + path_params: Path, + updated_group: TypedBody, + ) -> Result, HttpError>; + + /// Delete a multicast group. + #[endpoint { + method = DELETE, + path = "/v1/multicast-groups/{multicast_group}", + tags = ["experimental"], + }] + async fn multicast_group_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// Look up multicast group by IP address. + #[endpoint { + method = GET, + path = "/v1/system/multicast-groups/by-ip/{address}", + tags = ["experimental"], + }] + async fn lookup_multicast_group_by_ip( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List members of a multicast group. + #[endpoint { + method = GET, + path = "/v1/multicast-groups/{multicast_group}/members", + tags = ["experimental"], + }] + async fn multicast_group_member_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError>; + + /// Add instance to a multicast group. + /// + /// Functionally equivalent to updating the instance's `multicast_groups` field. + /// Both approaches modify the same underlying membership and trigger the same + /// reconciliation logic. + /// + /// Specify instance by name (requires `?project=`) or UUID. + #[endpoint { + method = POST, + path = "/v1/multicast-groups/{multicast_group}/members", + tags = ["experimental"], + }] + async fn multicast_group_member_add( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + member_params: TypedBody, + ) -> Result, HttpError>; + + /// Remove instance from a multicast group. + /// + /// Functionally equivalent to removing the group from the instance's + /// `multicast_groups` field. Both approaches modify the same underlying + /// membership and trigger reconciliation. + /// + /// Specify instance by name (requires `?project=`) or UUID. + #[endpoint { + method = DELETE, + path = "/v1/multicast-groups/{multicast_group}/members/{instance}", + tags = ["experimental"], + }] + async fn multicast_group_member_remove( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + // Disks /// List disks @@ -2437,6 +2568,55 @@ pub trait NexusExternalApi { query_params: Query, ) -> Result; + // Instance Multicast Groups + + /// List multicast groups for instance + #[endpoint { + method = GET, + path = "/v1/instances/{instance}/multicast-groups", + tags = ["experimental"], + }] + async fn instance_multicast_group_list( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result< + HttpResponseOk>, + HttpError, + >; + + /// Join multicast group. + /// + /// This is functionally equivalent to adding the instance via the group's + /// member management endpoint or updating the instance's `multicast_groups` + /// field. 
All approaches modify the same membership and trigger reconciliation. + #[endpoint { + method = PUT, + path = "/v1/instances/{instance}/multicast-groups/{multicast_group}", + tags = ["experimental"], + }] + async fn instance_multicast_group_join( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Leave multicast group. + /// + /// This is functionally equivalent to removing the instance via the group's + /// member management endpoint or updating the instance's `multicast_groups` + /// field. All approaches modify the same membership and trigger reconciliation. + #[endpoint { + method = DELETE, + path = "/v1/instances/{instance}/multicast-groups/{multicast_group}", + tags = ["experimental"], + }] + async fn instance_multicast_group_leave( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + // Snapshots /// List snapshots diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index abd0a5f0e71..1d9f9830ab6 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -3,8 +3,8 @@ baseboards: part "FAKE_SIM_SIDECAR" serial "SimSidecar1" part "i86pc" serial "SimGimlet00" part "i86pc" serial "SimGimlet01" - part "sim-gimlet" serial "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" - part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" + part "i86pc" serial "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" + part "i86pc" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" cabooses: board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "fefefefe" sign None @@ -84,7 +84,7 @@ rot pages found: sled agents found: sled 03265caf-da7d-46c7-b1c2-39fa90ce5c65 (Scrimlet) - baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" }) + baseboard Some(BaseboardId { part_number: "i86pc", serial_number: "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" }) ledgered sled config: generation: 3 remove_mupdate_override: None @@ -100,7 +100,7 @@ sled agents found: result for zone 8b88a56f-3eb6-4d80-ba42-75d867bc427d: Ok reconciler task idle sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Scrimlet) - baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) + baseboard Some(BaseboardId { part_number: "i86pc", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) ledgered sled config: generation: 3 remove_mupdate_override: None diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt index 094d9381d11..c3772599c7e 100644 --- a/nexus/inventory/tests/output/collector_sled_agent_errors.txt +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -3,7 +3,7 @@ baseboards: part "FAKE_SIM_SIDECAR" serial "SimSidecar1" part "i86pc" serial "SimGimlet00" part "i86pc" serial "SimGimlet01" - part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" + part "i86pc" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" cabooses: board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "fefefefe" sign None @@ -83,7 +83,7 @@ rot pages found: sled agents found: sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Scrimlet) - baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) + baseboard Some(BaseboardId { part_number: "i86pc", serial_number: 
"sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) ledgered sled config: generation: 3 remove_mupdate_override: None diff --git a/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs b/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs index 17a3a60206a..80082e01377 100644 --- a/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs +++ b/nexus/mgs-updates/src/test_util/host_phase_2_test_state.rs @@ -127,7 +127,7 @@ impl HostPhase2TestContext { .version_policy(dropshot::VersionPolicy::Dynamic(Box::new( dropshot::ClientSpecifiesVersionInHeader::new( omicron_common::api::VERSION_HEADER, - sled_agent_api::latest_version(), + sled_agent_api::VERSION_MULTICAST_SUPPORT, ), ))) .start() @@ -220,12 +220,13 @@ mod api_impl { use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; + use sled_agent_api::v7::InstanceEnsureBody; + use sled_agent_api::v7::InstanceMulticastBody; use sled_agent_api::*; use sled_agent_types::bootstore::BootstoreStatus; use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; - use sled_agent_types::instance::InstanceEnsureBody; use sled_agent_types::instance::InstanceExternalIpBody; use sled_agent_types::instance::VmmPutStateBody; use sled_agent_types::instance::VmmPutStateResponse; @@ -530,7 +531,15 @@ mod api_impl { unimplemented!() } - async fn vmm_register( + async fn vmm_register_v1( + _rqctx: RequestContext, + _path_params: Path, + _body: TypedBody, + ) -> Result, HttpError> { + unimplemented!() + } + + async fn vmm_register_v7( _rqctx: RequestContext, _path_params: Path, _body: TypedBody, @@ -576,6 +585,50 @@ mod api_impl { unimplemented!() } + async fn vmm_join_multicast_group( + _rqctx: RequestContext, + _path_params: Path, + body: TypedBody, + ) -> Result { + let body_args = body.into_inner(); + match body_args { + InstanceMulticastBody::Join(_) => { + // MGS test utility - just return success for test compatibility + Ok(HttpResponseUpdatedNoContent()) + } + InstanceMulticastBody::Leave(_) => { + // This endpoint is for joining - reject leave operations + Err(HttpError::for_bad_request( + None, + "Join endpoint cannot process Leave operations" + .to_string(), + )) + } + } + } + + async fn vmm_leave_multicast_group( + _rqctx: RequestContext, + _path_params: Path, + body: TypedBody, + ) -> Result { + let body_args = body.into_inner(); + match body_args { + InstanceMulticastBody::Leave(_) => { + // MGS test utility - just return success for test compatibility + Ok(HttpResponseUpdatedNoContent()) + } + InstanceMulticastBody::Join(_) => { + // This endpoint is for leaving - reject join operations + Err(HttpError::for_bad_request( + None, + "Leave endpoint cannot process Join operations" + .to_string(), + )) + } + } + } + async fn disk_put( _rqctx: RequestContext, _path_params: Path, diff --git a/nexus/reconfigurator/execution/src/test_utils.rs b/nexus/reconfigurator/execution/src/test_utils.rs index 0aad3330fe9..737a2b16b59 100644 --- a/nexus/reconfigurator/execution/src/test_utils.rs +++ b/nexus/reconfigurator/execution/src/test_utils.rs @@ -110,8 +110,13 @@ pub fn overridables_for_test( let sled_id = id_str.parse().unwrap(); let ip = Ipv6Addr::LOCALHOST; let mgs_port = cptestctx.gateway.get(&switch_location).unwrap().port; - let dendrite_port = - cptestctx.dendrite.get(&switch_location).unwrap().port; + let dendrite_port = cptestctx + .dendrite + .read() + .unwrap() + .get(&switch_location) + 
.unwrap() + .port; let mgd_port = cptestctx.mgd.get(&switch_location).unwrap().port; overrides.override_switch_zone_ip(sled_id, ip); overrides.override_dendrite_port(sled_id, dendrite_port); diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 3ed5ff471b2..206223ff5b0 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -112,6 +112,7 @@ use super::tasks::inventory_collection; use super::tasks::inventory_load; use super::tasks::lookup_region_port; use super::tasks::metrics_producer_gc; +use super::tasks::multicast::MulticastGroupReconciler; use super::tasks::nat_cleanup; use super::tasks::phantom_disks; use super::tasks::physical_disk_adoption; @@ -154,6 +155,7 @@ use omicron_uuid_kinds::OmicronZoneUuid; use oximeter::types::ProducerRegistry; use std::collections::BTreeMap; use std::sync::Arc; +use std::sync::atomic::AtomicBool; use tokio::sync::mpsc; use tokio::sync::watch; use update_common::artifacts::ArtifactsWithPlan; @@ -167,6 +169,8 @@ pub(crate) struct BackgroundTasksInternal { pub(crate) external_endpoints: watch::Receiver>, inventory_load_rx: watch::Receiver>>, + /// Flag to signal cache invalidation for multicast reconciler + pub(crate) multicast_invalidate_cache: Option>, } impl BackgroundTasksInternal { @@ -189,6 +193,7 @@ pub struct BackgroundTasksInitializer { external_endpoints_tx: watch::Sender>, inventory_load_tx: watch::Sender>>, + multicast_invalidate_flag: Arc, } impl BackgroundTasksInitializer { @@ -207,10 +212,15 @@ impl BackgroundTasksInitializer { watch::channel(None); let (inventory_load_tx, inventory_load_rx) = watch::channel(None); + // Create the multicast cache invalidation flag that will be shared + // between the reconciler and Nexus (via `BackgroundTasksInternal`) + let multicast_invalidate_flag = Arc::new(AtomicBool::new(false)); + let initializer = BackgroundTasksInitializer { driver: Driver::new(), external_endpoints_tx, inventory_load_tx, + multicast_invalidate_flag: multicast_invalidate_flag.clone(), }; let background_tasks = BackgroundTasks { @@ -261,7 +271,12 @@ impl BackgroundTasksInitializer { task_fm_sitrep_loader: Activator::new(), task_fm_sitrep_gc: Activator::new(), task_probe_distributor: Activator::new(), + task_multicast_reconciler: Activator::new(), + // Handles to activate background tasks that do not get used by Nexus + // at-large. These background tasks are implementation details as far as + // the rest of Nexus is concerned. These handles don't even really need to + // be here, but it's convenient. task_internal_dns_propagation: Activator::new(), task_external_dns_propagation: Activator::new(), }; @@ -269,6 +284,7 @@ impl BackgroundTasksInitializer { let internal = BackgroundTasksInternal { external_endpoints: external_endpoints_rx, inventory_load_rx, + multicast_invalidate_cache: Some(multicast_invalidate_flag), }; (initializer, background_tasks, internal) @@ -344,6 +360,7 @@ impl BackgroundTasksInitializer { task_fm_sitrep_loader, task_fm_sitrep_gc, task_probe_distributor, + task_multicast_reconciler, // Add new background tasks here. Be sure to use this binding in a // call to `Driver::register()` below. That's what actually wires // up the Activator to the corresponding background task. 
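The cache-invalidation flag created here is just a shared `AtomicBool`; a minimal sketch of the intended handshake, with illustrative function names (the actual call sites live in Nexus and in the reconciler, not shown here):

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

/// Nexus side: ask the reconciler to drop its cached sled/backplane topology
/// on its next pass.
fn request_cache_invalidation(flag: &Arc<AtomicBool>) {
    flag.store(true, Ordering::SeqCst);
}

/// Reconciler side: atomically read and clear the request, returning whether
/// an invalidation was pending.
fn take_invalidation_request(flag: &AtomicBool) -> bool {
    flag.swap(false, Ordering::SeqCst)
}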
@@ -529,7 +546,7 @@ impl BackgroundTasksInitializer { period: config.inventory.period_secs_load, task_impl: Box::new(inventory_loader), opctx: opctx.child(BTreeMap::new()), - watchers: vec![Box::new(inventory_collect_watcher)], + watchers: vec![Box::new(inventory_collect_watcher.clone())], activator: task_inventory_loader, }); @@ -951,7 +968,7 @@ impl BackgroundTasksInitializer { period: config.region_snapshot_replacement_finish.period_secs, task_impl: Box::new(RegionSnapshotReplacementFinishDetector::new( datastore.clone(), - sagas, + sagas.clone(), )), opctx: opctx.child(BTreeMap::new()), watchers: vec![], @@ -1057,6 +1074,27 @@ impl BackgroundTasksInitializer { } }); + driver.register(TaskDefinition { + name: "multicast_reconciler", + description: "reconciles multicast group and member state with dendrite switch configuration", + period: config.multicast_reconciler.period_secs, + task_impl: Box::new(MulticastGroupReconciler::new( + datastore.clone(), + resolver.clone(), + sagas.clone(), + args.multicast_enabled, + config.multicast_reconciler.sled_cache_ttl_secs, + config.multicast_reconciler.backplane_cache_ttl_secs, + self.multicast_invalidate_flag.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![ + Box::new(inventory_collect_watcher.clone()), + Box::new(inventory_load_watcher.clone()), + ], + activator: task_multicast_reconciler, + }); + driver.register(TaskDefinition { name: "sp_ereport_ingester", description: "collects error reports from service processors", @@ -1123,6 +1161,8 @@ pub struct BackgroundTasksData { pub datastore: Arc, /// background task configuration pub config: BackgroundTaskConfig, + /// whether multicast functionality is enabled (or not) + pub multicast_enabled: bool, /// rack identifier pub rack_id: Uuid, /// nexus identifier diff --git a/nexus/src/app/background/tasks/instance_reincarnation.rs b/nexus/src/app/background/tasks/instance_reincarnation.rs index 7858676891f..dbb695359a5 100644 --- a/nexus/src/app/background/tasks/instance_reincarnation.rs +++ b/nexus/src/app/background/tasks/instance_reincarnation.rs @@ -396,6 +396,7 @@ mod test { start: state == InstanceState::Vmm, auto_restart_policy, anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await; diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index 815d42c5868..64df7770da1 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -27,6 +27,7 @@ pub mod inventory_collection; pub mod inventory_load; pub mod lookup_region_port; pub mod metrics_producer_gc; +pub mod multicast; pub mod nat_cleanup; pub mod networking; pub mod phantom_disks; diff --git a/nexus/src/app/background/tasks/multicast/groups.rs b/nexus/src/app/background/tasks/multicast/groups.rs new file mode 100644 index 00000000000..3aa9330e057 --- /dev/null +++ b/nexus/src/app/background/tasks/multicast/groups.rs @@ -0,0 +1,737 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Group-specific multicast reconciler functions. +//! +//! This module handles multicast group lifecycle operations within an RPW +//! (Reliable Persistent Workflow). Groups represent the fundamental +//! multicast forwarding entities represented by dataplane configuration (via +//! DPD) applied on switches. +//! +//! # RPW Group Processing Model +//! +//! 
Unlike sagas that orchestrate targeted, synchronous changes, the RPW +//! reconciler ensures the dataplane (via DPD) reflects the intended state from +//! the database. +//! Group processing is idempotent and resilient to failures. +//! +//! ## Operations Handled +//! - **"Creating" state**: Initiate DPD "ensure" to apply configuration +//! - **"Active" state**: Detect DPD drift and launch UPDATE saga when DB state differs +//! - **"Deleting" state**: Switch cleanup and database removal +//! - **Extensible processing**: Support for different group types +//! +//! # Group State Transition Matrix +//! +//! The RPW reconciler handles all possible state transitions for multicast +//! groups: +//! +//! ## Group State Lifecycle +//! ```text +//! "Creating" → "Active" → "Deleting" → "Deleted" (removed from DB) +//! ↓ ↓ ↓ +//! (saga=external+underlay) (check+sync) (cleanup) +//! ``` +//! +//! ## State Transition Permutations +//! +//! ### CREATING State Transitions +//! | Condition | Underlay Group | Saga Status | Action | Next State | +//! |-----------|---------------|-------------|--------|------------| +//! | 1 | Missing | N/A | Create underlay + start saga | "Creating" (saga handles →"Active") | +//! | 2 | Exists | N/A | Start DPD ensure | "Creating" (ensure handles →"Active") | +//! | 3 | Any | Failed | Log error, retry next pass | "Creating" (NoChange) | +//! +//! ### ACTIVE State Transitions +//! | Condition | DPD State | Action | Next State | +//! |-----------|-----------|---------|------------| +//! | 1 | Matches DB | No action | "Active" (NoChange) | +//! | 2 | Differs from DB | Launch UPDATE saga to fix drift | "Active" (StateChanged) | +//! | 3 | Missing/error | Launch UPDATE saga to fix drift | "Active" (StateChanged) | +//! +//! ### DELETING State Transitions +//! | Condition | DPD cleanup (external+underlay) | DB cleanup (row) | Action | Next State | +//! |-----------|-------------------------------|-------------------|--------|------------| +//! | 1 | Success | Success | Delete DB row | "Deleted" (no row) | +//! | 2 | Failed | N/A | Log error, retry next pass | "Deleting" (NoChange) | +//! | 3 | Success | Failed | Log error, retry next pass | "Deleting" (NoChange) | +//! +//! Note: "Deleted" is a terminal outcome (the group row no longer exists). All +//! DPD cleanup happens while in "Deleting"; there are no transitions for +//! "Deleted" because the reconciler no longer sees the group. +//! +//! ## Triggering Events +//! - **"Creating"**: User API creates group → DB inserts with "Creating" state +//! - **"Active"**: DPD ensure completes successfully → state = "Active" +//! - **"Deleting"**: User API deletes group → DB sets state = "Deleting" +//! - **"Deleted"**: RPW reconciler completes cleanup → removes from DB +//! +//! ## Error Handling +//! - **Saga failures**: Group stays in "Creating", reconciler retries +//! - **DPD failures**: Group stays in current state, logged and retried +//! - **DB failures**: Operations retried in subsequent reconciler passes +//! 
- **Partial cleanup**: "Deleting" state preserved until complete cleanup + +use anyhow::Context; +use futures::stream::{self, StreamExt}; +use slog::{debug, error, trace, warn}; + +use nexus_db_model::{MulticastGroup, MulticastGroupState}; +use nexus_db_queries::context::OpContext; +use nexus_types::identity::Resource; +use omicron_common::api::external::DataPageParams; +use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid}; + +use super::{MulticastGroupReconciler, StateTransition}; +use crate::app::multicast::dataplane::MulticastDataplaneClient; +use crate::app::saga::create_saga_dag; +use crate::app::sagas; + +/// Check if DPD tag matches database name. +fn dpd_state_matches_name( + dpd_group: &dpd_client::types::MulticastGroupExternalResponse, + db_group: &MulticastGroup, +) -> bool { + dpd_group.tag.as_ref().map_or(false, |tag| tag == db_group.name().as_str()) +} + +/// Check if DPD sources match database sources. +fn dpd_state_matches_sources( + dpd_group: &dpd_client::types::MulticastGroupExternalResponse, + db_group: &MulticastGroup, +) -> bool { + let db_sources: Vec<_> = + db_group.source_ips.iter().map(|ip| ip.ip()).collect(); + let dpd_sources = dpd_group.sources.clone().unwrap_or_default(); + + // Extract exact IPs from DPD sources (filter out subnets) + let mut dpd_ips: Vec<_> = dpd_sources + .into_iter() + .filter_map(|src| match src { + dpd_client::types::IpSrc::Exact(ip) => Some(ip), + dpd_client::types::IpSrc::Subnet(_) => None, + }) + .collect(); + + let mut db_sources_sorted = db_sources; + dpd_ips.sort(); + db_sources_sorted.sort(); + + dpd_ips == db_sources_sorted +} + +/// Check if DPD vlan_id matches database mvlan. +fn dpd_state_matches_mvlan( + dpd_group: &dpd_client::types::MulticastGroupExternalResponse, + db_group: &MulticastGroup, +) -> bool { + let db_mvlan = db_group.mvlan.map(|v| v as u16); + dpd_group.external_forwarding.vlan_id == db_mvlan +} + +/// Trait for processing different types of multicast groups +trait GroupStateProcessor { + /// Process a group in "Creating" state. + async fn process_creating( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + ) -> Result; + + /// Process a group in "Deleting" state. + async fn process_deleting( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result; + + /// Process a group in "Active" state (check DPD sync status). + async fn process_active( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result; +} + +/// Processor for external multicast groups (customer/operator-facing). +struct ExternalGroupProcessor; + +impl GroupStateProcessor for ExternalGroupProcessor { + /// Handle groups in "Creating" state. + async fn process_creating( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + ) -> Result { + reconciler.handle_creating_external_group(opctx, group).await + } + + /// Handle groups in "Deleting" state. + async fn process_deleting( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + reconciler + .handle_deleting_external_group(opctx, group, dataplane_client) + .await + } + + /// Handle groups in "Active" state (check DPD sync status). 
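The `StateTransition` values returned by these handlers come from the parent module rather than from this hunk; based on how the variants are used in this file, a minimal sketch of its assumed shape:

/// Assumed shape of the parent module's `StateTransition`; reconstructed from
/// usage below, not copied from the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StateTransition {
    /// The entity already matched the desired state; nothing changed.
    NoChange,
    /// A change was applied, or a saga was started to apply it.
    StateChanged,
    /// Remaining dataplane or database state still needs to be cleaned up.
    NeedsCleanup,
}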
+ async fn process_active( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + reconciler + .handle_active_external_group(opctx, group, dataplane_client) + .await + } +} + +impl MulticastGroupReconciler { + /// Generic group reconciliation logic for any state. + /// + /// This consolidates the common pattern of: + /// 1. List groups by state + /// 2. Process concurrently + /// 3. Collect and log results + async fn reconcile_groups_by_state( + &self, + opctx: &OpContext, + state: MulticastGroupState, + dataplane_client: Option<&MulticastDataplaneClient>, + ) -> Result { + trace!(opctx.log, "searching for multicast groups"; "state" => %state); + + let groups = self + .datastore + .multicast_groups_list_by_state( + opctx, + state, + &DataPageParams::max_page(), + ) + .await + .map_err(|e| { + error!( + opctx.log, + "failed to list multicast groups"; + "error" => %e, + "state" => %state + ); + format!("failed to list {state} multicast groups") + })?; + + trace!(opctx.log, "found multicast groups"; "count" => groups.len(), "state" => %state); + + // Process groups concurrently with configurable parallelism + let results = stream::iter(groups) + .map(|group| async move { + let result = self + .process_group_state(opctx, &group, dataplane_client) + .await; + (group, result) + }) + .buffer_unordered(self.group_concurrency_limit) + .collect::>() + .await; + + // Handle results with state-appropriate logging and counting + let mut processed = 0; + let total_results = results.len(); + for (group, result) in results { + match result { + Ok(transition) => { + // Count successful transitions based on state expectations + let should_count = match state { + // Creating: count StateChanged and NoChange + MulticastGroupState::Creating => matches!( + transition, + StateTransition::StateChanged + | StateTransition::NoChange + ), + // Deleting: count StateChanged and NeedsCleanup + MulticastGroupState::Deleting => matches!( + transition, + StateTransition::StateChanged + | StateTransition::NeedsCleanup + ), + // Active: count StateChanged and NoChange + MulticastGroupState::Active => matches!( + transition, + StateTransition::StateChanged + | StateTransition::NoChange + ), + MulticastGroupState::Deleted => true, + }; + + if should_count { + processed += 1; + } + + debug!( + opctx.log, + "processed multicast group"; + "state" => %state, + "group" => ?group, + "transition" => ?transition + ); + } + Err(e) => { + warn!( + opctx.log, + "failed to process multicast group"; + "state" => %state, + "group" => ?group, + "error" => %e + ); + } + } + } + + if total_results > 0 { + debug!( + opctx.log, + "group reconciliation completed"; + "state" => %state, + "processed" => processed, + "total" => total_results + ); + } + + Ok(processed) + } + + /// Process multicast groups that are in "Creating" state. + pub async fn reconcile_creating_groups( + &self, + opctx: &OpContext, + ) -> Result { + self.reconcile_groups_by_state( + opctx, + MulticastGroupState::Creating, + None, + ) + .await + } + + /// Process multicast groups that are in "Deleting" state. + pub async fn reconcile_deleting_groups( + &self, + opctx: &OpContext, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + self.reconcile_groups_by_state( + opctx, + MulticastGroupState::Deleting, + Some(dataplane_client), + ) + .await + } + + /// Reconcile active multicast groups with DPD (drift detection and correction). 
+ pub async fn reconcile_active_groups( + &self, + opctx: &OpContext, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + self.reconcile_groups_by_state( + opctx, + MulticastGroupState::Active, + Some(dataplane_client), + ) + .await + } + + /// Main dispatch function for processing group state changes. + /// Routes to appropriate processor based on group type and state. + async fn process_group_state( + &self, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: Option<&MulticastDataplaneClient>, + ) -> Result { + // Future: Match on group type to select different processors if + // we add more nuanced group types + let processor = ExternalGroupProcessor; + + match group.state { + MulticastGroupState::Creating => { + processor.process_creating(self, opctx, group).await + } + MulticastGroupState::Deleting => { + let dataplane_client = dataplane_client.ok_or_else(|| { + anyhow::Error::msg( + "dataplane client required for deleting state", + ) + })?; + processor + .process_deleting(self, opctx, group, dataplane_client) + .await + } + MulticastGroupState::Active => { + let dataplane_client = dataplane_client.ok_or_else(|| { + anyhow::Error::msg( + "dataplane client required for active state", + ) + })?; + processor + .process_active(self, opctx, group, dataplane_client) + .await + } + MulticastGroupState::Deleted => { + debug!( + opctx.log, + "cleaning up deleted multicast group from local database"; + "group_id" => %group.id(), + "group_name" => group.name().as_str() + ); + + // Try to delete underlay group record if it exists + if let Some(underlay_group_id) = group.underlay_group_id { + self.datastore + .underlay_multicast_group_delete( + opctx, + underlay_group_id, + ) + .await + .ok(); + } + // Try to delete external group record + self.datastore + .multicast_group_delete( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .ok(); + + Ok(StateTransition::StateChanged) + } + } + } + + /// External group handler for groups in "Creating" state. + async fn handle_creating_external_group( + &self, + opctx: &OpContext, + group: &MulticastGroup, + ) -> Result { + debug!( + opctx.log, + "processing external multicast group transition: 'Creating' → 'Active'"; + "group_id" => %group.id(), + "group_name" => group.name().as_str(), + "multicast_ip" => %group.multicast_ip, + "multicast_scope" => if group.multicast_ip.ip().is_ipv4() { "IPv4_External" } else { "IPv6_External" }, + "vni" => ?group.vni, + "underlay_linked" => group.underlay_group_id.is_some() + ); + + // TODO: Add front port selection for egress traffic (instances → + // external). When transitioning groups to Active, we need to identify + // and validate front ports against DPD's QSFP topology (similar to + // `backplane_map` validation for rear ports). These uplink members use + // `Direction::External` and follow a different lifecycle - added when + // first instance joins, removed when last instance leaves. + // Should integrate with `switch_ports_with_uplinks()` or + // equivalent front port discovery mechanism, which would be + // configurable, and later learned (i.e., via `mcastd`/IGMP). + + // Handle underlay group creation/linking (same logic as before) + self.process_creating_group_inner(opctx, group).await?; + + // Successfully started saga - the saga will handle state transition to "Active". + // We return NoChange because the reconciler shouldn't change the state; + // the saga applies external + underlay configuration via DPD. 
+ Ok(StateTransition::NoChange) + } + + /// External group handler for groups in "Deleting" state. + async fn handle_deleting_external_group( + &self, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + debug!( + opctx.log, + "processing external multicast group transition: 'Deleting' → 'Deleted' (switch cleanup)"; + "group_id" => %group.id(), + "group_name" => group.name().as_str(), + "multicast_ip" => %group.multicast_ip, + "multicast_scope" => if group.multicast_ip.ip().is_ipv4() { "IPv4_External" } else { "IPv6_External" }, + "underlay_group_id" => ?group.underlay_group_id, + "dpd_cleanup_required" => true + ); + + self.process_deleting_group_inner(opctx, group, dataplane_client) + .await?; + Ok(StateTransition::StateChanged) + } + + /// External group handler for groups in "Active" state. + /// + /// Checks if the group's DPD state matches the database state. If not, + /// launches the UPDATE saga to sync. This handles updates triggered by + /// the UPDATE API endpoint and self-corrects any DPD drift. + async fn handle_active_external_group( + &self, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + let underlay_group_id = group.underlay_group_id.ok_or_else(|| { + anyhow::Error::msg( + "active multicast group missing underlay_group_id", + ) + })?; + + // Check if DPD state matches DB state (read-before-write for drift detection) + let needs_update = match dataplane_client + .fetch_external_group_for_drift_check(group.multicast_ip.ip()) + .await + { + Ok(Some(dpd_group)) => { + let name_matches = dpd_state_matches_name(&dpd_group, group); + let sources_match = + dpd_state_matches_sources(&dpd_group, group); + let mvlan_matches = dpd_state_matches_mvlan(&dpd_group, group); + + let needs_update = + !name_matches || !sources_match || !mvlan_matches; + + if needs_update { + debug!( + opctx.log, + "detected DPD state mismatch for active group"; + "group_id" => %group.id(), + "name_matches" => name_matches, + "sources_match" => sources_match, + "mvlan_matches" => mvlan_matches + ); + } + + needs_update + } + Ok(None) => { + // Group not found in DPD - need to create + debug!( + opctx.log, + "active group not found in DPD, will update"; + "group_id" => %group.id() + ); + true + } + Err(e) => { + // Error fetching from DPD - log and retry + warn!( + opctx.log, + "error fetching active group from DPD, will retry update"; + "group_id" => %group.id(), + "error" => %e + ); + true + } + }; + + if needs_update { + debug!( + opctx.log, + "updating active multicast group in DPD"; + "group_id" => %group.id(), + "multicast_ip" => %group.multicast_ip + ); + + let saga_params = sagas::multicast_group_dpd_update::Params { + serialized_authn: + nexus_db_queries::authn::saga::Serialized::for_opctx(opctx), + external_group_id: group.id(), + underlay_group_id, + }; + + let dag = create_saga_dag::< + sagas::multicast_group_dpd_update::SagaMulticastGroupDpdUpdate, + >(saga_params) + .context("failed to create multicast group update saga")?; + + let saga_id = self + .sagas + .saga_start(dag) + .await + .context("failed to start multicast group update saga")?; + + debug!( + opctx.log, + "DPD update saga initiated for active group"; + "external_group_id" => %group.id(), + "saga_id" => %saga_id, + ); + + Ok(StateTransition::StateChanged) + } else { + Ok(StateTransition::NoChange) + } + } + + /// Process a single multicast group in "Creating" state. 
+ async fn process_creating_group_inner( + &self, + opctx: &OpContext, + group: &MulticastGroup, + ) -> Result<(), anyhow::Error> { + debug!( + opctx.log, + "processing creating multicast group"; + "group" => ?group + ); + + // Handle underlay group creation/linking + let underlay_group = match group.underlay_group_id { + Some(underlay_id) => { + let underlay = self + .datastore + .underlay_multicast_group_fetch(opctx, underlay_id) + .await + .with_context(|| { + format!("failed to fetch linked underlay group {underlay_id}") + })?; + + debug!( + opctx.log, + "found linked underlay group"; + "group" => ?group, + "underlay_group" => ?underlay + ); + underlay + } + None => { + debug!( + opctx.log, + "creating new underlay group"; + "group" => ?group + ); + + // Generate underlay multicast IP using IPv6 admin-local scope (RFC 7346) + let underlay_ip = self + .map_external_to_underlay_ip(group.multicast_ip.ip()) + .context( + "failed to map customer multicast IP to underlay", + )?; + + let new_underlay = self + .datastore + .ensure_underlay_multicast_group( + opctx, + group.clone(), + underlay_ip.into(), + ) + .await + .context("failed to create underlay multicast group")?; + + new_underlay + } + }; + + // Launch DPD transaction saga for atomic dataplane configuration + let saga_params = sagas::multicast_group_dpd_ensure::Params { + serialized_authn: + nexus_db_queries::authn::saga::Serialized::for_opctx(opctx), + external_group_id: group.id(), + underlay_group_id: underlay_group.id, + }; + + debug!( + opctx.log, + "initiating DPD transaction saga for multicast forwarding configuration"; + "external_group_id" => %group.id(), + "external_multicast_ip" => %group.multicast_ip, + "underlay_group_id" => %underlay_group.id, + "underlay_multicast_ip" => %underlay_group.multicast_ip, + "vni" => ?group.vni, + "saga_type" => "multicast_group_dpd_ensure", + "dpd_operation" => "create_external_and_underlay_groups" + ); + + let dag = create_saga_dag::< + sagas::multicast_group_dpd_ensure::SagaMulticastGroupDpdEnsure, + >(saga_params) + .context("failed to create multicast group transaction saga")?; + + let saga_id = self + .sagas + .saga_start(dag) + .await + .context("failed to start multicast group transaction saga")?; + + debug!( + opctx.log, + "DPD multicast forwarding configuration saga initiated"; + "external_group_id" => %group.id(), + "underlay_group_id" => %underlay_group.id, + "saga_id" => %saga_id, + "pending_dpd_operations" => "[create_external_group, create_underlay_group, configure_nat_mapping]", + "expected_outcome" => "Creating → Active" + ); + + Ok(()) + } + + /// Process a single multicast group in "Deleting" state. 
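The body of `map_external_to_underlay_ip`, called in the creating path above, is not part of this hunk. Purely to illustrate the idea the comment references (deriving an admin-local, ff04::/16-scoped IPv6 address from the external group address, per RFC 7346), a hypothetical mapping could look like the following; this is not the actual implementation:

use std::net::{IpAddr, Ipv6Addr};

/// Hypothetical illustration only: embed the external multicast address's low
/// bits under an admin-scoped (ff04::/16) IPv6 multicast prefix.
fn example_external_to_underlay(external: IpAddr) -> Ipv6Addr {
    match external {
        IpAddr::V4(v4) => {
            let [a, b, c, d] = v4.octets();
            Ipv6Addr::new(
                0xff04,
                0,
                0,
                0,
                0,
                0,
                u16::from_be_bytes([a, b]),
                u16::from_be_bytes([c, d]),
            )
        }
        IpAddr::V6(v6) => {
            let mut segments = v6.segments();
            // Force the admin-local scope prefix, keep the remaining bits.
            segments[0] = 0xff04;
            Ipv6Addr::from(segments)
        }
    }
}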
+ async fn process_deleting_group_inner( + &self, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + let tag = Self::generate_multicast_tag(group); + + debug!( + opctx.log, + "executing DPD multicast group cleanup by tag"; + "group_id" => %group.id(), + "multicast_ip" => %group.multicast_ip, + "dpd_tag" => %tag, + "cleanup_scope" => "all_switches_in_rack", + "dpd_operation" => "multicast_reset_by_tag", + "cleanup_includes" => "[external_group, underlay_group, forwarding_rules, member_ports]" + ); + + // Use the dataplane client from this reconciliation pass to clean up + // switch state by tag + dataplane_client + .remove_groups(&tag) + .await + .context("failed to cleanup dataplane switch configuration")?; + + // Delete underlay group record + if let Some(underlay_group_id) = group.underlay_group_id { + self.datastore + .underlay_multicast_group_delete(opctx, underlay_group_id) + .await + .context("failed to delete underlay group from database")?; + } + + // Delete the external group record + self.datastore + .multicast_group_delete( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + ) + .await + .context("failed to complete external group deletion")?; + + Ok(()) + } +} diff --git a/nexus/src/app/background/tasks/multicast/members.rs b/nexus/src/app/background/tasks/multicast/members.rs new file mode 100644 index 00000000000..cfa298a3f77 --- /dev/null +++ b/nexus/src/app/background/tasks/multicast/members.rs @@ -0,0 +1,2533 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Member-specific multicast reconciler functions. +//! +//! This module handles multicast group member lifecycle operations. Members +//! represent endpoints that receive multicast traffic, typically instances +//! running on compute sleds, but potentially other resource types in the +//! future. +//! +//! # RPW Member Processing Model +//! +//! Member management is more complex than group management because members have +//! a dynamic lifecycle tied to instance state (start/stop/migrate) and require +//! dataplane updates. The RPW maintains eventual consistency between intended +//! membership (database) and actual forwarding (dataplane configuration). +//! +//! ## 3-State Member Lifecycle +//! +//! - **Joining**: Member created but not yet receiving traffic +//! - Created by instance lifecycle sagas (create/start) +//! - Waiting for group activation and sled assignment +//! - RPW transitions to "Joined" when ready +//! +//! - **Joined**: Member actively receiving multicast traffic +//! - Dataplane configured via DPD client(s) +//! - Instance is running and reachable on assigned sled +//! - RPW responds to sled migrations +//! +//! - **Left**: Member not receiving traffic (temporary or permanent) +//! - Instance stopping/stopped, failed, or explicitly detached +//! - time_deleted=NULL: temporary (can rejoin) +//! - time_deleted=SET: permanent deletion pending +//! +//! Migration note: migration is not treated as leaving. The reconciler removes +//! dataplane membership from the old sled and applies it on the new sled while +//! keeping the member in "Joined" (reconfigures in place). +//! +//! ## Operations Handled +//! +//! - **State transitions**: "Joining" → "Joined" → "Left" with reactivation +//! - **Dataplane updates**: Applying and removing configuration via DPD +//!
client(s) on switches +//! - **Sled migration**: Detecting moves and updating dataplane configuration +//! (no transition to "Left") +//! - **Cleanup**: Removing orphaned switch state for deleted members +//! - **Extensible processing**: Support for different member types (designed for +//! future extension) +//! +//! ## Separation of Concerns: RPW +/- Sagas +//! +//! **Sagas:** +//! - Instance create/start → member "Joining" state +//! - Instance stop/delete → member "Left" state + time_deleted +//! - Sled assignment updates during instance operations +//! - Database state changes only (no switch operations) +//! +//! **RPW (background):** +//! - Determining switch ports and updating dataplane switches when members join +//! - Handling sled migrations +//! - Instance state monitoring and member state transitions +//! - Cleanup of deleted members from switch state +//! +//! # Member State Transition Matrix +//! +//! The RPW reconciler handles all possible state transitions for multicast group +//! members: +//! +//! ## Valid Instance States for Multicast +//! - **Valid**: Creating, Starting, Running, Rebooting, Migrating, Repairing +//! - **Invalid**: Stopping, Stopped, Failed, Destroyed, NotFound, Error +//! +//! ## State Transitions +//! +//! ### JOINING State Transitions +//! | # | Group State | Instance Valid | Has sled_id | Action | Next State | +//! |---|-------------|----------------|-------------|---------|------------| +//! | 1 | "Creating" | Any | Any | Wait for activation | "Joining" | +//! | 2 | "Active" | Invalid | Any | Clear sled_id → "Left" | "Left" | +//! | 3 | "Active" | Valid | No | Wait for sled assignment | "Joining" | +//! | 4 | "Active" | Valid | Yes | Add to DPD → "Joined" | "Joined" | +//! +//! ### JOINED State Transitions +//! | # | Instance Valid | Sled Changed | Has sled_id | Action | Next State | +//! |---|----------------|--------------|-------------|---------|------------| +//! | 1 | Invalid | Any | Any | Remove DPD + clear sled_id → "Left" | "Left" | +//! | 2 | Valid | Yes | Yes | Remove old + update sled_id + add new | "Joined" | +//! | 3 | Valid | No | Yes | Verify DPD config (idempotent) | "Joined" | +//! | 4 | Valid | N/A | No | Remove DPD → "Left" (edge case) | "Left" | +//! +//! ### LEFT State Transitions +//! | # | time_deleted | Instance Valid | Group State | Action | Next State | +//! |---|--------------|----------------|-------------|---------|------------| +//! | 1 | Set | Any | Any | Cleanup DPD config | NeedsCleanup | +//! | 2 | None | Invalid | Any | No action (stay stopped) | "Left" | +//! | 3 | None | Valid | "Creating" | Wait for activation | "Left" | +//! 
| 4 | None | Valid | "Active" | Reactivate member | "Joining" | + +use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::sync::Arc; +use std::time::SystemTime; + +use anyhow::{Context, Result}; +use futures::stream::{self, StreamExt}; +use slog::{debug, info, trace, warn}; +use uuid::Uuid; + +use nexus_db_model::{ + DbTypedUuid, MulticastGroup, MulticastGroupMember, + MulticastGroupMemberState, MulticastGroupState, Sled, +}; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::multicast::ops::member_reconcile::{ + ReconcileAction, ReconcileJoiningResult, +}; +use nexus_types::deployment::SledFilter; +use nexus_types::identity::{Asset, Resource}; +use omicron_common::api::external::{DataPageParams, InstanceState}; +use omicron_uuid_kinds::{ + GenericUuid, InstanceUuid, MulticastGroupUuid, PropolisUuid, SledKind, + SledUuid, +}; + +use super::{MulticastGroupReconciler, StateTransition, SwitchBackplanePort}; +use crate::app::multicast::dataplane::MulticastDataplaneClient; + +/// Pre-fetched instance state data for batch processing. +/// Maps instance_id -> (is_valid_for_multicast, current_sled_id). +type InstanceStateMap = HashMap)>; + +/// Backplane port mapping from DPD-client. +/// Maps switch port ID to backplane link configuration. +type BackplaneMap = + BTreeMap; + +/// Result of computing the union of member ports across a group. +/// +/// Indicates whether all "Joined" members were successfully resolved when +/// computing the port union. Callers should only prune stale ports when +/// the union is `Complete` to avoid disrupting members that failed resolution. +enum MemberPortUnion { + /// Union is complete: all "Joined" members were successfully resolved. + Complete(BTreeSet), + /// Union is partial: some "Joined" members failed to resolve. + /// The port set may be incomplete. + Partial(BTreeSet), +} + +/// Check if a DPD member is a rear/underlay port (instance member). +fn is_rear_underlay_member( + member: &dpd_client::types::MulticastGroupMember, +) -> bool { + matches!(member.port_id, dpd_client::types::PortId::Rear(_)) + && member.direction == dpd_client::types::Direction::Underlay +} + +/// Represents a sled_id update for a multicast group member. +#[derive(Debug, Clone, Copy)] +struct SledIdUpdate { + old: Option>, + new: Option>, +} + +/// Trait for processing different types of multicast group members. +trait MemberStateProcessor { + /// Process a member in "Joining" state. + async fn process_joining( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result; + + /// Process a member in "Joined" state. + async fn process_joined( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result; + + /// Process a member in "Left" state. + async fn process_left( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result; +} + +/// Processor for instance-based multicast group members. 
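To make the `MemberPortUnion` contract above concrete, here is a small sketch (not the reconciler's actual code) of how a caller might honor it, using the types defined in this file:

/// Sketch only: return the port set to prune against, but only when every
/// "Joined" member resolved, per the `MemberPortUnion` documentation above.
fn ports_safe_for_pruning(
    union: MemberPortUnion,
) -> Option<BTreeSet<SwitchBackplanePort>> {
    match union {
        MemberPortUnion::Complete(ports) => Some(ports),
        // A partial union may be missing ports for members that failed to
        // resolve, so pruning against it could disrupt live members.
        MemberPortUnion::Partial(_) => None,
    }
}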
+struct InstanceMemberProcessor; + +impl MemberStateProcessor for InstanceMemberProcessor { + async fn process_joining( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + reconciler + .handle_instance_joining( + opctx, + group, + member, + instance_states, + dataplane_client, + ) + .await + } + + async fn process_joined( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + reconciler + .handle_instance_joined( + opctx, + group, + member, + instance_states, + dataplane_client, + ) + .await + } + + async fn process_left( + &self, + reconciler: &MulticastGroupReconciler, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + reconciler + .handle_instance_left( + opctx, + group, + member, + instance_states, + dataplane_client, + ) + .await + } +} + +impl MulticastGroupReconciler { + /// Process member state changes ("Joining"→"Joined"→"Left"). + pub async fn reconcile_member_states( + &self, + opctx: &OpContext, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + trace!(opctx.log, "reconciling member state changes"); + + let mut processed = 0; + + // Get all groups that need member state processing ("Creating" and "Active") + let groups = self.get_reconcilable_groups(opctx).await?; + + for group in groups { + match self + .process_group_member_states(opctx, &group, dataplane_client) + .await + { + Ok(count) => { + processed += count; + if count > 0 { + debug!( + opctx.log, + "processed member state changes for group"; + "group" => ?group, + "members_processed" => count + ); + } + } + Err(e) => { + warn!( + opctx.log, + "failed to process member states for group"; + "group" => ?group, + "error" => %e + ); + } + } + } + + debug!( + opctx.log, + "member state reconciliation completed"; + "members_processed" => processed + ); + + Ok(processed) + } + + /// Process member state changes for a single group. 
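+    ///
+    /// Members are reconciled concurrently with bounded parallelism via
+    /// `buffer_unordered`. A minimal, self-contained sketch of that pattern
+    /// (toy item type; `limit` stands in for `member_concurrency_limit`):
+    ///
+    /// ```rust,ignore
+    /// use futures::stream::{self, StreamExt};
+    ///
+    /// async fn process_all(items: Vec<u32>, limit: usize) -> Vec<u32> {
+    ///     stream::iter(items)
+    ///         // Turn each item into a future...
+    ///         .map(|i| async move { i * 2 })
+    ///         // ...and keep at most `limit` of them in flight at once.
+    ///         .buffer_unordered(limit)
+    ///         .collect()
+    ///         .await
+    /// }
+    /// ```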
+ async fn process_group_member_states( + &self, + opctx: &OpContext, + group: &MulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + let mut processed = 0; + + // Get members in various states that need processing + let members = self.get_group_members(opctx, group.id()).await?; + + // Batch-fetch instance states for all members to avoid N+1 queries + let instance_states = + Arc::new(self.batch_fetch_instance_states(opctx, &members).await?); + + // Process members concurrently with configurable parallelism + let results = stream::iter(members) + .map(|member| { + let instance_states = Arc::clone(&instance_states); + async move { + let res = self + .process_member_state( + opctx, + group, + &member, + &instance_states, + dataplane_client, + ) + .await; + (member, res) + } + }) + .buffer_unordered(self.member_concurrency_limit) // Configurable concurrency + .collect::>() + .await; + + // Process results and update counters + for (member, result) in results { + match result { + Ok(transition) => match transition { + StateTransition::StateChanged + | StateTransition::NoChange => { + processed += 1; + debug!( + opctx.log, + "processed member state change"; + "member" => ?member, + "group" => ?group, + "transition" => ?transition + ); + } + StateTransition::NeedsCleanup => { + processed += 1; + debug!( + opctx.log, + "member marked for cleanup"; + "member" => ?member, + "group" => ?group + ); + } + }, + Err(e) => { + warn!( + opctx.log, + "failed to process member state change"; + "member" => ?member, + "group" => ?group, + "error" => %e + ); + } + } + } + + Ok(processed) + } + + /// Main dispatch function for processing member state changes. + /// + /// Routes to appropriate node based on member type. + async fn process_member_state( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + // For now, all members are instance-based, but this is where we'd + // dispatch to different processors for different member types + let processor = InstanceMemberProcessor; + + match member.state { + MulticastGroupMemberState::Joining => { + processor + .process_joining( + self, + opctx, + group, + member, + instance_states, + dataplane_client, + ) + .await + } + MulticastGroupMemberState::Joined => { + processor + .process_joined( + self, + opctx, + group, + member, + instance_states, + dataplane_client, + ) + .await + } + MulticastGroupMemberState::Left => { + processor + .process_left( + self, + opctx, + group, + member, + instance_states, + dataplane_client, + ) + .await + } + } + } + + /// Instance-specific handler for members in "Joining" state. + /// + /// Validates instance state and attempts to transition the member to "Joined" + /// when ready. Uses CAS operations for concurrent-safe state updates. 
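+    ///
+    /// "CAS" here means the datastore only applies a write if the row is
+    /// still in the state this pass observed, and reports whether anything
+    /// changed. A sketch of the caller-side pattern, mirroring the
+    /// `*_if_current` datastore methods used throughout this module:
+    ///
+    /// ```rust,ignore
+    /// let updated = datastore
+    ///     .multicast_group_member_set_state_if_current(
+    ///         opctx,
+    ///         MulticastGroupUuid::from_untyped_uuid(group.id()),
+    ///         InstanceUuid::from_untyped_uuid(member.parent_id),
+    ///         MulticastGroupMemberState::Joining, // expected current state
+    ///         MulticastGroupMemberState::Joined,  // desired next state
+    ///     )
+    ///     .await?;
+    /// if !updated {
+    ///     // A concurrent saga or reconciler pass won the race; take no
+    ///     // action and let the next pass re-evaluate.
+    /// }
+    /// ```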
+ async fn handle_instance_joining( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + // Extract pre-fetched instance state + let (instance_valid, current_sled_id) = + self.get_instance_state_from_cache(instance_states, member); + + // Execute reconciliation CAS operation + let reconcile_res = self + .execute_joining_reconciliation( + opctx, + group, + member, + instance_valid, + current_sled_id, + ) + .await?; + + // Process reconciliation result + self.process_joining_reconcile_result( + opctx, + group, + member, + instance_valid, + reconcile_res, + dataplane_client, + ) + .await + } + + /// Extract instance state from pre-fetched cache. + fn get_instance_state_from_cache( + &self, + instance_states: &InstanceStateMap, + member: &MulticastGroupMember, + ) -> (bool, Option) { + instance_states.get(&member.parent_id).copied().unwrap_or((false, None)) + } + + /// Execute the reconciliation CAS operation for a member in "Joining" state. + async fn execute_joining_reconciliation( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_valid: bool, + current_sled_id: Option, + ) -> Result { + let current_sled_id_db = current_sled_id.map(|id| id.into()); + + self.datastore + .multicast_group_member_reconcile_joining( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + instance_valid, + current_sled_id_db, + ) + .await + .context("failed to reconcile member in 'Joining' state") + } + + /// Process the result of a "Joining" state reconciliation operation. + async fn process_joining_reconcile_result( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_valid: bool, + reconcile_result: ReconcileJoiningResult, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + match reconcile_result.action { + ReconcileAction::TransitionedToLeft => { + self.handle_transitioned_to_left(opctx, group, member).await + } + + ReconcileAction::UpdatedSledId { old, new } => { + self.handle_sled_id_updated( + opctx, + group, + member, + instance_valid, + SledIdUpdate { old, new }, + dataplane_client, + ) + .await + } + + ReconcileAction::NotFound | ReconcileAction::NoChange => { + self.handle_no_change_or_not_found( + opctx, + group, + member, + instance_valid, + dataplane_client, + ) + .await + } + } + } + + /// Handle the case where a member was transitioned to "Left" state. + async fn handle_transitioned_to_left( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + ) -> Result { + info!( + opctx.log, + "multicast member lifecycle transition: 'Joining' → 'Left'"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "group_id" => %group.id(), + "group_name" => group.name().as_str(), + "group_multicast_ip" => %group.multicast_ip, + "reason" => "instance_not_valid_for_multicast_traffic" + ); + Ok(StateTransition::StateChanged) + } + + /// Handle the case where a member's sled_id was updated. 
+ async fn handle_sled_id_updated( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_valid: bool, + sled_id_update: SledIdUpdate, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + debug!( + opctx.log, + "updated member sled_id, checking if ready to join"; + "member_id" => %member.id, + "old_sled_id" => ?sled_id_update.old, + "new_sled_id" => ?sled_id_update.new, + "group_state" => ?group.state, + "instance_valid" => instance_valid + ); + + self.try_complete_join_if_ready( + opctx, + group, + member, + instance_valid, + dataplane_client, + ) + .await + } + + /// Handle the case where no changes were made or member was not found. + async fn handle_no_change_or_not_found( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_valid: bool, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + // Check if member is already in Joined state + if member.state == MulticastGroupMemberState::Joined { + debug!( + opctx.log, + "member already in 'Joined' state, no action needed"; + "member_id" => %member.id, + "group_id" => %group.id(), + "group_name" => group.name().as_str() + ); + return Ok(StateTransition::NoChange); + } + + // Try to complete the join if conditions are met + self.try_complete_join_if_ready( + opctx, + group, + member, + instance_valid, + dataplane_client, + ) + .await + } + + fn is_ready_to_join( + &self, + group: &MulticastGroup, + instance_valid: bool, + ) -> bool { + group.state == MulticastGroupState::Active && instance_valid + } + + async fn try_complete_join_if_ready( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_valid: bool, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + if self.is_ready_to_join(group, instance_valid) { + self.complete_instance_member_join( + opctx, + group, + member, + dataplane_client, + ) + .await?; + Ok(StateTransition::StateChanged) + } else { + debug!( + opctx.log, + "member not ready to join: waiting for next run"; + "member_id" => %member.id, + "group_id" => %group.id(), + "group_name" => group.name().as_str(), + "instance_valid" => instance_valid, + "group_state" => ?group.state + ); + Ok(StateTransition::NoChange) + } + } + + /// Instance-specific handler for members in "Joined" state. 
+ async fn handle_instance_joined( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + // Get pre-fetched instance state and sled_id + let (instance_valid, current_sled_id) = instance_states + .get(&member.parent_id) + .copied() + .unwrap_or((false, None)); + + match (instance_valid, current_sled_id) { + // Invalid instance -> remove from dataplane and transition to "Left" + (false, _) => { + self.handle_invalid_instance( + opctx, + group, + member, + dataplane_client, + ) + .await + } + + // Valid instance with sled, but sled changed (migration) + (true, Some(sled_id)) if member.sled_id != Some(sled_id.into()) => { + self.handle_sled_migration( + opctx, + group, + member, + sled_id, + dataplane_client, + ) + .await + } + + // Valid instance with sled, sled unchanged -> verify configuration + (true, Some(_)) => { + self.verify_members(opctx, group, member, dataplane_client) + .await?; + trace!( + opctx.log, + "member configuration verified, no changes needed"; + "member_id" => %member.id, + "group_id" => %group.id() + ); + Ok(StateTransition::NoChange) + } + + // Valid instance but no sled_id (shouldn't typically happen in "Joined" state) + (true, None) => { + self.handle_joined_without_sled( + opctx, + group, + member, + dataplane_client, + ) + .await + } + } + } + + /// Handle a joined member whose instance became invalid. + async fn handle_invalid_instance( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + // Remove from dataplane first + if let Err(e) = self + .remove_member_from_dataplane(opctx, member, dataplane_client) + .await + { + debug!( + opctx.log, + "failed to remove member from dataplane, will retry"; + "member_id" => %member.id, + "error" => ?e + ); + return Err(e); + } + + // Update database state (atomically set "Left" and clear `sled_id`) + let updated = self + .datastore + .multicast_group_member_to_left_if_current( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + MulticastGroupMemberState::Joined, + ) + .await + .context( + "failed to conditionally transition member from 'Joined' to 'Left'", + )?; + + if !updated { + debug!( + opctx.log, + "skipping 'Joined' → 'Left' transition due to concurrent update"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "group_id" => %group.id() + ); + return Ok(StateTransition::NoChange); + } + + info!( + opctx.log, + "multicast member lifecycle transition: 'Joined' → 'Left' (instance invalid)"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "group_id" => %group.id(), + "group_multicast_ip" => %group.multicast_ip, + "dpd_operation" => "remove_member_from_underlay_group", + "reason" => "instance_no_longer_valid_for_multicast_traffic" + ); + Ok(StateTransition::StateChanged) + } + + /// Handle sled migration for a "Joined" member. 
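+    ///
+    /// The sequence is: (1) remove the member's ports on the old sled from
+    /// DPD, (2) CAS-update `sled_id` to the new sled, (3) re-apply the DPD
+    /// configuration on the new sled; if step (3) fails, the member is moved
+    /// back to "Joining" so a later pass can retry.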
+ async fn handle_sled_migration( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + new_sled_id: SledUuid, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + info!( + opctx.log, + "detected sled migration for 'Joined' member: re-applying configuration"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "group_id" => %group.id(), + "group_name" => group.name().as_str(), + "group_multicast_ip" => %group.multicast_ip, + "old_sled_id" => ?member.sled_id, + "new_sled_id" => %new_sled_id + ); + + // Remove from old sled's dataplane first + if let Err(e) = self + .remove_member_from_dataplane(opctx, member, dataplane_client) + .await + { + debug!( + opctx.log, + "failed to remove member from old sled, will retry"; + "member_id" => %member.id, + "old_sled_id" => ?member.sled_id, + "error" => ?e + ); + return Err(e); + } + + // Update sled_id in database using CAS + let updated = self + .datastore + .multicast_group_member_update_sled_id_if_current( + opctx, + InstanceUuid::from_untyped_uuid(member.parent_id), + member.sled_id, + Some(new_sled_id.into()), + ) + .await + .context( + "failed to conditionally update member sled_id for migration", + )?; + + if !updated { + debug!( + opctx.log, + "skipping sled_id update after migration due to concurrent change"; + "member_id" => %member.id, + "group_id" => %group.id(), + "old_sled_id" => ?member.sled_id, + "new_sled_id" => %new_sled_id + ); + return Ok(StateTransition::NoChange); + } + + // Re-apply configuration on new sled + // If this fails (e.g., sled not yet in inventory), transition to "Joining" for retry + match self + .complete_instance_member_join( + opctx, + group, + member, + dataplane_client, + ) + .await + { + Ok(()) => { + info!( + opctx.log, + "member configuration re-applied after sled migration"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "group_id" => %group.id(), + "group_name" => group.name().as_str(), + "group_multicast_ip" => %group.multicast_ip, + "new_sled_id" => %new_sled_id, + "dpd_operation" => "re_add_member_to_underlay_multicast_group" + ); + Ok(StateTransition::StateChanged) + } + Err(e) => { + // Failed to join on new sled. We transition to "Joining" and + // retry next cycle/run. + warn!( + opctx.log, + "failed to complete join on new sled after migration: transitioning to 'Joining' for retry"; + "member_id" => %member.id, + "group_id" => %group.id(), + "new_sled_id" => %new_sled_id, + "error" => %e + ); + + // TODO: Cross-validate inventory sled→port mapping via DDM + // operational state. + // + // We currently trust inventory (MGS/SP topology) for sled→port + // mapping. + // + // We could add validation using DDM on switches to confirm + // operational connectivity: + // + // Query DDM (underlay routing daemon on switches): + // - GET /peers → Map + // - **Needs API addition**: DDM's PeerInfo should include + // port/interface or similar field showing which rear port + // each underlay peer is reachable through + // - Cross-reference: Does sled's underlay address appear as + // an "Active" peer on the expected rear port? + // + // On mismatch: Could invalidate cache, transition member to + // "Left", or trigger inventory reconciliation. Prevents wasted + // retries on sleds with actual connectivity loss vs. inventory + // mismatch. 
+ + let updated = self + .datastore + .multicast_group_member_set_state_if_current( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Joining, + ) + .await + .context( + "failed to transition member to 'Joining' after join failure", + )?; + + if updated { + info!( + opctx.log, + "member transitioned to 'Joining': will retry on next reconciliation run"; + "member_id" => %member.id, + "group_id" => %group.id(), + "new_sled_id" => %new_sled_id + ); + Ok(StateTransition::StateChanged) + } else { + // Let the next cycle handle it + Ok(StateTransition::NoChange) + } + } + } + } + + /// Handle edge case where a "Joined" member has no sled_id. + async fn handle_joined_without_sled( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + warn!( + opctx.log, + "'Joined' member has no sled_id: transitioning to 'Left'"; + "member_id" => %member.id, + "parent_id" => %member.parent_id + ); + + // Remove from dataplane and transition to "Left" + if let Err(e) = self + .remove_member_from_dataplane(opctx, member, dataplane_client) + .await + { + warn!( + opctx.log, + "failed to remove member with no sled_id from dataplane"; + "member_id" => %member.id, + "error" => ?e + ); + return Err(e); + } + + let updated = self + .datastore + .multicast_group_member_set_state_if_current( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + MulticastGroupMemberState::Joined, + MulticastGroupMemberState::Left, + ) + .await + .context( + "failed to conditionally transition member with no sled_id to 'Left'", + )?; + + if !updated { + debug!( + opctx.log, + "skipping 'Joined'→'Left' transition (no sled_id) due to concurrent update"; + "member_id" => %member.id, + "parent_id" => %member.parent_id, + "group_id" => %group.id() + ); + return Ok(StateTransition::NoChange); + } + + info!( + opctx.log, + "multicast member forced to 'Left' state due to missing sled_id"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "group_id" => %group.id(), + "group_multicast_ip" => %group.multicast_ip, + "dpd_operation" => "remove_member_from_underlay_group", + "reason" => "inconsistent_state_sled_id_missing_in_joined_state" + ); + Ok(StateTransition::StateChanged) + } + + /// Instance-specific handler for members in "Left" state. 
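+    ///
+    /// Outcomes: a member with `time_deleted` set is cleaned up from DPD and
+    /// marked for removal; a member whose instance is valid again while the
+    /// group is "Active" is moved back to "Joining"; otherwise the member
+    /// stays in "Left" (with best-effort cleanup of any stale DPD state).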
+ async fn handle_instance_left( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + instance_states: &InstanceStateMap, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + // Get pre-fetched instance state and sled_id + let (instance_valid, current_sled_id) = instance_states + .get(&member.parent_id) + .copied() + .unwrap_or((false, None)); + + // Handle permanent deletion first + if member.time_deleted.is_some() { + self.cleanup_deleted_member(opctx, group, member, dataplane_client) + .await?; + + return Ok(StateTransition::NeedsCleanup); + } + + // Handle reactivation: instance valid and group active -> transition to "Joining" + if instance_valid && group.state == MulticastGroupState::Active { + return self + .reactivate_left_member(opctx, group, member, current_sled_id) + .await; + } + + // Clean up DPD if needed (best-effort) + if !instance_valid && member.sled_id.is_none() { + // This handles the case where a saga transitioned to "Left" (e.g., instance stop) + // but couldn't clean DPD because it doesn't have switch access. + if let Err(e) = self + .remove_member_from_dataplane(opctx, member, dataplane_client) + .await + { + debug!( + opctx.log, + "failed to clean up stale DPD state for 'Left' member"; + "member_id" => %member.id, + "error" => ?e + ); + } + } + + // Stay in "Left" state + Ok(StateTransition::NoChange) + } + + /// Reactivate a member in "Left" state when instance becomes valid again. + /// Transitions the member back to "Joining" state so it can rejoin the group. + async fn reactivate_left_member( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + current_sled_id: Option, + ) -> Result { + debug!( + opctx.log, + "transitioning member from 'Left' to 'Joining': instance became valid and group active"; + "member_id" => %member.id, + "parent_id" => %member.parent_id, + "group_id" => %group.id(), + "group_name" => group.name().as_str() + ); + + let updated = if let Some(sled_id) = current_sled_id { + self.datastore + .multicast_group_member_left_to_joining_if_current( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + sled_id.into(), + ) + .await + .context( + "failed to conditionally transition member from 'Left' to 'Joining' (with sled_id)", + )? + } else { + self.datastore + .multicast_group_member_set_state_if_current( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + MulticastGroupMemberState::Left, + MulticastGroupMemberState::Joining, + ) + .await + .context( + "failed to conditionally transition member from 'Left' to 'Joining'", + )? + }; + + if !updated { + debug!( + opctx.log, + "skipping Left→Joining transition due to concurrent update"; + "member_id" => %member.id, + "group_id" => %group.id() + ); + return Ok(StateTransition::NoChange); + } + + info!( + opctx.log, + "member transitioned to 'Joining' state"; + "member_id" => %member.id, + "group_id" => %group.id(), + "group_name" => group.name().as_str() + ); + Ok(StateTransition::StateChanged) + } + + /// Batch-fetch instance states for multiple members to avoid N+1 queries. + /// Returns a map of instance_id -> (is_valid_for_multicast, current_sled_id). 
+ /// + /// - Batch-fetching all instance records in one query via the datastore + /// - Batch-fetching all VMM records in one query via the datastore + /// - Building the result map from the fetched data + async fn batch_fetch_instance_states( + &self, + opctx: &OpContext, + members: &[MulticastGroupMember], + ) -> Result { + let mut state_map = HashMap::new(); + + if members.is_empty() { + return Ok(state_map); + } + + // Extract unique instance IDs + let instance_ids: Vec = members + .iter() + .map(|m| InstanceUuid::from_untyped_uuid(m.parent_id)) + .collect(); + + // Use datastore method to batch-fetch instance and VMM data + let instance_vmm_data = self + .datastore + .instance_and_vmm_batch_fetch(opctx, &instance_ids) + .await + .context("failed to batch-fetch instance and VMM data")?; + + // Build the state map from the fetched data + state_map.extend(members.iter().map(|member| { + let (is_valid, sled_id) = if let Some((instance, vmm_opt)) = + instance_vmm_data.get(&member.parent_id) + { + let is_valid = matches!( + instance.runtime_state.nexus_state.state(), + InstanceState::Creating + | InstanceState::Starting + | InstanceState::Running + | InstanceState::Rebooting + | InstanceState::Migrating + | InstanceState::Repairing + ); + + let sled_id = vmm_opt.as_ref().map(|vmm| { + SledUuid::from_untyped_uuid(vmm.sled_id.into_untyped_uuid()) + }); + + (is_valid, sled_id) + } else { + // Instance not found (mark as invalid) + (false, None) + }; + + (member.parent_id, (is_valid, sled_id)) + })); + + debug!( + opctx.log, + "batch-fetched instance states for multicast reconciliation"; + "member_count" => members.len(), + "instances_found" => instance_vmm_data.len() + ); + + Ok(state_map) + } + + /// Look up an instance's current sled_id and update the member record if + /// found. + /// + /// Returns `None` if the instance has no sled assignment or cannot be found. 
+ async fn lookup_and_update_member_sled_id( + &self, + opctx: &OpContext, + member: &MulticastGroupMember, + ) -> Result>, anyhow::Error> { + debug!( + opctx.log, + "member has no sled_id, attempting to look up instance sled"; + "member" => ?member + ); + + let instance_id = InstanceUuid::from_untyped_uuid(member.parent_id); + + // Try to get instance state + let instance_state = match self + .datastore + .instance_get_state(opctx, &instance_id) + .await + { + Ok(Some(state)) => state, + Ok(None) => { + debug!( + opctx.log, + "instance not found, cannot complete join"; + "member" => ?member + ); + return Ok(None); + } + Err(e) => { + debug!( + opctx.log, + "failed to look up instance state"; + "member" => ?member, + "error" => ?e + ); + return Ok(None); + } + }; + + // Try to get sled_id from VMM + let current_sled_id = match instance_state.propolis_id { + Some(propolis_id) => { + match self + .datastore + .vmm_fetch( + opctx, + &PropolisUuid::from_untyped_uuid(propolis_id), + ) + .await + { + Ok(vmm) => Some(SledUuid::from_untyped_uuid( + vmm.sled_id.into_untyped_uuid(), + )), + Err(_) => None, + } + } + None => None, + }; + + match current_sled_id { + Some(sled_id) => { + debug!( + opctx.log, + "found instance sled, updating member record"; + "member" => ?member, + "sled_id" => %sled_id + ); + + // Update the member record with the correct sled_id + self.datastore + .multicast_group_member_update_sled_id( + opctx, + InstanceUuid::from_untyped_uuid(member.parent_id), + Some(sled_id.into()), + ) + .await + .context("failed to update member sled_id")?; + + Ok(Some(sled_id.into())) + } + None => { + debug!( + opctx.log, + "instance has no sled_id, cannot complete join"; + "member" => ?member + ); + Ok(None) + } + } + } + + /// Complete a member join operation ("Joining" -> "Joined") for an instance. + async fn complete_instance_member_join( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + debug!( + opctx.log, + "completing member join"; + "member" => ?member, + "group" => ?group + ); + + // Get sled_id from member record, or look it up and update if missing + let sled_id = match member.sled_id { + Some(id) => id, + None => { + match self + .lookup_and_update_member_sled_id(opctx, member) + .await? + { + Some(id) => id, + None => return Ok(()), // No sled available, cannot join + } + } + }; + + self.add_member_to_dataplane( + opctx, + group, + member, + sled_id.into(), + dataplane_client, + ) + .await?; + + // Transition to "Joined" state (only if still in "Joining") + let updated = self + .datastore + .multicast_group_member_set_state_if_current( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + MulticastGroupMemberState::Joining, + MulticastGroupMemberState::Joined, + ) + .await + .context( + "failed to conditionally transition member to 'Joined' state", + )?; + if !updated { + debug!( + opctx.log, + "skipping Joining→Joined transition due to concurrent update"; + "member_id" => %member.id, + "group_id" => %group.id() + ); + } + + info!( + opctx.log, + "member join completed"; + "member_id" => %member.id, + "group_id" => %group.id(), + "sled_id" => %sled_id + ); + + Ok(()) + } + + /// Apply member dataplane configuration (via DPD-client). 
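+    ///
+    /// Each switch backplane port resolved for the member's sled becomes one
+    /// rear/underlay member entry in the DPD group. A sketch of the value
+    /// constructed in the loop below (field comments reflect the values
+    /// produced by the sled-to-port mapping in this module):
+    ///
+    /// ```rust,ignore
+    /// dpd_client::types::MulticastGroupMember {
+    ///     port_id: port_config.port_id.clone(), // the sled's rear (cubby) port
+    ///     link_id: port_config.link_id,         // LinkId(0) for backplane links
+    ///     direction: port_config.direction,     // Direction::Underlay
+    /// };
+    /// ```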
+ async fn add_member_to_dataplane( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + sled_id: SledUuid, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + let underlay_group_id = group.underlay_group_id.ok_or_else(|| { + anyhow::Error::msg(format!( + "no underlay group for external group {}", + group.id() + )) + })?; + + let underlay_group = self + .datastore + .underlay_multicast_group_fetch(opctx, underlay_group_id) + .await + .context( + "failed to fetch underlay group for member configuration", + )?; + + // Resolve sled to switch port configurations + let port_configs = self + .resolve_sled_to_switch_ports(opctx, sled_id, dataplane_client) + .await + .context("failed to resolve sled to switch ports")?; + + for port_config in &port_configs { + let dataplane_member = dpd_client::types::MulticastGroupMember { + port_id: port_config.port_id.clone(), + link_id: port_config.link_id, + direction: port_config.direction, + }; + + dataplane_client + .add_member(&underlay_group, dataplane_member) + .await + .context("failed to apply member configuration via DPD")?; + + debug!( + opctx.log, + "member added to DPD"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "port_id" => %port_config.port_id + ); + } + + // TODO: Add uplink (front port) members for egress traffic through to + // Dendrite. + // + // When this is the first instance joining the group, we should also add + // uplink members with `Direction::External` for multicast egress + // traffic out of the rack. + // These uplink members follow a different lifecycle: + // - Added when first instance joins (check group member count) + // - Removed when last instance leaves (would be handled in + // `remove_member_from_dataplane`) + // + // Uplink ports are probably going to be a group-level configuration + // added by external params. + + info!( + opctx.log, + "multicast member configuration applied to switch forwarding tables"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "sled_id" => %sled_id, + "port_count" => port_configs.len(), + "dpd_operation" => "add_member_to_underlay_multicast_group" + ); + + Ok(()) + } + + /// Remove member from known port configurations. 
+ async fn remove_from_known_ports( + &self, + opctx: &OpContext, + member: &MulticastGroupMember, + sled_id: DbTypedUuid, + port_configs: &[SwitchBackplanePort], + underlay_group: &nexus_db_model::UnderlayMulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + // Remove member from DPD for each port on the sled + for port_config in port_configs { + let dataplane_member = dpd_client::types::MulticastGroupMember { + port_id: port_config.port_id.clone(), + link_id: port_config.link_id, + direction: port_config.direction, + }; + + dataplane_client + .remove_member(underlay_group, dataplane_member) + .await + .context("failed to remove member configuration via DPD")?; + + debug!( + opctx.log, + "member removed from DPD"; + "port_id" => %port_config.port_id, + "sled_id" => %sled_id + ); + } + + info!( + opctx.log, + "multicast member configuration removed from switch forwarding tables"; + "member_id" => %member.id, + "instance_id" => %member.parent_id, + "sled_id" => %sled_id, + "port_count" => port_configs.len(), + "dpd_operation" => "remove_member_from_underlay_multicast_group", + "reason" => "instance_state_change_or_migration" + ); + Ok(()) + } + + /// Compute union of active rear/underlay port IDs across all "Joined" + /// members in a group. Excludes a specific member ID if provided + /// (useful when removing a member). + /// + /// Returns `MemberPortUnion::Complete` if all "Joined" members were + /// successfully resolved, or `MemberPortUnion::Partial` if some members + /// failed to resolve. + async fn compute_active_member_ports( + &self, + opctx: &OpContext, + group_id: Uuid, + dataplane_client: &MulticastDataplaneClient, + exclude_member_id: Option, + ) -> Result { + let group_members = self + .get_group_members(opctx, group_id) + .await + .context("failed to fetch group members for expected port union")?; + + // Filter to joined members, excluding specified member if provided + let joined_members = group_members + .into_iter() + .filter(|mem| { + exclude_member_id + .map_or(true, |id| mem.id.into_untyped_uuid() != id) + }) + .filter(|mem| mem.state == MulticastGroupMemberState::Joined) + .collect::>(); + + // Resolve all members to ports, tracking successes and failures + let member_ports = stream::iter(joined_members) + .then(|mem| async move { + // Check for missing sled_id + let Some(mem_sled_id) = mem.sled_id else { + warn!( + opctx.log, + "joined member missing sled_id: marking union incomplete"; + "member_id" => %mem.id, + "group_id" => %group_id + ); + return None; + }; + + // Attempt to resolve sled to switch ports + match self + .resolve_sled_to_switch_ports( + opctx, + mem_sled_id.into(), + dataplane_client, + ) + .await + { + Ok(ports) => Some((mem, ports)), + Err(e) => { + warn!( + opctx.log, + "failed to resolve member ports for union computation"; + "member_id" => %mem.id, + "sled_id" => %mem_sled_id, + "error" => %e + ); + None + } + } + }) + .collect::>() + .await; + + // Separate successful resolutions from failures + let (resolved, failures): (Vec<_>, Vec<_>) = + member_ports.into_iter().partition(Option::is_some); + let resolved: Vec<_> = resolved.into_iter().flatten().collect(); + let failure_cnt = failures.len(); + + // Extract rear/underlay ports from all successfully resolved members + let active_member_ports = resolved + .into_iter() + .flat_map(|(_, ports)| ports) + .filter_map(|cfg| { + let member = dpd_client::types::MulticastGroupMember { + port_id: cfg.port_id.clone(), + link_id: cfg.link_id, + direction: 
cfg.direction, + }; + is_rear_underlay_member(&member).then(|| cfg.port_id) + }) + .collect::>(); + + // Return `Complete` or `Partial` based on whether all members resolved + if failure_cnt == 0 { + Ok(MemberPortUnion::Complete(active_member_ports)) + } else { + Ok(MemberPortUnion::Partial(active_member_ports)) + } + } + + /// Remove member by querying DPD directly when sled info is unavailable. + /// (Used when `sled_id` unavailable or resolution fails). + async fn remove_member_fallback( + &self, + opctx: &OpContext, + member: &MulticastGroupMember, + underlay_group: &nexus_db_model::UnderlayMulticastGroup, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + // Sled resolution failed or no sled_id available (e.g., removed + // from inventory, or member.sled_id=NULL). + // + // We only remove rear/underlay ports to avoid interfering with + // other member types (i.e., uplink/external members). + info!( + opctx.log, + "using fallback path: querying DPD directly for member removal"; + "member_id" => %member.id, + "member_sled_id" => ?member.sled_id, + "reason" => "sled_id_unavailable_or_resolution_failed" + ); + + let current_members = dataplane_client + .fetch_underlay_members(underlay_group.multicast_ip.ip()) + .await + .context("failed to fetch DPD state for member removal")?; + + // Compute union of active member ports across all currently + // "Joined" members for this group. We will only remove ports that are + // not required by any active member. + // + // We exclude the current member from the union since we're removing it. + let active_member_ports = match self + .compute_active_member_ports( + opctx, + member.external_group_id, + dataplane_client, + Some(member.id.into_untyped_uuid()), + ) + .await + { + Ok(MemberPortUnion::Complete(ports)) => ports, + Ok(MemberPortUnion::Partial(_ports)) => { + // Union is partial (some members failed resolution) + // Skip pruning to avoid removing ports that may still be needed + info!( + opctx.log, + "union incomplete: skipping stale port removal to avoid disrupting unresolved members"; + "member_id" => %member.id, + "reason" => "some_joined_members_failed_port_resolution" + ); + return Ok(()); + } + Err(e) => { + // Failed to compute union (avoid removing anything) + info!( + opctx.log, + "failed to compute active member ports for fallback removal: skipping cleanup"; + "member_id" => %member.id, + "error" => %e + ); + return Ok(()); + } + }; + + if let Some(members) = current_members { + for current_member in &members { + // Only consider rear/underlay ports (instance members) + if !is_rear_underlay_member(current_member) { + continue; + } + + // Remove only if not in union of active member ports + if !active_member_ports.contains(¤t_member.port_id) { + dataplane_client + .remove_member(underlay_group, current_member.clone()) + .await + .context( + "failed to remove member from DPD (fallback)", + )?; + + info!( + opctx.log, + "removed stale rear/underlay member via fallback"; + "member_id" => %member.id, + "port_id" => %current_member.port_id + ); + } + } + } + Ok(()) + } + + /// Remove member dataplane configuration (via DPD-client). 
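+    ///
+    /// Removal takes the direct path when the member's `sled_id` resolves to
+    /// backplane ports; otherwise it falls back to querying DPD and pruning
+    /// only rear/underlay ports that no other "Joined" member still needs.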
+ async fn remove_member_from_dataplane( + &self, + opctx: &OpContext, + member: &MulticastGroupMember, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + let group = self + .datastore + .multicast_group_fetch( + opctx, + MulticastGroupUuid::from_untyped_uuid(member.external_group_id), + ) + .await + .context("failed to fetch group for member removal")?; + + let underlay_group_id = group.underlay_group_id.ok_or_else(|| { + anyhow::Error::msg(format!( + "no underlay group for external group {}", + member.external_group_id + )) + })?; + + let underlay_group = self + .datastore + .underlay_multicast_group_fetch(opctx, underlay_group_id) + .await + .context("failed to fetch underlay group for member removal")?; + + // Try to remove via known ports if we have a `sled_id` and can resolve it + if let Some(sled_id) = member.sled_id { + if let Ok(port_configs) = self + .resolve_sled_to_switch_ports( + opctx, + sled_id.into(), + dataplane_client, + ) + .await + { + self.remove_from_known_ports( + opctx, + member, + sled_id, + &port_configs, + &underlay_group, + dataplane_client, + ) + .await?; + return Ok(()); + } + } + + // Fallback: query DPD directly when `sled_id` unavailable or + // resolution fails + self.remove_member_fallback( + opctx, + member, + &underlay_group, + dataplane_client, + ) + .await?; + + Ok(()) + } + + /// Clean up member dataplane configuration with strict error handling. + /// Ensures dataplane consistency by failing if removal operations fail. + async fn cleanup_member_from_dataplane( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + debug!( + opctx.log, + "cleaning up member from dataplane"; + "member_id" => %member.id, + "group_id" => %group.id(), + "group_name" => group.name().as_str(), + "parent_id" => %member.parent_id, + "time_deleted" => ?member.time_deleted + ); + + // Strict removal from dataplane (fail on errors) + self.remove_member_from_dataplane(opctx, member, dataplane_client) + .await + .context( + "failed to remove member configuration via DPD during cleanup", + )?; + + info!( + opctx.log, + "member cleaned up from dataplane"; + "member_id" => %member.id, + "group_id" => %group.id(), + "group_name" => group.name().as_str() + ); + Ok(()) + } + + /// Verify that a "Joined" member is consistent with dataplane configuration. + /// + /// This function ensures the member is on the correct switch ports by: + /// - Fetching current DPD state to see what ports the member is actually on + /// - Computing expected ports from a refreshed cache + /// - Removing the member from any unexpected/stale rear ports + /// - Adding the member to expected ports + /// + /// This handles cases like `sp_slot` changes where the sled's physical + /// location changed but the `sled_id` stayed the same. 
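+    ///
+    /// The pruning rule (sketch): a rear/underlay port currently programmed
+    /// in DPD is stale only if no "Joined" member of the group still resolves
+    /// to it. `current_ports` and `active_member_ports` are stand-ins for the
+    /// values computed in this function:
+    ///
+    /// ```rust,ignore
+    /// let stale: Vec<&_> = current_ports
+    ///     .iter()
+    ///     .filter(|m| is_rear_underlay_member(m))
+    ///     .filter(|m| !active_member_ports.contains(&m.port_id))
+    ///     .collect();
+    /// ```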
+ async fn verify_members( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + debug!( + opctx.log, + "verifying joined member consistency"; + "member_id" => %member.id, + "group_id" => %group.id(), + "group_name" => group.name().as_str() + ); + + // Get sled_id from member + let sled_id = match member.sled_id { + Some(id) => id, + None => { + debug!(opctx.log, + "member has no sled_id, skipping verification"; + "member_id" => %member.id + ); + return Ok(()); + } + }; + + // Get underlay group + let underlay_group_id = group.underlay_group_id.ok_or_else(|| { + anyhow::Error::msg(format!( + "no underlay group for external group {}", + group.id() + )) + })?; + + let underlay_group = self + .datastore + .underlay_multicast_group_fetch(opctx, underlay_group_id) + .await + .context("failed to fetch underlay group")?; + + // Resolve expected member configurations (may refresh cache if TTL expired) + let expected_port_configs = match self + .resolve_sled_to_switch_ports( + opctx, + sled_id.into(), + dataplane_client, + ) + .await + { + Ok(configs) => configs, + Err(e) => { + // If we can't resolve the sled anymore (e.g., removed from inventory), + // remove from dataplane and transition to "Left" + warn!( + opctx.log, + "failed to resolve sled to switch ports: removing from dataplane"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "error" => %e + ); + + // Best effort removal on verification + let _ = self + .remove_member_from_dataplane( + opctx, + member, + dataplane_client, + ) + .await; + + let updated = self + .datastore + .multicast_group_member_to_left_if_current( + opctx, + MulticastGroupUuid::from_untyped_uuid(group.id()), + InstanceUuid::from_untyped_uuid(member.parent_id), + MulticastGroupMemberState::Joined, + ) + .await + .context("failed to transition member to 'Left' after port resolution failure")?; + + if updated { + info!( + opctx.log, + "member transitioned to 'Left': sled no longer resolvable"; + "member_id" => %member.id, + "group_id" => %group.id() + ); + } + return Ok(()); + } + }; + + // Fetch current DPD state to identify stale ports + // We fetch from one switch since all should be consistent + let current_dpd_members = dataplane_client + .fetch_underlay_members(underlay_group.multicast_ip.ip()) + .await + .context( + "failed to fetch current underlay group members from DPD", + )?; + + // Build union of active member ports across all currently + // joined members for this group. This avoids removing ports needed by + // other members while verifying a single member. 
+ let active_member_ports = match self + .compute_active_member_ports( + opctx, + group.id(), + dataplane_client, + None, // Don't exclude any member + ) + .await + { + Ok(MemberPortUnion::Complete(ports)) => Some(ports), + Ok(MemberPortUnion::Partial(_ports)) => { + // Union is partial (skip stale port removal) + info!( + opctx.log, + "union incomplete: skipping stale port removal to avoid disrupting unresolved members"; + "member_id" => %member.id, + "group_id" => %group.id(), + "reason" => "some_joined_members_failed_port_resolution" + ); + None + } + Err(e) => { + // Failed to compute union (skip stale port removal) + info!( + opctx.log, + "failed to compute active member ports for verification: skipping stale port removal"; + "member_id" => %member.id, + "group_id" => %group.id(), + "error" => %e + ); + None + } + }; + + // Only prune stale ports if we successfully resolved All "Joined" members. + // If we could not compute active member ports or if some members failed + // to resolve, avoid removing anything to prevent disrupting other members. + // We'll still proceed to ensure adding expected ports for this member. + let mut stale_ports = Vec::new(); + if let Some(ref active_ports) = active_member_ports { + if let Some(current_members) = ¤t_dpd_members { + for current_member in current_members { + // Only consider rear ports with underlay direction + if !is_rear_underlay_member(current_member) { + continue; + } + + // If this port is not in our active member set, it's stale + if !active_ports.contains(¤t_member.port_id) { + stale_ports.push(current_member.clone()); + } + } + } + } + + // Remove stale ports first + if !stale_ports.is_empty() { + info!( + opctx.log, + "detected member on stale ports: removing before verifying expected ports"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "group_id" => %group.id(), + "stale_port_count" => stale_ports.len(), + "reason" => "sled_physical_location_changed_or_cache_refresh" + ); + + for stale_member in &stale_ports { + match dataplane_client + .remove_member(&underlay_group, stale_member.clone()) + .await + { + Ok(()) => { + debug!( + opctx.log, + "removed member from stale port"; + "member_id" => %member.id, + "old_port_id" => %stale_member.port_id, + "sled_id" => %sled_id + ); + } + Err(e) => { + // Continue as the port might have been removed already + warn!( + opctx.log, + "failed to remove member from stale port (may already be gone)"; + "member_id" => %member.id, + "port_id" => %stale_member.port_id, + "error" => %e + ); + } + } + } + } + + // Add member to all expected ports + for port_config in &expected_port_configs { + let expected_member = dpd_client::types::MulticastGroupMember { + port_id: port_config.port_id.clone(), + link_id: port_config.link_id, + direction: port_config.direction, + }; + + match dataplane_client + .add_member(&underlay_group, expected_member) + .await + { + Ok(()) => { + debug!( + opctx.log, + "member verified/added to expected port"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "port_id" => %port_config.port_id + ); + } + Err(e) => { + // Log as warning since we expect this to succeed + warn!( + opctx.log, + "failed to add member to expected port"; + "member_id" => %member.id, + "port_id" => %port_config.port_id, + "error" => %e + ); + return Err(e.into()); + } + } + } + + info!( + opctx.log, + "member verification completed"; + "member_id" => %member.id, + "sled_id" => %sled_id, + "expected_port_count" => expected_port_configs.len(), + "stale_ports_removed" => stale_ports.len() + ); 
+ + Ok(()) + } + + /// Cleanup members that are "Left" and time_deleted. + /// This permanently removes member records that are no longer needed. + pub async fn cleanup_deleted_members( + &self, + opctx: &OpContext, + ) -> Result { + trace!(opctx.log, "cleaning up deleted multicast members"); + + let deleted_count = self + .datastore + .multicast_group_members_complete_delete(opctx) + .await + .context("failed to cleanup deleted members")?; + + if deleted_count > 0 { + info!( + opctx.log, + "cleaned up deleted multicast members"; + "members_deleted" => deleted_count + ); + } + + Ok(deleted_count) + } + + /// Get all members for a group. + async fn get_group_members( + &self, + opctx: &OpContext, + group_id: Uuid, + ) -> Result, anyhow::Error> { + self.datastore + .multicast_group_members_list_by_id( + opctx, + MulticastGroupUuid::from_untyped_uuid(group_id), + &DataPageParams::max_page(), + ) + .await + .context("failed to list group members") + } + + /// Check cache for a sled mapping. + async fn check_sled_cache( + &self, + cache_key: SledUuid, + ) -> Option> { + let cache = self.sled_mapping_cache.read().await; + let (cached_at, mappings) = &*cache; + + // If we can't determine elapsed time, consider cache expired + let elapsed = match cached_at.elapsed() { + Ok(duration) => duration, + Err(_) => return None, + }; + + if elapsed < self.sled_cache_ttl { + mappings.get(&cache_key).cloned() + } else { + None + } + } + + /// Detect backplane topology change and invalidate sled cache if needed. + /// + /// Compares the full (PortId, BackplaneLink) pairs to detect changes in: + /// - Port count (sleds added/removed) + /// - Port IDs (different physical slots) + /// - Link attributes (speed, lanes, connector type changes) + async fn handle_backplane_topology_change( + &self, + opctx: &OpContext, + previous_map: &Option, + new_map: &BackplaneMap, + ) { + if let Some(prev_map) = previous_map { + // Compare full maps (keys + values) to detect any topology changes + if prev_map != new_map { + info!( + opctx.log, + "backplane map topology change detected"; + "previous_port_count" => prev_map.len(), + "new_port_count" => new_map.len() + ); + info!( + opctx.log, + "invalidating sled mapping cache due to backplane topology change" + ); + self.invalidate_sled_mapping_cache().await; + } + } + } + + /// Fetch the backplane map from DPD-client with caching. + /// + /// The client responds with the entire mapping of all cubbies in a rack. + /// + /// The backplane map should remain consistent same across all switches, + /// so we query one switch and cache the result. 
+ async fn fetch_backplane_map( + &self, + opctx: &OpContext, + dataplane_client: &MulticastDataplaneClient, + ) -> Result { + // Check cache first + let previous_map = { + let cache = self.backplane_map_cache.read().await; + if let Some((cached_at, ref map)) = *cache { + // If we can't determine elapsed time, consider cache expired + let elapsed = match cached_at.elapsed() { + Ok(duration) => duration, + Err(_) => { + // If errored, we consider cache expired and return + // previous map for comparison + return Ok(map.clone()); + } + }; + + if elapsed < self.backplane_cache_ttl { + trace!( + opctx.log, + "backplane map cache hit"; + "port_count" => map.len() + ); + return Ok(map.clone()); + } + // Cache expired but keep reference to previous map for comparison + Some(map.clone()) + } else { + None + } + }; + + // Fetch from DPD via dataplane client on cache miss + debug!( + opctx.log, + "fetching backplane map from DPD (cache miss or stale)" + ); + + let backplane_map = + dataplane_client.fetch_backplane_map().await.context( + "failed to query backplane_map from DPD via dataplane client", + )?; + + // Detect topology change and invalidate sled cache if needed + self.handle_backplane_topology_change( + opctx, + &previous_map, + &backplane_map, + ) + .await; + + info!( + opctx.log, + "fetched backplane map from DPD"; + "port_count" => backplane_map.len() + ); + + // Update cache + let mut cache = self.backplane_map_cache.write().await; + *cache = Some((SystemTime::now(), backplane_map.clone())); + + Ok(backplane_map) + } + + /// Resolve a sled ID to switch ports for multicast traffic. + pub async fn resolve_sled_to_switch_ports( + &self, + opctx: &OpContext, + sled_id: SledUuid, + dataplane_client: &MulticastDataplaneClient, + ) -> Result, anyhow::Error> { + // Check cache first + if let Some(port_configs) = self.check_sled_cache(sled_id).await { + return Ok(port_configs); + } + + // Refresh cache if stale or missing entry + if let Err(e) = + self.refresh_sled_mapping_cache(opctx, dataplane_client).await + { + warn!( + opctx.log, + "failed to refresh sled mapping cache, using stale data"; + "sled_id" => %sled_id, + "error" => %e + ); + // Try cache again even with stale data + if let Some(port_configs) = self.check_sled_cache(sled_id).await { + return Ok(port_configs); + } + // If cache refresh failed and no stale data, propagate error + return Err(e.context("failed to refresh sled mapping cache and no cached data available")); + } + + // Try cache again after successful refresh + if let Some(port_configs) = self.check_sled_cache(sled_id).await { + return Ok(port_configs); + } + + // Sled not found after successful cache refresh. We treat this as an error + // so callers can surface this condition rather than silently applying + // no changes. + Err(anyhow::Error::msg(format!( + "failed to resolve sled to switch ports: \ + sled {sled_id} not found in mapping cache (not a scrimlet or removed)" + ))) + } + + /// Find SP in inventory for a given sled's baseboard. + /// Tries exact match (serial + part), then falls back to serial-only. 
+ fn find_sp_for_sled<'a>( + &self, + inventory: &'a nexus_types::inventory::Collection, + sled: &Sled, + ) -> Option<&'a nexus_types::inventory::ServiceProcessor> { + // Try exact match first (serial + part) + if let Some((_, sp)) = inventory.sps.iter().find(|(bb, _)| { + bb.serial_number == sled.serial_number() + && bb.part_number == sled.part_number() + }) { + return Some(sp); + } + + // Fall back to serial-only match + inventory + .sps + .iter() + .find(|(bb, _)| bb.serial_number == sled.serial_number()) + .map(|(_, sp)| sp) + } + + /// Map a single sled to switch port(s), validating against backplane map. + /// Returns Ok(Some(ports)) on success, Ok(None) if validation failed. + fn map_sled_to_ports( + &self, + opctx: &OpContext, + sled: &Sled, + sp_slot: u32, + backplane_map: &BackplaneMap, + ) -> Result>, anyhow::Error> { + let port_id = dpd_client::types::PortId::Rear( + dpd_client::types::Rear::try_from(format!("rear{sp_slot}")) + .context("invalid rear port number")?, + ); + + // Validate against hardware backplane map + if !backplane_map.contains_key(&port_id) { + warn!( + opctx.log, + "sled sp_slot validation failed (not in hardware backplane map)"; + "sled_id" => %sled.id(), + "sp_slot" => sp_slot, + "expected_port" => %format!("rear{}", sp_slot), + "reason" => "inventory_sp_slot_out_of_range_for_platform", + "action" => "skipped_sled_in_mapping_cache" + ); + return Ok(None); + } + + debug!( + opctx.log, + "mapped sled to rear port via inventory"; + "sled_id" => %sled.id(), + "sp_slot" => sp_slot, + "rear_port" => %format!("rear{}", sp_slot) + ); + + Ok(Some(vec![SwitchBackplanePort { + port_id, + link_id: dpd_client::types::LinkId(0), + direction: dpd_client::types::Direction::Underlay, + }])) + } + + /// Build sled-to-port mappings for all sleds using inventory and backplane data. + /// Returns (mappings, validation_failures). + fn build_sled_mappings( + &self, + opctx: &OpContext, + sleds: &[Sled], + inventory: &nexus_types::inventory::Collection, + backplane_map: &BackplaneMap, + ) -> Result< + (HashMap>, usize), + anyhow::Error, + > { + sleds.iter().try_fold( + (HashMap::new(), 0), + |(mut mappings, mut validation_failures), sled| { + let Some(sp) = self.find_sp_for_sled(inventory, sled) else { + debug!( + opctx.log, + "no SP data found for sled in current inventory collection"; + "sled_id" => %sled.id(), + "serial_number" => sled.serial_number(), + "part_number" => sled.part_number() + ); + return Ok((mappings, validation_failures)); + }; + + match self.map_sled_to_ports( + opctx, + sled, + sp.sp_slot.into(), + backplane_map, + )? { + Some(ports) => { + mappings.insert(sled.id(), ports); + } + None => { + validation_failures += 1; + } + } + + Ok((mappings, validation_failures)) + }, + ) + } + + /// Refresh the sled-to-switch-port mapping cache using inventory data. + /// + /// Maps each sled to its physical rear (backplane) port on the switch by: + /// 1. Getting sled's baseboard serial/part from the sled record + /// 2. Looking up the service processor (SP) in inventory for that baseboard + /// (SP information is collected from MGS by the inventory collector) + /// 3. Using `sp.sp_slot` (cubby number) to determine the rear port identifier + /// 4. 
Creating `PortId::Rear(RearPort::try_from(format!("rear{sp_slot}")))` + /// + /// On the Dendrite side (switch's DPD daemon), a similar mapping is performed: + /// + /// ```rust,ignore + /// // From dendrite/dpd/src/port_map.rs rev_ab_port_map() + /// for entry in SIDECAR_REV_AB_BACKPLANE_MAP.iter() { + /// let port = PortId::Rear(RearPort::try_from(entry.cubby).unwrap()); + /// inner.insert(port, Connector::QSFP(entry.tofino_connector.into())); + /// } + /// ``` + /// + /// Where `entry.cubby` is the physical cubby/slot number (same as our `sp_slot`), + /// and this maps it to a `PortId::Rear` that DPD can program on the Tofino ASIC. + async fn refresh_sled_mapping_cache( + &self, + opctx: &OpContext, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + // Fetch required data + let inventory = self + .datastore + .inventory_get_latest_collection(opctx) + .await + .context("failed to get latest inventory collection")? + .ok_or_else(|| { + anyhow::Error::msg("no inventory collection available") + })?; + + // First attempt with current backplane map + let mut backplane_map = + self.fetch_backplane_map(opctx, dataplane_client).await?; + + let sleds = self + .datastore + .sled_list_all_batched(opctx, SledFilter::InService) + .await + .context("failed to list in-service sleds for inventory mapping")?; + + // Build sled → port mappings + let (mut mappings, mut validation_failures) = self + .build_sled_mappings(opctx, &sleds, &inventory, &backplane_map)?; + + // If we had validation failures, invalidate backplane cache and retry once + if validation_failures > 0 { + info!( + opctx.log, + "sled validation failures detected: invalidating backplane cache and retrying"; + "validation_failures" => validation_failures + ); + + // Invalidate the backplane cache + self.invalidate_backplane_cache().await; + + // Fetch fresh backplane map + backplane_map = self + .fetch_backplane_map(opctx, dataplane_client) + .await + .context( + "failed to fetch fresh backplane map after invalidation", + )?; + + // Retry mapping with fresh backplane data + (mappings, validation_failures) = self.build_sled_mappings( + opctx, + &sleds, + &inventory, + &backplane_map, + )?; + + // Log sleds that still fail with fresh backplane data + if validation_failures > 0 { + warn!( + opctx.log, + "some sleds still fail validation with fresh backplane map"; + "validation_failures" => validation_failures + ); + } + } + + // Update cache + let sled_count = mappings.len(); + let mut cache = self.sled_mapping_cache.write().await; + *cache = (SystemTime::now(), mappings); + + // Log results + if validation_failures > 0 { + warn!( + opctx.log, + "sled mapping cache refreshed with validation failures"; + "total_sleds" => sleds.len(), + "mapped_sleds" => sled_count, + "validation_failures" => validation_failures + ); + } else { + info!( + opctx.log, + "sled mapping cache refreshed successfully"; + "total_sleds" => sleds.len(), + "mapped_sleds" => sled_count + ); + } + + Ok(()) + } + + /// Cleanup a member that is marked for deletion (time_deleted set). + async fn cleanup_deleted_member( + &self, + opctx: &OpContext, + group: &MulticastGroup, + member: &MulticastGroupMember, + dataplane_client: &MulticastDataplaneClient, + ) -> Result<(), anyhow::Error> { + // Use the consolidated cleanup helper with strict error handling + self.cleanup_member_from_dataplane( + opctx, + group, + member, + dataplane_client, + ) + .await + } + + /// Get all multicast groups that need member reconciliation. 
+ /// Returns both "Creating" and "Active" groups. + async fn get_reconcilable_groups( + &self, + opctx: &OpContext, + ) -> Result, anyhow::Error> { + // For now, we still make two queries but this is where we'd add + // a single combined query method if/when the datastore supports it + let mut groups = self + .datastore + .multicast_groups_list_by_state( + opctx, + MulticastGroupState::Creating, + &DataPageParams::max_page(), + ) + .await + .context("failed to list 'Creating' multicast groups")?; + + let active_groups = self + .datastore + .multicast_groups_list_by_state( + opctx, + MulticastGroupState::Active, + &DataPageParams::max_page(), + ) + .await + .context("failed to list 'Active' multicast groups")?; + + groups.extend(active_groups); + + debug!( + opctx.log, + "found groups for member reconciliation"; + "total_groups" => groups.len() + ); + + Ok(groups) + } +} diff --git a/nexus/src/app/background/tasks/multicast/mod.rs b/nexus/src/app/background/tasks/multicast/mod.rs new file mode 100644 index 00000000000..a8a26cac7a7 --- /dev/null +++ b/nexus/src/app/background/tasks/multicast/mod.rs @@ -0,0 +1,911 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for reconciling multicast group state with dendrite switch +//! configuration. +//! +//! # Reliable Persistent Workflow (RPW) +//! +//! This module implements the RPW pattern for multicast groups. It ensures +//! eventual consistency between database state and the physical network +//! switches (Dendrite). Sagas handle immediate transactional operations; +//! RPW handles ongoing background reconciliation. +//! +//! ## Distributed State Convergence +//! +//! Multicast converges state across several distributed components: +//! - Database state (groups, members, routing configuration) +//! - Dataplane state (match-action tables via Dendrite/DPD) +//! - Instance lifecycle (start/stop/migrate affecting group membership) +//! - Network topology (sled-to-switch mappings, port configurations) +//! +//! ## Architecture: RPW +/- Sagas +//! +//! **Sagas handle immediate operations:** +//! - User API requests (create/delete groups) +//! - Instance lifecycle events (start/stop) +//! - Database state transitions +//! - Initial validation and resource allocation +//! +//! **RPW handles background reconciliation:** +//! - Dataplane state convergence +//! - Group and Member state checks and transitions ("Joining" → "Joined" → "Left") +//! - Drift detection and correction +//! - Cleanup of orphaned resources +//! +//! ## Multicast Group Architecture +//! +//! ### External vs Underlay Groups +//! +//! The multicast implementation uses a bifurcated design with paired groups: +//! +//! **External Groups** (customer-facing): +//! - IPv4/IPv6 addresses allocated from IP pools +//! - Exposed via operator APIs and network interfaces +//! - Subject to VPC routing and firewall policies +//! +//! **Underlay Groups** (admin-scoped IPv6): +//! - IPv6 multicast scope per RFC 7346; admin-local is ff04::/16 +//! +//! - Internal rack forwarding to guest instances +//! - Mapped 1:1 with external groups via deterministic mapping +//! +//! ### Forwarding Architecture (Incoming multicast traffic to guests) +//! +//! Traffic flow for multicast into the rack and to guest instances: +//! `External Network → Switch ASIC → Underlay Group → OPTE (decap) → Instance` +//! +//! 1. 
**External traffic** arrives into the rack on an external multicast address
+//! 2. **Switch ASIC translation** performs NAT/encapsulation from external to underlay multicast
+//! 3. **Underlay forwarding** via DPD-programmed P4 tables across the switch fabric
+//! 4. **OPTE decapsulation** removes Geneve/IPv6/Ethernet outer headers on target sleds
+//! 5. **Instance delivery** of the inner (guest-facing) packet to the guest
+//!
+//! TODO: Other traffic flows, such as egress from instances, will be documented separately
+//!
+//! ## Reconciliation Components
+//!
+//! The reconciler handles:
+//! - **Group lifecycle**: "Creating" → "Active" → "Deleting" → hard-deleted
+//! - **Member lifecycle**: "Joining" → "Joined" → "Left" → soft-deleted → hard-deleted
+//! - **Dataplane updates**: DPD API calls for P4 table updates
+//! - **Topology mapping**: Sled-to-switch-port resolution (with caching)
+//!
+//! ## Deletion Semantics: Groups vs Members
+//!
+//! **Groups** use state machine deletion:
+//! - User deletes group → state="Deleting" (no `time_deleted` set yet)
+//! - RPW cleans up switch config and associated resources
+//! - RPW hard-deletes the row (uses `diesel::delete`)
+//! - Note: `deallocate_external_multicast_group` (IP pool deallocation) sets
+//!   `time_deleted` directly, but this is separate from user-initiated deletion
+//!
+//! **Members** use a dual-purpose "Left" state with soft-delete:
+//! - Instance stopped: state="Left", time_deleted=NULL
+//!   - Can rejoin when instance starts
+//!   - RPW can transition back to "Joining" when the instance becomes valid
+//! - Instance deleted: state="Left", time_deleted=SET (permanent soft-delete)
+//!   - Cannot be reactivated (a new attach creates a new member record)
+//!   - RPW removes DPD configuration
+//!   - Cleanup task eventually hard-deletes the row
+
+use std::collections::{BTreeMap, HashMap};
+use std::net::{IpAddr, Ipv6Addr};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::time::{Duration, SystemTime};
+
+use anyhow::Result;
+use futures::FutureExt;
+use futures::future::BoxFuture;
+use internal_dns_resolver::Resolver;
+use ipnet::Ipv6Net;
+use serde_json::json;
+use slog::{error, info};
+use tokio::sync::RwLock;
+
+use nexus_config::DEFAULT_UNDERLAY_MULTICAST_NET;
+use nexus_db_model::MulticastGroup;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use nexus_types::identity::Resource;
+use nexus_types::internal_api::background::MulticastGroupReconcilerStatus;
+use omicron_uuid_kinds::SledUuid;
+
+use crate::app::background::BackgroundTask;
+use crate::app::multicast::dataplane::MulticastDataplaneClient;
+use crate::app::saga::StartSaga;
+
+pub(crate) mod groups;
+pub(crate) mod members;
+
+/// Type alias for the sled mapping cache.
+type SledMappingCache =
+    Arc<RwLock<(SystemTime, HashMap<SledUuid, Vec<SwitchBackplanePort>>)>>;
+
+/// Type alias for the backplane map cache.
+type BackplaneMapCache = Arc<
+    RwLock<
+        Option<(
+            SystemTime,
+            BTreeMap<
+                dpd_client::types::PortId,
+                dpd_client::types::BackplaneLink,
+            >,
+        )>,
+    >,
+>;
+
+/// Result of processing a state transition for multicast entities.
+#[derive(Debug)]
+pub(crate) enum StateTransition {
+    /// No state change needed.
+    NoChange,
+    /// State changed successfully.
+    StateChanged,
+    /// Entity needs cleanup/removal.
+    NeedsCleanup,
+}
+
+/// Switch port configuration for multicast group members.
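+///
+/// For illustration only (the cubby number here is hypothetical), a member
+/// reached through cubby 7 would be described as:
+///
+/// ```rust,ignore
+/// SwitchBackplanePort {
+///     port_id: dpd_client::types::PortId::Rear(
+///         dpd_client::types::Rear::try_from("rear7".to_string()).unwrap(),
+///     ),
+///     link_id: dpd_client::types::LinkId(0),
+///     direction: dpd_client::types::Direction::Underlay,
+/// }
+/// ```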
+#[derive(Clone, Debug)]
+pub(crate) struct SwitchBackplanePort {
+    /// Switch port ID
+    pub port_id: dpd_client::types::PortId,
+    /// Switch link ID
+    pub link_id: dpd_client::types::LinkId,
+    /// Direction for multicast traffic (External or Underlay)
+    pub direction: dpd_client::types::Direction,
+}
+
+/// Background task that reconciles multicast group state with dendrite
+/// configuration using the Saga + RPW hybrid pattern.
+pub(crate) struct MulticastGroupReconciler {
+    datastore: Arc<DataStore>,
+    resolver: Resolver,
+    sagas: Arc<dyn StartSaga>,
+    underlay_admin_prefix: Ipv6Net,
+    /// Cache for sled-to-backplane-port mappings.
+    /// Maps sled_id → rear backplane ports for multicast traffic routing.
+    sled_mapping_cache: SledMappingCache,
+    sled_cache_ttl: Duration,
+    /// Cache for backplane hardware topology from DPD.
+    /// Maps PortId → BackplaneLink for platform-specific port validation.
+    backplane_map_cache: BackplaneMapCache,
+    backplane_cache_ttl: Duration,
+    /// Maximum number of members to process concurrently per group.
+    member_concurrency_limit: usize,
+    /// Maximum number of groups to process concurrently.
+    group_concurrency_limit: usize,
+    /// Whether multicast functionality is enabled.
+    enabled: bool,
+    /// Flag to signal cache invalidation on the next activation.
+    ///
+    /// Set to `true` when topology changes occur (sled add/remove, inventory updates).
+    /// Checked and cleared at the start of each reconciliation pass.
+    invalidate_cache_on_next_run: Arc<AtomicBool>,
+}
+
+impl MulticastGroupReconciler {
+    pub(crate) fn new(
+        datastore: Arc<DataStore>,
+        resolver: Resolver,
+        sagas: Arc<dyn StartSaga>,
+        enabled: bool,
+        sled_cache_ttl: Duration,
+        backplane_cache_ttl: Duration,
+        invalidate_cache_flag: Arc<AtomicBool>,
+    ) -> Self {
+        // Use the configured underlay admin-local prefix
+        let underlay_admin_prefix: Ipv6Net = DEFAULT_UNDERLAY_MULTICAST_NET
+            .to_string()
+            .parse()
+            .expect("DEFAULT_UNDERLAY_MULTICAST_NET must be valid Ipv6Net");
+
+        Self {
+            datastore,
+            resolver,
+            sagas,
+            underlay_admin_prefix,
+            sled_mapping_cache: Arc::new(RwLock::new((
+                SystemTime::now(),
+                HashMap::new(),
+            ))),
+            sled_cache_ttl,
+            backplane_map_cache: Arc::new(RwLock::new(None)),
+            backplane_cache_ttl,
+            member_concurrency_limit: 100,
+            group_concurrency_limit: 100,
+            enabled,
+            invalidate_cache_on_next_run: invalidate_cache_flag,
+        }
+    }
+
+    /// Generate the tag for multicast groups.
+    ///
+    /// Both external and underlay groups use the same tag (the group name).
+    /// This pairs them logically for management and cleanup operations.
+    pub(crate) fn generate_multicast_tag(group: &MulticastGroup) -> String {
+        group.name().to_string()
+    }
+
+    /// Generate an admin-scoped IPv6 multicast address from an external
+    /// multicast address.
+    ///
+    /// Maps external addresses into the configured underlay admin-local prefix
+    /// (DEFAULT_UNDERLAY_MULTICAST_NET) using bitmask mapping. Preserves the
+    /// lower `128 - prefix_len` bits from the external address (the group ID)
+    /// and sets the high bits from the prefix.
+    ///
+    /// Admin-local scope (ff04::/16) is defined in RFC 7346.
+    pub(crate) fn map_external_to_underlay_ip(
+        &self,
+        external_ip: IpAddr,
+    ) -> Result<IpAddr> {
+        map_external_to_underlay_ip_impl(
+            self.underlay_admin_prefix,
+            external_ip,
+        )
+    }
+
+    /// Invalidate the backplane map cache, forcing refresh on next access.
+    ///
+    /// Called when:
+    /// - Sled validation fails (sp_slot not in cached backplane map)
+    /// - Topology data needs to be refreshed after detecting potential changes
+    pub(crate) async fn invalidate_backplane_cache(&self) {
+        let mut cache = self.backplane_map_cache.write().await;
+        *cache = None; // Clear the cache entirely
+    }
+
+    /// Invalidate the sled mapping cache, forcing refresh on next access.
+    ///
+    /// Called when:
+    /// - Backplane topology changes are detected (different port count/layout)
+    /// - Sled mappings need to be re-validated against new topology
+    pub(crate) async fn invalidate_sled_mapping_cache(&self) {
+        let mut cache = self.sled_mapping_cache.write().await;
+        // Set timestamp to epoch to force refresh
+        *cache = (SystemTime::UNIX_EPOCH, cache.1.clone());
+    }
+}
+
+/// Pure function implementation of external-to-underlay IP mapping.
+/// This can be tested independently without requiring a full reconciler instance.
+fn map_external_to_underlay_ip_impl(
+    underlay_admin_prefix: Ipv6Net,
+    external_ip: IpAddr,
+) -> Result<IpAddr> {
+    // Compute base (prefix network) and host mask
+    let base = underlay_admin_prefix.network();
+    let prefix_len = underlay_admin_prefix.prefix_len();
+    let host_bits = 128u32.saturating_sub(u32::from(prefix_len));
+    let base_u128 = u128::from_be_bytes(base.octets());
+    let mask: u128 = if host_bits == 128 {
+        u128::MAX
+    } else if host_bits == 0 {
+        0
+    } else {
+        (1u128 << host_bits) - 1
+    };
+
+    // Derive a value to fit in the available host bits
+    let host_value: u128 = match external_ip {
+        IpAddr::V4(ipv4) => {
+            // IPv4 addresses need at least 32 host bits to preserve the full
+            // address (IPv4 multicast validation happens at IP pool allocation time)
+            if host_bits < 32 {
+                return Err(anyhow::Error::msg(format!(
+                    "Prefix {underlay_admin_prefix} has only {host_bits} host \
+                     bits, but IPv4 requires at least 32 bits"
+                )));
+            }
+            u128::from(u32::from_be_bytes(ipv4.octets()))
+        }
+        IpAddr::V6(ipv6) => {
+            // IPv6 multicast validation (including ff01::/ff02:: exclusions)
+            // happens at IP pool allocation time
+            let full_addr = u128::from_be_bytes(ipv6.octets());
+
+            // XOR-fold the full 128-bit address into the available host bits
+            // to avoid collisions. This ensures different external addresses
+            // (even with identical lower bits but different scopes) map to
+            // different underlay addresses.
+            if host_bits < 128 {
+                // Split into chunks and XOR them together
+                let mut result = 0u128;
+                let mut remaining = full_addr;
+                while remaining != 0 {
+                    result ^= remaining & mask;
+                    remaining >>= host_bits;
+                }
+                result
+            } else {
+                // host_bits >= 128: use full address as-is
+                full_addr
+            }
+        }
+    };
+
+    // Combine base network + computed host value
+    let underlay_u128 = (base_u128 & !mask) | (host_value & mask);
+    let underlay_ipv6 = Ipv6Addr::from(underlay_u128.to_be_bytes());
+
+    // Validate bounds
+    if !underlay_admin_prefix.contains(&underlay_ipv6) {
+        return Err(anyhow::Error::msg(format!(
+            "Generated underlay IP {underlay_ipv6} falls outside configured \
+             prefix {underlay_admin_prefix} (external {external_ip})."
+ ))); + } + + Ok(IpAddr::V6(underlay_ipv6)) +} + +impl BackgroundTask for MulticastGroupReconciler { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + if !self.enabled { + info!(opctx.log, "multicast group reconciler not enabled"); + let mut status = MulticastGroupReconcilerStatus::default(); + status.disabled = true; + return json!(status); + } + + trace!(opctx.log, "multicast group reconciler activating"); + let status = self.run_reconciliation_pass(opctx).await; + + let did_work = status.groups_created + + status.groups_deleted + + status.groups_verified + + status.members_processed + + status.members_deleted + > 0; + + if status.errors.is_empty() { + if did_work { + info!( + opctx.log, + "multicast RPW reconciliation pass completed successfully"; + "external_groups_created" => status.groups_created, + "external_groups_deleted" => status.groups_deleted, + "active_groups_verified" => status.groups_verified, + "member_state_transitions" => status.members_processed, + "orphaned_members_cleaned" => status.members_deleted, + "dataplane_operations" => status.groups_created + status.groups_deleted + status.members_processed + ); + } else { + trace!( + opctx.log, + "multicast RPW reconciliation pass completed - dataplane consistent" + ); + } + } else { + error!( + opctx.log, + "multicast RPW reconciliation pass completed with dataplane inconsistencies"; + "external_groups_created" => status.groups_created, + "external_groups_deleted" => status.groups_deleted, + "active_groups_verified" => status.groups_verified, + "member_state_transitions" => status.members_processed, + "orphaned_members_cleaned" => status.members_deleted, + "dataplane_error_count" => status.errors.len() + ); + } + + json!(status) + } + .boxed() + } +} + +impl MulticastGroupReconciler { + /// Execute a full reconciliation pass. 
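+    ///
+    /// Phases run in the order implemented below: invalidate caches if a
+    /// topology change was flagged, build a fresh dataplane client, then
+    /// reconcile "Creating" groups, "Deleting" groups, and "Active" groups,
+    /// process member state transitions, and finally clean up soft-deleted
+    /// members.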
+ async fn run_reconciliation_pass( + &mut self, + opctx: &OpContext, + ) -> MulticastGroupReconcilerStatus { + let mut status = MulticastGroupReconcilerStatus::default(); + + trace!(opctx.log, "starting multicast reconciliation pass"); + + // Check if cache invalidation was requested + if self + .invalidate_cache_on_next_run + .compare_exchange(true, false, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() + { + info!( + opctx.log, + "invalidating multicast caches due to topology change" + ); + self.invalidate_backplane_cache().await; + self.invalidate_sled_mapping_cache().await; + } + + // Create dataplane client (across switches) once for the entire + // reconciliation pass (in case anything has changed) + let dataplane_client = match MulticastDataplaneClient::new( + self.resolver.clone(), + opctx.log.clone(), + ) + .await + { + Ok(client) => client, + Err(e) => { + let msg = format!( + "failed to create multicast dataplane client: {e:#}" + ); + status.errors.push(msg); + return status; + } + }; + + // Process creating groups + match self.reconcile_creating_groups(opctx).await { + Ok(count) => status.groups_created += count, + Err(e) => { + let msg = format!("failed to reconcile creating groups: {e:#}"); + status.errors.push(msg); + } + } + + // Process deleting groups + match self.reconcile_deleting_groups(opctx, &dataplane_client).await { + Ok(count) => status.groups_deleted += count, + Err(e) => { + let msg = format!("failed to reconcile deleting groups: {e:#}"); + status.errors.push(msg); + } + } + + // Reconcile active groups (verify state, update dataplane as needed) + match self.reconcile_active_groups(opctx, &dataplane_client).await { + Ok(count) => status.groups_verified += count, + Err(e) => { + let msg = format!("failed to reconcile active groups: {e:#}"); + status.errors.push(msg); + } + } + + // Process member state changes + match self.reconcile_member_states(opctx, &dataplane_client).await { + Ok(count) => status.members_processed += count, + Err(e) => { + let msg = format!("failed to reconcile member states: {e:#}"); + status.errors.push(msg); + } + } + + // Clean up deleted members ("Left" + `time_deleted`) + match self.cleanup_deleted_members(opctx).await { + Ok(count) => status.members_deleted += count, + Err(e) => { + let msg = format!("failed to cleanup deleted members: {e:#}"); + status.errors.push(msg); + } + } + + trace!( + opctx.log, + "multicast RPW reconciliation cycle completed"; + "external_groups_created" => status.groups_created, + "external_groups_deleted" => status.groups_deleted, + "active_groups_verified" => status.groups_verified, + "member_lifecycle_transitions" => status.members_processed, + "orphaned_member_cleanup" => status.members_deleted, + "total_dpd_operations" => status.groups_created + status.groups_deleted + status.members_processed, + "error_count" => status.errors.len() + ); + + status + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::net::{Ipv4Addr, Ipv6Addr}; + + use omicron_common::address::IPV6_ADMIN_SCOPED_MULTICAST_PREFIX; + + #[test] + fn test_map_ipv4_to_underlay_ipv6() { + // Test IPv4 multicast mapping to admin-scoped IPv6 using default + // prefix (ff04::/64). IPv4 fits in lower 32 bits. 
+ let ipv4 = Ipv4Addr::new(224, 1, 2, 3); + let result = map_external_to_underlay_ip_impl( + DEFAULT_UNDERLAY_MULTICAST_NET, + IpAddr::V4(ipv4), + ) + .unwrap(); + + match result { + IpAddr::V6(ipv6) => { + // Should be ff04::e001:203 + // (224=0xe0, 1=0x01, 2=0x02, 3=0x03) + assert_eq!( + ipv6.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0xe001, + 0x0203, + ] + ); + } + _ => panic!("Expected IPv6 result"), + } + } + + #[test] + fn test_map_ipv4_edge_cases() { + // Test minimum IPv4 multicast address using production default prefix + let ipv4_min = Ipv4Addr::new(224, 0, 0, 1); + let result = map_external_to_underlay_ip_impl( + DEFAULT_UNDERLAY_MULTICAST_NET, + IpAddr::V4(ipv4_min), + ) + .unwrap(); + match result { + IpAddr::V6(ipv6) => { + assert_eq!( + ipv6.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0xe000, + 0x0001, + ] + ); + } + _ => panic!("Expected IPv6 result"), + } + + // Test maximum IPv4 multicast address using production default prefix + let ipv4_max = Ipv4Addr::new(239, 255, 255, 255); + let result = map_external_to_underlay_ip_impl( + DEFAULT_UNDERLAY_MULTICAST_NET, + IpAddr::V4(ipv4_max), + ) + .unwrap(); + match result { + IpAddr::V6(ipv6) => { + assert_eq!( + ipv6.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0xefff, + 0xffff, + ] + ); + } + _ => panic!("Expected IPv6 result"), + } + } + + #[test] + fn test_map_ipv6_multicast_to_admin_scoped() { + // Test algorithm with wider /16 prefix (not used in production). + // Tests site-local (ff05::/16) to admin-scoped (ff04::/16) with XOR folding. + // With /16, we XOR upper 112 bits with lower 112 bits. + let ipv6_site_local = Ipv6Addr::new( + 0xff05, 0x1234, 0x5678, 0x9abc, 0xdef0, 0x1234, 0x5678, 0x9abc, + ); + let prefix_16: Ipv6Net = "ff04::/16".parse().unwrap(); + let result = map_external_to_underlay_ip_impl( + prefix_16, + IpAddr::V6(ipv6_site_local), + ) + .unwrap(); + + match result { + IpAddr::V6(ipv6) => { + // XOR result of 112-bit chunks + assert_eq!( + ipv6.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x1234, + 0x5678, + 0x9abc, + 0xdef0, + 0x1234, + 0x5678, + 0x65b9, // XOR folded last segment + ] + ); + } + _ => panic!("Expected IPv6 result"), + } + } + + #[test] + fn test_map_ipv6_global_multicast_to_admin_scoped() { + // Test algorithm with wider /16 prefix (not used in production). + // Tests global (ff0e::/16) to admin-scoped (ff04::/16) with XOR folding. + // With /16, we XOR upper 112 bits with lower 112 bits. + let ipv6_global = Ipv6Addr::new( + 0xff0e, 0xabcd, 0x1234, 0x5678, 0x9abc, 0xdef0, 0x1234, 0x5678, + ); + let prefix_16: Ipv6Net = "ff04::/16".parse().unwrap(); + let result = map_external_to_underlay_ip_impl( + prefix_16, + IpAddr::V6(ipv6_global), + ) + .unwrap(); + + match result { + IpAddr::V6(ipv6) => { + // XOR result of 112-bit chunks + assert_eq!( + ipv6.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0xabcd, + 0x1234, + 0x5678, + 0x9abc, + 0xdef0, + 0x1234, + 0xa976, // XOR folded last segment + ] + ); + } + _ => panic!("Expected IPv6 result"), + } + } + + #[test] + fn test_map_ipv6_already_admin_scoped() { + // Test algorithm with wider /16 prefix (not used in production). + // Admin-scoped multicast (ff04::/16) gets XOR folded like any other address. + // With /16, we XOR upper 112 bits with lower 112 bits. 
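+        //
+        // Worked example for the address below: the fold shifts the top 16
+        // bits (0xff04) into the low 112-bit chunk, so only the last segment
+        // changes: 0x7777 ^ 0xff04 = 0x8873.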
+ let ipv6_admin = Ipv6Addr::new( + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x1111, + 0x2222, + 0x3333, + 0x4444, + 0x5555, + 0x6666, + 0x7777, + ); + let prefix_16: Ipv6Net = "ff04::/16".parse().unwrap(); + let result = + map_external_to_underlay_ip_impl(prefix_16, IpAddr::V6(ipv6_admin)) + .unwrap(); + + match result { + IpAddr::V6(ipv6) => { + // XOR result of 112-bit chunks + assert_eq!( + ipv6.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x1111, + 0x2222, + 0x3333, + 0x4444, + 0x5555, + 0x6666, + 0x8873, // XOR folded last segment + ] + ); + } + _ => panic!("Expected IPv6 result"), + } + } + + #[test] + fn test_prefix_validation_ipv4_too_small() { + // Test that a prefix that's too small for IPv4 mapping is rejected + // ff04::/120 only allows for the last 8 bits to vary, but IPv4 needs 32 bits + let ipv4 = Ipv4Addr::new(224, 1, 2, 3); + let prefix: Ipv6Net = "ff04::/120".parse().unwrap(); + let result = map_external_to_underlay_ip_impl(prefix, IpAddr::V4(ipv4)); + + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("has only 8 host bits") + && err_msg.contains("IPv4 requires at least 32 bits"), + "Expected IPv4 validation error, got: {err_msg}" + ); + } + + #[test] + fn test_prefix_preservation_hash_space_for_large_sets() { + // Smoke-test: For /64 (64 host bits), generating mappings for 100k + // unique IPv6 external addresses should produce 100k unique underlay + // addresses. With /64, we preserve segments 4-7, so vary those. + use std::collections::HashSet; + let prefix: Ipv6Net = "ff04::/64".parse().unwrap(); + + let mut set = HashSet::with_capacity(100_000); + for i in 0..100_000u32 { + // Construct a family of multicast IPv6 addresses (global scope ff0e) + // Vary segments 4-5 (which are preserved with /64) to ensure uniqueness + let ipv6 = Ipv6Addr::new( + 0xff0e, + 0, + 0, + 0, + (i >> 16) as u16, + (i & 0xffff) as u16, + 0x3333, + 0x4444, + ); + let underlay = + map_external_to_underlay_ip_impl(prefix, IpAddr::V6(ipv6)) + .unwrap(); + if let IpAddr::V6(u6) = underlay { + assert!(prefix.contains(&u6)); + set.insert(u6); + } else { + panic!("expected IPv6 underlay"); + } + } + assert_eq!(set.len(), 100_000); + } + + #[test] + fn test_prefix_validation_success_larger_prefix() { + // Test that a larger prefix (e.g., /48) works correctly + let ipv4 = Ipv4Addr::new(224, 1, 2, 3); + let prefix: Ipv6Net = "ff04::/48".parse().unwrap(); + let result = map_external_to_underlay_ip_impl(prefix, IpAddr::V4(ipv4)); + + assert!(result.is_ok()); + } + + #[test] + fn test_xor_folding_with_64bit_prefix() { + // Test XOR folding with /64 prefix: upper and lower 64-bit halves + // are XORed together to produce unique mapping + let ipv6 = Ipv6Addr::new( + 0xff0e, 0x1234, 0x5678, 0x9abc, 0x7ef0, 0x1122, 0x3344, 0x5566, + ); + let prefix: Ipv6Net = "ff04::/64".parse().unwrap(); + let result = + map_external_to_underlay_ip_impl(prefix, IpAddr::V6(ipv6)).unwrap(); + + match result { + IpAddr::V6(underlay) => { + // Expected: XOR of upper 64 bits (ff0e:1234:5678:9abc) and + // lower 64 bits (7ef0:1122:3344:5566) = 81fe:0316:653c:cfda + let segments = underlay.segments(); + assert_eq!(segments[0], IPV6_ADMIN_SCOPED_MULTICAST_PREFIX); + assert_eq!(segments[1], 0x0000); + assert_eq!(segments[2], 0x0000); + assert_eq!(segments[3], 0x0000); + assert_eq!(segments[4], 0x81fe); + assert_eq!(segments[5], 0x0316); + assert_eq!(segments[6], 0x653c); + assert_eq!(segments[7], 0xcfda); + } + _ => panic!("Expected IPv6 result"), + } + } + + #[test] + 
fn test_bounded_preservation_prefix_48() { + // Test XOR folding with /48 prefix (not used in production): + // XORs upper 80 bits with lower 80 bits. + let ipv6 = Ipv6Addr::new( + 0xff0e, 0x1234, 0x5678, 0x9abc, 0xdef0, 0x1122, 0x3344, 0x5566, + ); + let prefix: Ipv6Net = "ff04:1000::/48".parse().unwrap(); + let result = + map_external_to_underlay_ip_impl(prefix, IpAddr::V6(ipv6)).unwrap(); + + match result { + IpAddr::V6(underlay) => { + // XOR result of 80-bit chunks + assert_eq!( + underlay.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x1000, + 0x0000, + 0x9abc, + 0xdef0, + 0xee2c, // XOR folded + 0x2170, // XOR folded + 0x031e, // XOR folded + ] + ); + } + _ => panic!("Expected IPv6 result"), + } + } + + #[test] + fn test_xor_folding_prevents_collisions() { + // Test that different external addresses with identical lower bits + // but different upper bits (scopes) map to DIFFERENT underlay addresses. + // XOR folding mixes upper and lower halves to avoid collisions. + let ipv6_site = Ipv6Addr::new( + 0xff05, 0x1234, 0x5678, 0x9abc, 0xdef0, 0x1122, 0x3344, 0x5566, + ); + let ipv6_global = Ipv6Addr::new( + 0xff0e, 0xabcd, 0xef00, 0x0123, 0xdef0, 0x1122, 0x3344, 0x5566, + ); + + let prefix: Ipv6Net = "ff04::/64".parse().unwrap(); + + let result_site = + map_external_to_underlay_ip_impl(prefix, IpAddr::V6(ipv6_site)) + .unwrap(); + let result_global = + map_external_to_underlay_ip_impl(prefix, IpAddr::V6(ipv6_global)) + .unwrap(); + + // Should map to DIFFERENT underlay addresses because XOR folding + // incorporates the different upper 64 bits (including scope) + assert_ne!(result_site, result_global); + } + + #[test] + fn test_admin_scope_xor_folding() { + // Test that admin-scoped external addresses (ff04::) get XOR folded + // like any other multicast address, producing unique mappings + let external = Ipv6Addr::new( + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0, + 0, + 0, + 0x1234, + 0x5678, + 0x9abc, + 0xdef0, + ); + + let prefix: Ipv6Net = "ff04::/64".parse().unwrap(); + let underlay = + map_external_to_underlay_ip_impl(prefix, IpAddr::V6(external)) + .unwrap(); + + // External and underlay will be different due to XOR folding + // (upper 64 bits XOR'd with lower 64 bits) + assert_ne!(IpAddr::V6(external), underlay); + + // Verify XOR result: ff04:0:0:0 XOR 1234:5678:9abc:def0 = ed30:5678:9abc:def0 + if let IpAddr::V6(u) = underlay { + assert_eq!( + u.segments(), + [ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0x0000, + 0x0000, + 0x0000, + 0xed30, // ff04 XOR 1234 + 0x5678, // 0000 XOR 5678 + 0x9abc, // 0000 XOR 9abc + 0xdef0, // 0000 XOR def0 + ] + ); + } else { + panic!("Expected IPv6 underlay"); + } + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 0bbd1012b14..098c289eb9f 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -54,6 +54,7 @@ use omicron_common::api::internal::nexus; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; +use omicron_uuid_kinds::MulticastGroupUuid; use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::SledUuid; use propolis_client::support::InstanceSerialConsoleHelper; @@ -67,6 +68,7 @@ use sagas::instance_start; use sagas::instance_update; use sled_agent_client::types::InstanceMigrationTargetParams; use sled_agent_client::types::VmmPutStateBody; +use std::collections::HashSet; use std::matches; use std::net::SocketAddr; use std::sync::Arc; @@ -314,6 +316,10 @@ async fn normalize_anti_affinity_groups( } impl 
super::Nexus {
+    /// Look up an instance by name or UUID.
+    ///
+    /// The `project` parameter is required for name-based lookup (provides scope)
+    /// and must NOT be specified for UUID-based lookup.
     pub fn instance_lookup<'a>(
         &'a self,
         opctx: &'a OpContext,
@@ -348,6 +354,121 @@ impl super::Nexus {
         }
     }
 
+    /// Handle multicast group membership changes during instance reconfiguration.
+    ///
+    /// The diff is computed against the instance's active memberships only
+    /// (i.e., rows with `time_deleted IS NULL`). Removed ("Left") rows are
+    /// ignored here and handled by the reconciler.
+    async fn handle_multicast_group_changes(
+        &self,
+        opctx: &OpContext,
+        authz_instance: &authz::Instance,
+        multicast_groups: &[NameOrId],
+    ) -> Result<(), Error> {
+        let instance_id = authz_instance.id();
+
+        // Check if multicast is enabled - if not, skip all multicast operations
+        if !self.multicast_enabled() {
+            debug!(opctx.log,
+                "multicast not enabled, skipping multicast group changes";
+                "instance_id" => %instance_id,
+                "requested_groups_count" => multicast_groups.len());
+            return Ok(());
+        }
+
+        debug!(
+            opctx.log,
+            "processing multicast group changes";
+            "instance_id" => %instance_id,
+            "requested_groups" => ?multicast_groups,
+            "requested_groups_count" => multicast_groups.len()
+        );
+
+        // Get current multicast group memberships (active-only)
+        let current_memberships = self
+            .datastore()
+            .multicast_group_members_list_by_instance(
+                opctx,
+                InstanceUuid::from_untyped_uuid(instance_id),
+                false,
+            )
+            .await?;
+        let current_group_ids: HashSet<_> =
+            current_memberships.iter().map(|m| m.external_group_id).collect();
+
+        debug!(
+            opctx.log,
+            "current multicast memberships";
+            "instance_id" => %instance_id,
+            "current_memberships_count" => current_memberships.len(),
+            "current_group_ids" => ?current_group_ids
+        );
+
+        // Resolve new multicast group names/IDs to group records
+        let mut new_group_ids = HashSet::new();
+        for group_name_or_id in multicast_groups {
+            let multicast_group_selector = params::MulticastGroupSelector {
+                multicast_group: group_name_or_id.clone(),
+            };
+            let multicast_group_lookup =
+                self.multicast_group_lookup(opctx, &multicast_group_selector)?;
+            let (.., db_group) =
+                multicast_group_lookup.fetch_for(authz::Action::Read).await?;
+            let id = db_group.id();
+            new_group_ids.insert(id);
+        }
+
+        // Determine which groups to leave and join
+        let groups_to_leave: Vec<_> =
+            current_group_ids.difference(&new_group_ids).cloned().collect();
+        let groups_to_join: Vec<_> =
+            new_group_ids.difference(&current_group_ids).cloned().collect();
+
+        debug!(
+            opctx.log,
+            "membership changes";
+            "instance_id" => %instance_id,
+            "groups_to_leave" => ?groups_to_leave,
+            "groups_to_join" => ?groups_to_join
+        );
+
+        // Remove members from groups that are no longer wanted
+        for group_id in groups_to_leave {
+            debug!(
+                opctx.log,
+                "removing member from group";
+                "instance_id" => %instance_id,
+                "group_id" => %group_id
+            );
+            self.datastore()
+                .multicast_group_member_detach_by_group_and_instance(
+                    opctx,
+                    MulticastGroupUuid::from_untyped_uuid(group_id),
+                    InstanceUuid::from_untyped_uuid(instance_id),
+                )
+                .await?;
+        }
+
+        // Add members to new groups
+        for group_id in groups_to_join {
+            debug!(
+                opctx.log,
+                "adding member to group (reconciler will handle dataplane updates)";
+                "instance_id" => %instance_id,
+                "group_id" => %group_id
+            );
+            self.datastore()
+                .multicast_group_member_attach_to_instance(
+                    opctx,
+                    MulticastGroupUuid::from_untyped_uuid(group_id),
+
InstanceUuid::from_untyped_uuid(instance_id), + ) + .await?; + } + + Ok(()) + } + pub(crate) async fn instance_reconfigure( self: &Arc, opctx: &OpContext, @@ -363,6 +484,7 @@ impl super::Nexus { auto_restart_policy, boot_disk, cpu_platform, + multicast_groups, } = params; check_instance_cpu_memory_sizes(*ncpus, *memory)?; @@ -398,9 +520,32 @@ impl super::Nexus { memory, cpu_platform, }; - self.datastore() + + // Update the instance configuration + let result = self + .datastore() .instance_reconfigure(opctx, &authz_instance, update) - .await + .await; + + // Handle multicast group updates if specified + if let Some(ref multicast_groups) = multicast_groups { + self.handle_multicast_group_changes( + opctx, + &authz_instance, + multicast_groups, + ) + .await?; + } + + // Return early with any database errors before activating reconciler + let instance_result = result?; + + // Activate multicast reconciler after successful reconfiguration if multicast groups were modified + if multicast_groups.is_some() { + self.background_tasks.task_multicast_reconciler.activate(); + } + + Ok(instance_result) } pub(crate) async fn project_create_instance( @@ -554,7 +699,9 @@ impl super::Nexus { } } + // Activate background tasks after successful instance creation self.background_tasks.task_vpc_route_manager.activate(); + self.background_tasks.task_multicast_reconciler.activate(); // TODO: This operation should return the instance as it was created. // Refetching the instance state here won't return that version of the @@ -627,7 +774,9 @@ impl super::Nexus { ) .await?; + // Activate background tasks after successful saga completion self.background_tasks.task_vpc_route_manager.activate(); + self.background_tasks.task_multicast_reconciler.activate(); Ok(()) } @@ -680,7 +829,9 @@ impl super::Nexus { ) .await?; + // Activate background tasks after successful saga completion self.background_tasks.task_vpc_route_manager.activate(); + self.background_tasks.task_multicast_reconciler.activate(); // TODO correctness TODO robustness TODO design // Should we lookup the instance again here? @@ -776,6 +927,11 @@ impl super::Nexus { ) .await?; + // Activate multicast reconciler after successful instance start. + // The reconciler handles both group and member state, including + // Joining→Joined transitions now that sled_id is set. 
+ self.background_tasks.task_multicast_reconciler.activate(); + self.db_datastore .instance_fetch_with_vmm(opctx, &authz_instance) .await @@ -806,6 +962,20 @@ impl super::Nexus { ) .await?; + // Update multicast member state for this instance to "Left" and clear + // `sled_id` - only if multicast is enabled + if self.multicast_enabled() { + self.db_datastore + .multicast_group_members_detach_by_instance( + opctx, + InstanceUuid::from_untyped_uuid(authz_instance.id()), + ) + .await?; + } + + // Activate multicast reconciler to handle switch-level changes + self.background_tasks.task_multicast_reconciler.activate(); + if let Err(e) = self .instance_request_state( opctx, @@ -1280,6 +1450,49 @@ impl super::Nexus { project_id: authz_project.id(), }; + let mut multicast_groups = Vec::new(); + + if self.multicast_enabled() { + let multicast_members = self + .db_datastore + .multicast_group_members_list_by_instance( + opctx, + InstanceUuid::from_untyped_uuid(authz_instance.id()), + false, // include_removed + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "failed to list multicast group members for instance: {e}" + )) + })?; + + for member in multicast_members { + // Get the group details for this membership + if let Ok(group) = self + .db_datastore + .multicast_group_fetch( + opctx, + omicron_uuid_kinds::MulticastGroupUuid::from_untyped_uuid( + member.external_group_id, + ), + ) + .await + { + multicast_groups.push( + sled_agent_client::types::InstanceMulticastMembership { + group_ip: group.multicast_ip.ip(), + sources: group + .source_ips + .into_iter() + .map(|src_ip| src_ip.ip()) + .collect(), + }, + ); + } + } + } + let local_config = sled_agent_client::types::InstanceSledLocalConfig { hostname, nics, @@ -1287,6 +1500,7 @@ impl super::Nexus { ephemeral_ip, floating_ips, firewall_rules, + multicast_groups, dhcp_config: sled_agent_client::types::DhcpConfig { dns_servers: self.external_dns_servers.clone(), // TODO: finish designing instance DNS @@ -2086,6 +2300,8 @@ impl super::Nexus { let sagas = self.sagas.clone(); let task_instance_updater = self.background_tasks.task_instance_updater.clone(); + let task_multicast_reconciler = + self.background_tasks.task_multicast_reconciler.clone(); let log = log.clone(); async move { debug!( @@ -2126,6 +2342,9 @@ impl super::Nexus { // instance, kick the instance-updater background task // to try and start it again in a timely manner. task_instance_updater.activate(); + } else { + // Activate multicast reconciler after successful saga completion + task_multicast_reconciler.activate(); } } } @@ -2476,6 +2695,7 @@ mod tests { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let instance_id = InstanceUuid::from_untyped_uuid(Uuid::new_v4()); diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 7342cb0eef5..ab5f1d066d1 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -79,10 +79,10 @@ impl Nexus { .await } - // The logic of this function should follow very closely what - // `instance_ensure_dpd_config` does. However, there are enough differences - // in the mechanics of how the logic is being carried out to justify having - // this separate function, it seems. + /// The logic of this function should follow very closely what + /// `instance_ensure_dpd_config` does. 
However, there are enough differences + /// in the mechanics of how the logic is being carried out to justify having + /// this separate function, it seems. pub(crate) async fn probe_ensure_dpd_config( &self, opctx: &OpContext, @@ -432,10 +432,6 @@ pub(crate) async fn instance_ensure_dpd_config( Ok(nat_entries) } -// The logic of this function should follow very closely what -// `instance_ensure_dpd_config` does. However, there are enough differences -// in the mechanics of how the logic is being carried out to justify having -// this separate function, it seems. pub(crate) async fn probe_ensure_dpd_config( datastore: &DataStore, log: &slog::Logger, diff --git a/nexus/src/app/ip_pool.rs b/nexus/src/app/ip_pool.rs index 1ef941cb735..a550246aef7 100644 --- a/nexus/src/app/ip_pool.rs +++ b/nexus/src/app/ip_pool.rs @@ -20,7 +20,11 @@ use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::model::Name; use nexus_types::identity::Resource; -use omicron_common::address::{IPV4_SSM_SUBNET, IPV6_SSM_SUBNET}; +use omicron_common::address::{ + IPV4_LINK_LOCAL_MULTICAST_SUBNET, IPV4_SSM_SUBNET, + IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET, IPV6_LINK_LOCAL_MULTICAST_SUBNET, + IPV6_SSM_SUBNET, +}; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -52,6 +56,77 @@ fn not_found_from_lookup(pool_lookup: &lookup::IpPool<'_>) -> Error { } } +/// Validate multicast-specific constraints for IP ranges. +/// +/// Enforces restrictions on multicast address ranges: +/// - IPv4: Rejects link-local (224.0.0.0/24), prevents ASM/SSM boundary spanning +/// - IPv6: Rejects interface-local (ff01::/16) and link-local (ff02::/16), +/// prevents ASM/SSM boundary spanning +fn validate_multicast_range(range: &shared::IpRange) -> Result<(), Error> { + match range { + shared::IpRange::V4(v4_range) => { + let first = v4_range.first_address(); + let last = v4_range.last_address(); + + // Reject IPv4 link-local multicast range (224.0.0.0/24) + if IPV4_LINK_LOCAL_MULTICAST_SUBNET.contains(first) + || IPV4_LINK_LOCAL_MULTICAST_SUBNET.contains(last) + { + return Err(Error::invalid_request( + "Cannot add IPv4 link-local multicast range \ + (224.0.0.0/24) to IP pool", + )); + } + + // Validate range doesn't span ASM/SSM boundary + let first_is_ssm = IPV4_SSM_SUBNET.contains(first); + let last_is_ssm = IPV4_SSM_SUBNET.contains(last); + + if first_is_ssm != last_is_ssm { + return Err(Error::invalid_request( + "IP range cannot span ASM and SSM address spaces", + )); + } + } + shared::IpRange::V6(v6_range) => { + let first = v6_range.first_address(); + let last = v6_range.last_address(); + + // Reject interface-local (ff01::/16) and link-local (ff02::/16) + // IPv6 multicast ranges + if IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET.contains(first) + || IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET.contains(last) + { + return Err(Error::invalid_request( + "Cannot add IPv6 interface-local multicast range \ + (ff01::/16) to IP pool", + )); + } + + if IPV6_LINK_LOCAL_MULTICAST_SUBNET.contains(first) + || IPV6_LINK_LOCAL_MULTICAST_SUBNET.contains(last) + { + return Err(Error::invalid_request( + "Cannot add IPv6 link-local multicast range \ + (ff02::/16) to IP pool", + )); + } + + // Validate range doesn't span ASM/SSM boundary + let first_is_ssm = IPV6_SSM_SUBNET.contains(first); + let last_is_ssm = IPV6_SSM_SUBNET.contains(last); + + if first_is_ssm != last_is_ssm { + return Err(Error::invalid_request( + "IP range cannot span 
ASM and SSM address spaces", + )); + } + } + } + + Ok(()) +} + impl super::Nexus { pub fn ip_pool_lookup<'a>( &'a self, @@ -354,20 +429,40 @@ impl super::Nexus { )); } - // Validate uniformity: ensure range doesn't span multicast/unicast boundary - let range_is_multicast = match range { + // Validate uniformity and pool type constraints. + // Extract first/last addresses once and reuse for all validation checks. + match range { shared::IpRange::V4(v4_range) => { let first = v4_range.first_address(); let last = v4_range.last_address(); let first_is_multicast = first.is_multicast(); let last_is_multicast = last.is_multicast(); + // Ensure range doesn't span multicast/unicast boundary if first_is_multicast != last_is_multicast { return Err(Error::invalid_request( "IP range cannot span multicast and unicast address spaces", )); } - first_is_multicast + + // Validate pool type matches range type + match db_pool.pool_type { + IpPoolType::Multicast => { + if !first_is_multicast { + return Err(Error::invalid_request( + "Cannot add unicast address range to multicast IP pool", + )); + } + validate_multicast_range(range)?; + } + IpPoolType::Unicast => { + if first_is_multicast { + return Err(Error::invalid_request( + "Cannot add multicast address range to unicast IP pool", + )); + } + } + } } shared::IpRange::V6(v6_range) => { let first = v6_range.first_address(); @@ -375,59 +470,32 @@ impl super::Nexus { let first_is_multicast = first.is_multicast(); let last_is_multicast = last.is_multicast(); + // Ensure range doesn't span multicast/unicast boundary if first_is_multicast != last_is_multicast { return Err(Error::invalid_request( "IP range cannot span multicast and unicast address spaces", )); } - first_is_multicast - } - }; - - match db_pool.pool_type { - IpPoolType::Multicast => { - if !range_is_multicast { - return Err(Error::invalid_request( - "Cannot add unicast address range to multicast IP pool", - )); - } - // For multicast pools, validate that the range doesn't span - // ASM/SSM boundaries - match range { - shared::IpRange::V4(v4_range) => { - let first = v4_range.first_address(); - let last = v4_range.last_address(); - let first_is_ssm = IPV4_SSM_SUBNET.contains(first); - let last_is_ssm = IPV4_SSM_SUBNET.contains(last); - - if first_is_ssm != last_is_ssm { + // Validate pool type matches range type + match db_pool.pool_type { + IpPoolType::Multicast => { + if !first_is_multicast { return Err(Error::invalid_request( - "IP range cannot span ASM and SSM address spaces", + "Cannot add unicast address range to multicast IP pool", )); } + validate_multicast_range(range)?; } - shared::IpRange::V6(v6_range) => { - let first = v6_range.first_address(); - let last = v6_range.last_address(); - let first_is_ssm = IPV6_SSM_SUBNET.contains(first); - let last_is_ssm = IPV6_SSM_SUBNET.contains(last); - - if first_is_ssm != last_is_ssm { + IpPoolType::Unicast => { + if first_is_multicast { return Err(Error::invalid_request( - "IP range cannot span ASM and SSM address spaces", + "Cannot add multicast address range to unicast IP pool", )); } } } } - IpPoolType::Unicast => { - if range_is_multicast { - return Err(Error::invalid_request( - "Cannot add multicast address range to unicast IP pool", - )); - } - } } self.db_datastore @@ -512,20 +580,40 @@ impl super::Nexus { )); } - // Validate that the range matches the pool type and that they match uniformity - let range_is_multicast = match range { + // Validate uniformity and pool type constraints. 
+ // Extract first/last addresses once and reuse for all validation checks. + match range { shared::IpRange::V4(v4_range) => { let first = v4_range.first_address(); let last = v4_range.last_address(); let first_is_multicast = first.is_multicast(); let last_is_multicast = last.is_multicast(); + // Ensure range doesn't span multicast/unicast boundary if first_is_multicast != last_is_multicast { return Err(Error::invalid_request( "IP range cannot span multicast and unicast address spaces", )); } - first_is_multicast + + // Validate pool type matches range type + match db_pool.pool_type { + IpPoolType::Multicast => { + if !first_is_multicast { + return Err(Error::invalid_request( + "Cannot add unicast address range to multicast IP pool", + )); + } + validate_multicast_range(range)?; + } + IpPoolType::Unicast => { + if first_is_multicast { + return Err(Error::invalid_request( + "Cannot add multicast address range to unicast IP pool", + )); + } + } + } } shared::IpRange::V6(v6_range) => { let first = v6_range.first_address(); @@ -533,28 +621,30 @@ impl super::Nexus { let first_is_multicast = first.is_multicast(); let last_is_multicast = last.is_multicast(); + // Ensure range doesn't span multicast/unicast boundary if first_is_multicast != last_is_multicast { return Err(Error::invalid_request( "IP range cannot span multicast and unicast address spaces", )); } - first_is_multicast - } - }; - match db_pool.pool_type { - IpPoolType::Multicast => { - if !range_is_multicast { - return Err(Error::invalid_request( - "Cannot add unicast address range to multicast IP pool", - )); - } - } - IpPoolType::Unicast => { - if range_is_multicast { - return Err(Error::invalid_request( - "Cannot add multicast address range to unicast IP pool", - )); + // Validate pool type matches range type + match db_pool.pool_type { + IpPoolType::Multicast => { + if !first_is_multicast { + return Err(Error::invalid_request( + "Cannot add unicast address range to multicast IP pool", + )); + } + validate_multicast_range(range)?; + } + IpPoolType::Unicast => { + if first_is_multicast { + return Err(Error::invalid_request( + "Cannot add multicast address range to unicast IP pool", + )); + } + } } } } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index b977690e8fa..1abd597f67c 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -80,6 +80,7 @@ mod ip_pool; mod lldp; mod login; mod metrics; +pub(crate) mod multicast; mod network_interface; pub(crate) mod oximeter; mod probe; @@ -131,6 +132,7 @@ pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = nexus_db_queries::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE as usize; pub(crate) const MAX_EPHEMERAL_IPS_PER_INSTANCE: usize = 1; +pub(crate) const MAX_MULTICAST_GROUPS_PER_INSTANCE: usize = 32; pub const MAX_VCPU_PER_INSTANCE: u16 = 64; @@ -224,6 +226,9 @@ pub struct Nexus { /// The tunable parameters from a configuration file tunables: Tunables, + /// Whether multicast functionality is enabled - used by sagas and API endpoints to check if multicast operations should proceed + multicast_enabled: bool, + /// Operational context used for Instance allocation opctx_alloc: OpContext, @@ -510,6 +515,13 @@ impl Nexus { timeseries_client, webhook_delivery_client, tunables: config.pkg.tunables.clone(), + // Whether multicast functionality is enabled. + // This is used by instance-related sagas and API endpoints to check + // if multicast operations should proceed. 
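+            //
+            // A config sketch (the TOML section name is an assumption here,
+            // mirroring the `config.pkg.multicast` path):
+            //
+            //     [multicast]
+            //     enabled = true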
+ // + // NOTE: This is separate from the RPW reconciler timing config, which + // only controls how often the background task runs. + multicast_enabled: config.pkg.multicast.enabled, opctx_alloc: OpContext::for_background( log.new(o!("component" => "InstanceAllocator")), Arc::clone(&authz), @@ -612,6 +624,7 @@ impl Nexus { opctx: background_ctx, datastore: db_datastore, config: task_config.pkg.background_tasks, + multicast_enabled: task_config.pkg.multicast.enabled, rack_id, nexus_id: task_config.deployment.id, resolver, @@ -664,6 +677,10 @@ impl Nexus { &self.authz } + pub fn multicast_enabled(&self) -> bool { + self.multicast_enabled + } + pub(crate) async fn wait_for_populate(&self) -> Result<(), anyhow::Error> { let mut my_rx = self.populate_status.clone(); loop { @@ -1331,6 +1348,7 @@ async fn map_switch_zone_addrs( use gateway_client::Client as MgsClient; info!(log, "Determining switch slots managed by switch zones"); let mut switch_zone_addrs = HashMap::new(); + for addr in switch_zone_addresses { let mgs_client = MgsClient::new( &format!("http://[{}]:{}", addr, MGS_PORT), diff --git a/nexus/src/app/multicast/dataplane.rs b/nexus/src/app/multicast/dataplane.rs new file mode 100644 index 00000000000..9e4a2067321 --- /dev/null +++ b/nexus/src/app/multicast/dataplane.rs @@ -0,0 +1,1194 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Shared multicast dataplane operations for sagas and reconciler. +//! +//! Unified interface for multicast group and member operations in the +//! dataplane (DPD - Data Plane Daemon). +//! +//! ## VNI and Forwarding Model +//! +//! All external multicast groups use `DEFAULT_MULTICAST_VNI` (77), a reserved +//! system VNI below `MIN_GUEST_VNI` (1024). The bifurcated architecture uses +//! NAT translation at switches: +//! +//! - External multicast packets arrive with VNI 77 +//! - Switches perform NAT translation to underlay IPv6 multicast addresses +//! - Forwarding decisions happen at the underlay layer +//! - Security relies on underlay group membership validation +//! +//! This enables cross-project and cross-silo multicast while maintaining +//! security through API authorization and underlay membership control. + +use std::collections::HashMap; +use std::net::IpAddr; + +use futures::future::try_join_all; +use ipnetwork::IpNetwork; +use oxnet::MulticastMac; +use slog::{Logger, debug, error, info}; + +use dpd_client::Error as DpdError; +use dpd_client::types::{ + AdminScopedIpv6, ExternalForwarding, InternalForwarding, IpSrc, MacAddr, + MulticastGroupCreateExternalEntry, MulticastGroupCreateUnderlayEntry, + MulticastGroupExternalResponse, MulticastGroupMember, + MulticastGroupResponse, MulticastGroupUnderlayResponse, + MulticastGroupUpdateExternalEntry, MulticastGroupUpdateUnderlayEntry, + NatTarget, Vni, +}; +use internal_dns_resolver::Resolver; + +use nexus_db_model::{ExternalMulticastGroup, UnderlayMulticastGroup}; +use nexus_types::identity::Resource; +use omicron_common::api::external::{Error, SwitchLocation}; +use omicron_common::vlan::VlanID; + +use crate::app::dpd_clients; + +/// Trait for extracting external responses from mixed DPD response types. +trait IntoExternalResponse { + /// Extract external response, failing if the response is not external. 
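+    ///
+    /// DPD's `multicast_group_get` returns a [`MulticastGroupResponse`] that
+    /// is not guaranteed to be the external variant; callers that expect an
+    /// external group use this to narrow it (see
+    /// `dpd_ensure_external_created` below).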
+    fn into_external_response(
+        self,
+    ) -> Result<MulticastGroupExternalResponse, Error>;
+}
+
+impl IntoExternalResponse for MulticastGroupResponse {
+    fn into_external_response(
+        self,
+    ) -> Result<MulticastGroupExternalResponse, Error> {
+        match self {
+            MulticastGroupResponse::External {
+                group_ip,
+                external_group_id,
+                tag,
+                internal_forwarding,
+                external_forwarding,
+                sources,
+            } => Ok(MulticastGroupExternalResponse {
+                group_ip,
+                external_group_id,
+                tag,
+                internal_forwarding,
+                external_forwarding,
+                sources,
+            }),
+            _ => {
+                Err(Error::internal_error("expected external group from get()"))
+            }
+        }
+    }
+}
+
+/// Trait for converting database IPv6 types into DPD's
+/// [`AdminScopedIpv6`] type.
+trait IntoAdminScoped {
+    /// Convert to [`AdminScopedIpv6`], rejecting IPv4 addresses.
+    fn into_admin_scoped(self) -> Result<AdminScopedIpv6, Error>;
+}
+
+impl IntoAdminScoped for IpAddr {
+    fn into_admin_scoped(self) -> Result<AdminScopedIpv6, Error> {
+        match self {
+            IpAddr::V6(ipv6) => Ok(AdminScopedIpv6(ipv6)),
+            IpAddr::V4(_) => Err(Error::invalid_request(
+                "underlay multicast groups must use IPv6 addresses",
+            )),
+        }
+    }
+}
+
+/// Result type for multicast dataplane operations.
+pub(crate) type MulticastDataplaneResult<T> = Result<T, Error>;
+
+/// Client for multicast dataplane operations.
+///
+/// This handles multicast group and member operations across all switches
+/// in the rack, with automatic error handling and rollback.
+///
+/// TODO: Add `switch_port_uplinks` configuration to multicast groups for egress
+/// multicast traffic (instances → switches → external hosts).
+///
+/// The current implementation handles ingress (external → switches → instances)
+/// using rear ports with [`dpd_client::types::Direction::Underlay`]. For egress,
+/// we need:
+/// - Group-level uplink configuration (which front ports to use)
+/// - Uplink members with [`dpd_client::types::Direction::External`] added to
+///   underlay groups
+/// - Integration with the existing `switch_ports_with_uplinks()` for port discovery
+pub(crate) struct MulticastDataplaneClient {
+    dpd_clients: HashMap<SwitchLocation, dpd_client::Client>,
+    log: Logger,
+}
+
+/// Parameters for multicast group updates.
+#[derive(Debug)]
+pub(crate) struct GroupUpdateParams<'a> {
+    pub external_group: &'a ExternalMulticastGroup,
+    pub underlay_group: &'a UnderlayMulticastGroup,
+    pub new_name: &'a str,
+    pub new_sources: &'a [IpNetwork],
+}
+
+impl MulticastDataplaneClient {
+    /// Create a new client, building fresh DPD clients for the current switch
+    /// topology.
+    pub(crate) async fn new(
+        resolver: Resolver,
+        log: Logger,
+    ) -> MulticastDataplaneResult<Self> {
+        let dpd_clients = dpd_clients(&resolver, &log).await.map_err(|e| {
+            error!(
+                log,
+                "failed to build DPD clients";
+                "error" => %e
+            );
+            Error::internal_error("failed to build DPD clients")
+        })?;
+        Ok(Self { dpd_clients, log })
+    }
+
+    /// Select a single switch deterministically for read operations.
+    ///
+    /// Used when all switches should have identical state and we only need
+    /// to query one. Selects the first switch in sorted order by location
+    /// for consistency across invocations.
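+    ///
+    /// For example (illustrative; assumes the usual two-switch rack and that
+    /// `SwitchLocation::Switch0` sorts before `Switch1`), this returns the
+    /// `Switch0` client on every call.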
+ fn select_one_switch( + &self, + ) -> MulticastDataplaneResult<(&SwitchLocation, &dpd_client::Client)> { + let mut switches: Vec<_> = self.dpd_clients.iter().collect(); + switches.sort_by_key(|(loc, _)| *loc); + switches + .into_iter() + .next() + .ok_or_else(|| Error::internal_error("no DPD clients available")) + } + + async fn dpd_ensure_underlay_created( + &self, + client: &dpd_client::Client, + ip: AdminScopedIpv6, + tag: &str, + switch: &SwitchLocation, + ) -> MulticastDataplaneResult { + let create = MulticastGroupCreateUnderlayEntry { + group_ip: ip.clone(), + members: Vec::new(), + tag: Some(tag.to_string()), + }; + match client.multicast_group_create_underlay(&create).await { + Ok(r) => Ok(r.into_inner()), + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::CONFLICT => + { + debug!( + self.log, + "underlay exists; fetching"; + "underlay_ip" => %ip, + "switch" => %switch, + "dpd_operation" => "dpd_ensure_underlay_created" + ); + Ok(client + .multicast_group_get_underlay(&ip) + .await + .map_err(|e| { + error!( + self.log, + "underlay fetch failed"; + "underlay_ip" => %ip, + "switch" => %switch, + "error" => %e, + "dpd_operation" => "dpd_ensure_underlay_created" + ); + Error::internal_error("underlay fetch failed") + })? + .into_inner()) + } + Err(e) => { + error!( + self.log, + "underlay create failed"; + "underlay_ip" => %ip, + "switch" => %switch, + "error" => %e, + "dpd_operation" => "dpd_ensure_underlay_created" + ); + Err(Error::internal_error("underlay create failed")) + } + } + } + + async fn dpd_ensure_external_created( + &self, + client: &dpd_client::Client, + create: &MulticastGroupCreateExternalEntry, + switch: &SwitchLocation, + ) -> MulticastDataplaneResult { + match client.multicast_group_create_external(create).await { + Ok(r) => Ok(r.into_inner()), + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::CONFLICT => + { + debug!( + self.log, + "external exists; fetching"; + "external_ip" => %create.group_ip, + "switch" => %switch, + "dpd_operation" => "dpd_ensure_external_created" + ); + let response = client + .multicast_group_get(&create.group_ip) + .await + .map_err(|e| { + error!( + self.log, + "external fetch failed"; + "external_ip" => %create.group_ip, + "switch" => %switch, + "error" => %e, + "dpd_operation" => "dpd_ensure_external_created" + ); + Error::internal_error("external fetch failed") + })?; + Ok(response.into_inner().into_external_response()?) 
+ } + Err(e) => { + error!( + self.log, + "external create failed"; + "external_ip" => %create.group_ip, + "switch" => %switch, + "error" => %e, + "dpd_operation" => "dpd_ensure_external_created" + ); + Err(Error::internal_error("external create failed")) + } + } + } + + async fn dpd_update_external_or_create( + &self, + client: &dpd_client::Client, + group_ip: IpAddr, + update: &MulticastGroupUpdateExternalEntry, + create: &MulticastGroupCreateExternalEntry, + switch: &SwitchLocation, + ) -> MulticastDataplaneResult { + match client.multicast_group_update_external(&group_ip, update).await { + Ok(r) => Ok(r.into_inner()), + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::NOT_FOUND => + { + // Create missing, then fetch-or-return + match client.multicast_group_create_external(create).await { + Ok(r) => Ok(r.into_inner()), + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::CONFLICT => + { + let response = client + .multicast_group_get(&group_ip) + .await + .map_err(|e| { + error!( + self.log, + "external fetch after conflict failed"; + "external_ip" => %group_ip, + "switch" => %switch, + "error" => %e, + "dpd_operation" => "dpd_update_external_or_create" + ); + Error::internal_error( + "external fetch after conflict failed", + ) + })?; + Ok(response.into_inner().into_external_response()?) + } + Err(e) => { + error!( + self.log, + "external ensure failed"; + "external_ip" => %group_ip, + "switch" => %switch, + "error" => %e, + "dpd_operation" => "dpd_update_external_or_create" + ); + Err(Error::internal_error("external ensure failed")) + } + } + } + Err(e) => { + error!( + self.log, + "external update failed"; + "external_ip" => %group_ip, + "switch" => %switch, + "error" => %e, + "dpd_operation" => "dpd_update_external_or_create" + ); + Err(Error::internal_error("external update failed")) + } + } + } + + /// Get the number of switches this client is managing. + pub(crate) fn switch_count(&self) -> usize { + self.dpd_clients.len() + } + + /// Apply multicast group configuration across switches (via DPD). + pub(crate) async fn create_groups( + &self, + external_group: &ExternalMulticastGroup, + underlay_group: &UnderlayMulticastGroup, + ) -> MulticastDataplaneResult<( + MulticastGroupUnderlayResponse, + MulticastGroupExternalResponse, + )> { + debug!( + self.log, + "DPD multicast group creation initiated across rack switches"; + "external_group_id" => %external_group.id(), + "external_multicast_ip" => %external_group.multicast_ip, + "underlay_group_id" => %underlay_group.id, + "underlay_multicast_ip" => %underlay_group.multicast_ip, + "vni" => ?external_group.vni, + "switch_count" => self.switch_count(), + "multicast_scope" => if external_group.multicast_ip.ip().is_ipv4() { "IPv4_External" } else { "IPv6_External" }, + "source_mode" => if external_group.source_ips.is_empty() { "ASM" } else { "SSM" }, + "dpd_operation" => "create_groups" + ); + + let dpd_clients = &self.dpd_clients; + let tag = external_group.name().to_string(); + + // Convert MVLAN to u16 for DPD, validating through VlanID + let vlan_id = external_group + .mvlan + .map(|v| VlanID::new(v as u16)) + .transpose() + .map_err(|e| { + Error::internal_error(&format!("invalid VLAN ID: {e:#}")) + })? 
+ .map(u16::from); + let underlay_ip_admin = + underlay_group.multicast_ip.ip().into_admin_scoped()?; + let underlay_ipv6 = match underlay_group.multicast_ip.ip() { + IpAddr::V6(ipv6) => ipv6, + IpAddr::V4(_) => { + return Err(Error::internal_error( + "underlay multicast groups must use IPv6 addresses", + )); + } + }; + + let nat_target = NatTarget { + internal_ip: underlay_ipv6, + inner_mac: MacAddr { a: underlay_ipv6.derive_multicast_mac() }, + vni: Vni::from(u32::from(external_group.vni.0)), + }; + + let sources_dpd = external_group + .source_ips + .iter() + .map(|ip| IpSrc::Exact(ip.ip())) + .collect::>(); + + let external_group_ip = external_group.multicast_ip.ip(); + + let create_operations = + dpd_clients.into_iter().map(|(switch_location, client)| { + let tag = tag.clone(); + let nat_target = nat_target.clone(); + let sources = sources_dpd.clone(); + let underlay_ip_admin = underlay_ip_admin.clone(); + async move { + // Ensure underlay is present idempotently + let underlay_response = self + .dpd_ensure_underlay_created( + client, + underlay_ip_admin, + &tag, + switch_location, + ) + .await?; + + let external_entry = MulticastGroupCreateExternalEntry { + group_ip: external_group_ip, + external_forwarding: ExternalForwarding { vlan_id }, + internal_forwarding: InternalForwarding { + nat_target: Some(nat_target), + }, + tag: Some(tag.clone()), + sources: Some(sources), + }; + + let external_response = self + .dpd_ensure_external_created( + client, + &external_entry, + switch_location, + ) + .await?; + + Ok::<_, Error>(( + switch_location, + underlay_response, + external_response, + )) + } + }); + + // Execute all switch operations in parallel + let results = try_join_all(create_operations).await.map_err(|e| { + error!( + self.log, + "DPD multicast forwarding configuration failed - dataplane inconsistency"; + "external_group_id" => %external_group.id(), + "external_multicast_ip" => %external_group.multicast_ip.ip(), + "underlay_multicast_ip" => %underlay_group.multicast_ip.ip(), + "multicast_scope" => if external_group.multicast_ip.ip().is_ipv4() { "IPv4_External" } else { "IPv6_External" }, + "switch_count" => self.switch_count(), + "dpd_error" => %e, + "recovery" => "saga_will_rollback_partial_configuration", + "dpd_operation" => "create_groups" + ); + // Rollback handled by saga layer + e + })?; + + // Collect results + let programmed_switches: Vec = + results.iter().map(|(loc, _, _)| **loc).collect(); + let (_, underlay_last, external_last) = + results.into_iter().last().ok_or_else(|| { + Error::internal_error("no switches were configured") + })?; + + debug!( + self.log, + "DPD multicast forwarding configuration completed - all switches configured"; + "external_group_id" => %external_group.id(), + "external_multicast_ip" => %external_group.multicast_ip, + "underlay_group_id" => %underlay_group.id, + "underlay_multicast_ip" => ?underlay_last.group_ip, + "switch_count" => programmed_switches.len(), + "dpd_operations_completed" => "[create_external_group, create_underlay_group, configure_nat_mapping]", + "external_forwarding_vlan" => ?external_last.external_forwarding.vlan_id, + "dpd_operation" => "create_groups" + ); + + Ok((underlay_last, external_last)) + } + + /// Update a multicast group's tag (name) and/or sources in the dataplane. 
+ pub(crate) async fn update_groups( + &self, + params: GroupUpdateParams<'_>, + ) -> MulticastDataplaneResult<( + MulticastGroupUnderlayResponse, + MulticastGroupExternalResponse, + )> { + debug!( + self.log, + "updating multicast groups in dataplane"; + "external_group_id" => %params.external_group.id(), + "underlay_group_id" => %params.underlay_group.id, + "params" => ?params, + "dpd_operation" => "update_groups" + ); + + let dpd_clients = &self.dpd_clients; + + // Pre-compute shared data once + // Convert MVLAN to u16 for DPD, validating through VlanID + let vlan_id = params + .external_group + .mvlan + .map(|v| VlanID::new(v as u16)) + .transpose() + .map_err(|e| { + Error::internal_error(&format!("invalid VLAN ID: {e:#}")) + })? + .map(u16::from); + let underlay_ip_admin = + params.underlay_group.multicast_ip.ip().into_admin_scoped()?; + let underlay_ipv6 = match params.underlay_group.multicast_ip.ip() { + IpAddr::V6(ipv6) => ipv6, + IpAddr::V4(_) => { + return Err(Error::internal_error( + "underlay multicast groups must use IPv6 addresses", + )); + } + }; + + let nat_target = NatTarget { + internal_ip: underlay_ipv6, + inner_mac: MacAddr { a: underlay_ipv6.derive_multicast_mac() }, + vni: Vni::from(u32::from(params.external_group.vni.0)), + }; + + let new_name_str = params.new_name.to_string(); + let external_group_ip = params.external_group.multicast_ip.ip(); + + let sources_dpd = params + .new_sources + .iter() + .map(|ip| IpSrc::Exact(ip.ip())) + .collect::>(); + + let update_operations = + dpd_clients.into_iter().map(|(switch_location, client)| { + let new_name = new_name_str.clone(); + let nat_target = nat_target.clone(); + let sources = sources_dpd.clone(); + let underlay_ip_admin = underlay_ip_admin.clone(); + async move { + // Ensure/get underlay members, create if missing + let members = match client + .multicast_group_get_underlay(&underlay_ip_admin) + .await + { + Ok(r) => r.into_inner().members, + Err(DpdError::ErrorResponse(resp)) + if resp.status() + == reqwest::StatusCode::NOT_FOUND => + { + // Create missing underlay group with new tag and empty members + let created = self + .dpd_ensure_underlay_created( + client, + underlay_ip_admin.clone(), + &new_name, + switch_location, + ) + .await?; + created.members + } + Err(e) => { + error!( + self.log, + "failed to fetch underlay for update"; + "underlay_ip" => %underlay_ip_admin, + "switch" => %switch_location, + "error" => %e + ); + return Err(Error::internal_error( + "failed to fetch underlay for update", + )); + } + }; + + // Update underlay tag preserving members + let underlay_entry = MulticastGroupUpdateUnderlayEntry { + members, + tag: Some(new_name.clone()), + }; + let underlay_response = client + .multicast_group_update_underlay( + &underlay_ip_admin, + &underlay_entry, + ) + .await + .map_err(|e| { + error!( + self.log, + "failed to update underlay"; + "underlay_ip" => %underlay_ip_admin, + "switch" => %switch_location, + "error" => %e + ); + Error::internal_error("failed to update underlay") + })?; + + // Prepare external update/create entries with pre-computed data + let external_forwarding = ExternalForwarding { vlan_id }; + let internal_forwarding = + InternalForwarding { nat_target: Some(nat_target) }; + + let update_entry = MulticastGroupUpdateExternalEntry { + external_forwarding: external_forwarding.clone(), + internal_forwarding: internal_forwarding.clone(), + tag: Some(new_name.clone()), + sources: Some(sources.clone()), + }; + let create_entry = MulticastGroupCreateExternalEntry { + group_ip: 
external_group_ip, + external_forwarding, + internal_forwarding, + tag: Some(new_name.clone()), + sources: Some(sources), + }; + + let external_response = self + .dpd_update_external_or_create( + client, + external_group_ip, + &update_entry, + &create_entry, + switch_location, + ) + .await?; + + Ok::<_, Error>(( + switch_location, + underlay_response.into_inner(), + external_response, + )) + } + }); + + // Execute all switch operations in parallel + let results = try_join_all(update_operations).await.map_err(|e| { + error!( + self.log, + "DPD multicast group update failed - dataplane inconsistency"; + "external_group_id" => %params.external_group.id(), + "external_multicast_ip" => %params.external_group.multicast_ip.ip(), + "underlay_multicast_ip" => %params.underlay_group.multicast_ip.ip(), + "update_operation" => "modify_tag_and_sources", + "switch_count" => self.switch_count(), + "dpd_error" => %e + ); + e + })?; + + // Get the last response (all switches should return equivalent responses) + let results_len = results.len(); + let (_, underlay_last, external_last) = results + .into_iter() + .last() + .ok_or_else(|| Error::internal_error("no switches were updated"))?; + + debug!( + self.log, + "successfully updated multicast groups on all switches"; + "external_group_id" => %params.external_group.id(), + "switches_updated" => results_len, + "new_name" => params.new_name, + "dpd_operation" => "update_groups" + ); + + Ok((underlay_last, external_last)) + } + + /// Modify multicast group members across all switches in parallel. + async fn modify_group_membership( + &self, + underlay_group: &UnderlayMulticastGroup, + member: MulticastGroupMember, + operation_name: &str, + modify_fn: F, + ) -> MulticastDataplaneResult<()> + where + F: Fn( + Vec, + MulticastGroupMember, + ) -> Vec + + Clone + + Send + + 'static, + { + let dpd_clients = &self.dpd_clients; + let operation_name = operation_name.to_string(); + + let modify_ops = dpd_clients.iter().map(|(location, client)| { + let underlay_ip = underlay_group.multicast_ip.ip(); + let member = member.clone(); + let log = self.log.clone(); + let modify_fn = modify_fn.clone(); + let operation_name = operation_name.clone(); + + async move { + // Get current underlay group state + let current_group = client + .multicast_group_get_underlay(&underlay_ip.into_admin_scoped()?) 
+ .await + .map_err(|e| { + error!( + log, + "underlay get failed"; + "underlay_ip" => %underlay_ip, + "switch" => %location, + "error" => %e, + "dpd_operation" => "modify_group_membership_get" + ); + Error::internal_error("underlay get failed") + })?; + + // Apply the modification function + let current_group_inner = current_group.into_inner(); + let updated_members = modify_fn(current_group_inner.members, member.clone()); + + let update_entry = MulticastGroupUpdateUnderlayEntry { + members: updated_members, + tag: current_group_inner.tag, + }; + + client + .multicast_group_update_underlay(&underlay_ip.into_admin_scoped()?, &update_entry) + .await + .map_err(|e| { + error!( + log, + "underlay member modify failed"; + "operation_name" => operation_name.as_str(), + "underlay_ip" => %underlay_ip, + "switch" => %location, + "error" => %e, + "dpd_operation" => "modify_group_membership_update" + ); + Error::internal_error("underlay member modify failed") + })?; + + info!( + log, + "DPD multicast member operation completed on switch"; + "operation_name" => operation_name.as_str(), + "underlay_group_ip" => %underlay_ip, + "member_port_id" => %member.port_id, + "member_link_id" => %member.link_id, + "member_direction" => ?member.direction, + "switch_location" => %location, + "dpd_operation" => %format!("{}_member_in_underlay_group", operation_name.as_str()) + ); + + Ok::<(), Error>(()) + } + }); + + try_join_all(modify_ops).await?; + Ok(()) + } + + /// Add a member to a multicast group in the dataplane. + pub(crate) async fn add_member( + &self, + underlay_group: &UnderlayMulticastGroup, + member: MulticastGroupMember, + ) -> MulticastDataplaneResult<()> { + info!( + self.log, + "DPD multicast member addition initiated across rack switches"; + "underlay_group_id" => %underlay_group.id, + "underlay_multicast_ip" => %underlay_group.multicast_ip, + "member_port_id" => %member.port_id, + "member_link_id" => %member.link_id, + "member_direction" => ?member.direction, + "switch_count" => self.switch_count(), + "dpd_operation" => "update_underlay_group_members" + ); + + self.modify_group_membership( + underlay_group, + member, + "add", + |mut existing_members, new_member| { + // Add to existing members (avoiding duplicates) + if !existing_members.iter().any(|m| { + m.port_id == new_member.port_id + && m.link_id == new_member.link_id + && m.direction == new_member.direction + }) { + existing_members.push(new_member); + } + existing_members + }, + ) + .await + } + + /// Remove a member from a multicast group in the dataplane. + pub(crate) async fn remove_member( + &self, + underlay_group: &UnderlayMulticastGroup, + member: MulticastGroupMember, + ) -> MulticastDataplaneResult<()> { + info!( + self.log, + "DPD multicast member removal initiated across rack switches"; + "underlay_group_id" => %underlay_group.id, + "underlay_multicast_ip" => %underlay_group.multicast_ip, + "member_port_id" => %member.port_id, + "member_link_id" => %member.link_id, + "member_direction" => ?member.direction, + "switch_count" => self.switch_count(), + "dpd_operation" => "update_underlay_group_members" + ); + + self.modify_group_membership( + underlay_group, + member, + "remove", + |existing_members, target_member| { + // Filter out the target member + existing_members + .into_iter() + .filter(|m| { + !(m.port_id == target_member.port_id + && m.link_id == target_member.link_id + && m.direction == target_member.direction) + }) + .collect() + }, + ) + .await + } + + /// Detect and log cross-switch drift for multicast groups. 
+ /// + /// We logs errors if: + /// - Group is present on some switches but missing on others (presence drift) + /// - Group has different configurations across switches (config drift) + fn log_drift_issues<'a>( + &self, + group_ip: IpAddr, + first_location: &SwitchLocation, + first_config: &MulticastGroupResponse, + found_results: &[&'a ( + &'a SwitchLocation, + Option, + )], + not_found_count: usize, + ) { + let total_switches = found_results.len() + not_found_count; + + // Check for cross-switch presence drift (group missing on some switches) + if not_found_count > 0 { + error!( + self.log, + "cross-switch drift detected: group missing on some switches"; + "group_ip" => %group_ip, + "switches_with_group" => found_results.len(), + "switches_without_group" => not_found_count, + "total_switches" => total_switches, + "dpd_operation" => "fetch_external_group_for_drift_check" + ); + } + + // Check for config mismatches between switches (functional style) + found_results + .iter() + .filter_map(|(loc, resp)| resp.as_ref().map(|r| (loc, r))) + .filter(|(_, cfg)| *cfg != first_config) + .for_each(|(location, _)| { + error!( + self.log, + "cross-switch drift detected: different configs on switches"; + "group_ip" => %group_ip, + "first_switch" => %first_location, + "mismatched_switch" => %location, + "dpd_operation" => "fetch_external_group_for_drift_check" + ); + }); + } + + /// Fetch external multicast group DPD state for RPW drift detection. + /// + /// Queries all switches to detect configuration drift. If any switch has + /// different state (missing group, different config), it will return the + /// found state, so the reconciler can trigger an UPDATE + /// saga that will fix all switches atomically. + pub(crate) async fn fetch_external_group_for_drift_check( + &self, + group_ip: IpAddr, + ) -> MulticastDataplaneResult> { + debug!( + self.log, + "fetching external group state from all switches for drift detection"; + "group_ip" => %group_ip, + "switch_count" => self.switch_count(), + "dpd_operation" => "fetch_external_group_for_drift_check" + ); + + let fetch_ops = self.dpd_clients.iter().map(|(location, client)| { + let log = self.log.clone(); + async move { + match client.multicast_group_get(&group_ip).await { + Ok(response) => { + Ok((location, Some(response.into_inner()))) + } + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::NOT_FOUND => + { + debug!( + log, + "external group not found on switch"; + "group_ip" => %group_ip, + "switch" => %location, + "dpd_operation" => "fetch_external_group_for_drift_check" + ); + Ok((location, None)) + } + Err(e) => { + error!( + log, + "external group fetch failed"; + "group_ip" => %group_ip, + "switch" => %location, + "error" => %e, + "dpd_operation" => "fetch_external_group_for_drift_check" + ); + Err(Error::internal_error(&format!( + "failed to fetch external group from DPD: {e}" + ))) + } + } + } + }); + + let results = try_join_all(fetch_ops).await?; + + // Partition results into found/not-found for drift analysis + let (found, not_found): (Vec<_>, Vec<_>) = + results.iter().partition(|(_, resp)| resp.is_some()); + + if found.is_empty() { + // Group doesn't exist on any switch + debug!( + self.log, + "external group not found on any switch (expected for new groups)"; + "group_ip" => %group_ip, + "switches_queried" => results.len(), + "dpd_operation" => "fetch_external_group_for_drift_check" + ); + return Ok(None); + } + + // Get first found config for comparison and return value + let (first_location, first_config) 
= found + .first() + .and_then(|(loc, resp)| resp.as_ref().map(|r| (*loc, r))) + .expect( + "found_results non-empty check guarantees at least one element", + ); + + // Detect and log any cross-switch drift + self.log_drift_issues( + group_ip, + first_location, + first_config, + &found, + not_found.len(), + ); + + debug!( + self.log, + "external group state fetched from all switches"; + "group_ip" => %group_ip, + "switches_queried" => results.len(), + "switches_with_group" => found.len(), + "dpd_operation" => "fetch_external_group_for_drift_check" + ); + + // Return first found config (reconciler will compare with DB and launch UPDATE if needed) + Ok(Some(first_config.clone().into_external_response()?)) + } + + /// Fetch the hardware backplane map from DPD for topology validation. + /// + /// Queries a single switch to get the backplane topology map, which should + /// be identical across all switches. Used by the reconciler to validate that + /// inventory `sp_slot` values are within the valid range for + /// the current hardware. + pub(crate) async fn fetch_backplane_map( + &self, + ) -> MulticastDataplaneResult< + std::collections::BTreeMap< + dpd_client::types::PortId, + dpd_client::types::BackplaneLink, + >, + > { + let (switch_location, client) = self.select_one_switch()?; + + debug!( + self.log, + "fetching backplane map from DPD for topology validation"; + "switch" => %switch_location, + "query_scope" => "single_switch", + "dpd_operation" => "fetch_backplane_map" + ); + + match client.backplane_map().await { + Ok(response) => { + let backplane_map_raw = response.into_inner(); + + // Convert HashMap to BTreeMap + // DPD returns string keys like "rear0", "rear1" - parse them to PortId + let backplane_map: std::collections::BTreeMap<_, _> = backplane_map_raw + .into_iter() + .filter_map(|(port_str, link)| { + match dpd_client::types::PortId::try_from(port_str.as_str()) { + Ok(port_id) => Some((port_id, link)), + Err(e) => { + error!( + self.log, + "failed to parse port ID from backplane map"; + "port_str" => %port_str, + "error" => %e, + "dpd_operation" => "fetch_backplane_map" + ); + None + } + } + }) + .collect(); + + debug!( + self.log, + "backplane map fetched from DPD"; + "switch" => %switch_location, + "port_count" => backplane_map.len(), + "dpd_operation" => "fetch_backplane_map" + ); + Ok(backplane_map) + } + Err(e) => { + error!( + self.log, + "backplane map fetch failed"; + "switch" => %switch_location, + "error" => %e, + "dpd_operation" => "fetch_backplane_map" + ); + Err(Error::internal_error(&format!( + "failed to fetch backplane map from DPD: {e}" + ))) + } + } + } + + /// Fetch current underlay group members from a single switch. + /// + /// Used by the reconciler to detect stale ports that need to be removed + /// when a member's physical location changes. Queries a single switch + /// since all switches should have identical underlay state. + /// + /// For determinism in drift checks, we select the first switch in sorted + /// order by switch location. + pub(crate) async fn fetch_underlay_members( + &self, + underlay_ip: IpAddr, + ) -> MulticastDataplaneResult>> { + let (switch_location, client) = self.select_one_switch()?; + + debug!( + self.log, + "fetching underlay group members from DPD for drift detection"; + "underlay_ip" => %underlay_ip, + "switch" => %switch_location, + "dpd_operation" => "fetch_underlay_members" + ); + + match client + .multicast_group_get_underlay(&underlay_ip.into_admin_scoped()?) 
+ .await + { + Ok(response) => { + let members = response.into_inner().members; + debug!( + self.log, + "underlay group members fetched from DPD"; + "underlay_ip" => %underlay_ip, + "switch" => %switch_location, + "member_count" => members.len(), + "dpd_operation" => "fetch_underlay_members" + ); + Ok(Some(members)) + } + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::NOT_FOUND => + { + debug!( + self.log, + "underlay group not found on switch"; + "underlay_ip" => %underlay_ip, + "switch" => %switch_location, + "dpd_operation" => "fetch_underlay_members" + ); + Ok(None) + } + Err(e) => { + error!( + self.log, + "underlay group fetch failed"; + "underlay_ip" => %underlay_ip, + "switch" => %switch_location, + "error" => %e, + "dpd_operation" => "fetch_underlay_members" + ); + Err(Error::internal_error(&format!( + "failed to fetch underlay group from DPD: {e}" + ))) + } + } + } + + pub(crate) async fn remove_groups( + &self, + tag: &str, + ) -> MulticastDataplaneResult<()> { + debug!( + self.log, + "cleaning up multicast groups by tag"; + "tag" => tag + ); + + let dpd_clients = &self.dpd_clients; + + // Execute cleanup operations on all switches in parallel + let cleanup_ops = dpd_clients.iter().map(|(location, client)| { + let tag = tag.to_string(); + let log = self.log.clone(); + async move { + match client.multicast_reset_by_tag(&tag).await { + Ok(_) => { + debug!( + log, + "cleaned up multicast groups"; + "switch" => %location, + "tag" => %tag + ); + Ok::<(), Error>(()) + } + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::NOT_FOUND => + { + // Tag not found on this switch - this is fine, means nothing to clean up + debug!( + log, + "no multicast groups found with tag on switch (expected)"; + "switch" => %location, + "tag" => %tag + ); + Ok::<(), Error>(()) + } + Err(e) => { + error!( + log, + "failed to clean up multicast groups by tag"; + "switch" => %location, + "tag" => %tag, + "error" => %e, + "dpd_operation" => "remove_groups" + ); + Err(Error::internal_error( + "failed to clean up multicast groups by tag", + )) + } + } + } + }); + + // Wait for all cleanup operations to complete + try_join_all(cleanup_ops).await?; + + info!( + self.log, + "successfully cleaned up multicast groups by tag"; + "tag" => tag + ); + Ok(()) + } +} diff --git a/nexus/src/app/multicast/mod.rs b/nexus/src/app/multicast/mod.rs new file mode 100644 index 00000000000..6cf27dc88db --- /dev/null +++ b/nexus/src/app/multicast/mod.rs @@ -0,0 +1,508 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Multicast group management for network traffic distribution. +//! +//! Group creation, member management, and IP pool integration following +//! the bifurcated design from [RFD 488](https://rfd.shared.oxide.computer/rfd/488). +//! +//! ## Fleet-Scoped Authorization Model +//! +//! Multicast groups are **fleet-scoped resources** (authz parent = "Fleet"), +//! similar to IP pools. This enables: +//! +//! - **Cross-project multicast**: Instances from different projects can join +//! the same group without IP waste +//! - **Cross-silo multicast**: Instances from different silos can join the +//! same group (when pools are linked to multiple silos) +//! +//! ### Authorization Rules +//! +//! - **Creating/modifying/deleting groups**: Any authenticated user in the fleet (silo users) +//! 
can create, modify, and delete multicast groups +//! - **Reading/listing groups**: Any authenticated user in the fleet can read and list groups +//! (enables discovery of available groups for joining instances) +//! - **Listing group members**: Only requires Read permission on the group (fleet-scoped), +//! not permissions on individual member instances +//! - **Adding/removing members**: Requires Read on group + Modify on the specific instance +//! (project collaborators can attach only their own instances to any fleet-scoped group) +//! +//! ### VNI Assignment +//! +//! All fleet-scoped multicast groups use `DEFAULT_MULTICAST_VNI` (77), which is +//! reserved for fleet-wide multicast traffic and below the `MIN_GUEST_VNI` (1024) +//! threshold. This ensures consistent behavior across all multicast groups. + +use std::net::IpAddr; +use std::sync::Arc; + +use ref_cast::RefCast; + +use nexus_config::DEFAULT_UNDERLAY_MULTICAST_NET; +use nexus_db_lookup::{LookupPath, lookup}; +use nexus_db_model::Name; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::{authz, db}; +use nexus_types::external_api::{params, views}; +use omicron_common::address::{IPV4_SSM_SUBNET, IPV6_SSM_SUBNET}; +use omicron_common::api::external::{ + self, CreateResult, DataPageParams, DeleteResult, Error, ListResultVec, + LookupResult, NameOrId, UpdateResult, http_pagination::PaginatedBy, +}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid, MulticastGroupUuid}; + +pub(crate) mod dataplane; + +impl super::Nexus { + /// Look up a fleet-scoped multicast group by name or ID. + pub(crate) fn multicast_group_lookup<'a>( + &'a self, + opctx: &'a OpContext, + multicast_group_selector: &'a params::MulticastGroupSelector, + ) -> LookupResult> { + // Multicast groups are fleet-scoped (like IP pools) + match &multicast_group_selector.multicast_group { + NameOrId::Id(id) => { + let multicast_group = + LookupPath::new(opctx, &self.db_datastore) + .multicast_group_id(*id); + Ok(multicast_group) + } + NameOrId::Name(name) => { + let multicast_group = + LookupPath::new(opctx, &self.db_datastore) + .multicast_group_name(Name::ref_cast(name)); + Ok(multicast_group) + } + } + } + + /// Create a multicast group. + pub(crate) async fn multicast_group_create( + &self, + opctx: &OpContext, + params: ¶ms::MulticastGroupCreate, + ) -> CreateResult { + // Authorization FIRST: check before validating parameters + // This ensures 403 Forbidden is returned before 400 Bad Request + opctx + .authorize(authz::Action::CreateChild, &authz::MULTICAST_GROUP_LIST) + .await?; + + // If an explicit multicast IP is provided, validate ASM/SSM semantics + // and ensure it does not collide with the fixed underlay prefix. + // - ASM IPs must not specify sources + // - SSM IPs must specify at least one source + if let Some(mcast_ip) = params.multicast_ip { + let empty: Vec = Vec::new(); + let sources: &[IpAddr] = + params.source_ips.as_deref().unwrap_or(&empty); + validate_ssm_configuration(mcast_ip, sources)?; + + // Block external IPv6 multicast addresses that fall within the + // fixed underlay admin-local prefix (reserved for underlay). 
+ if let IpAddr::V6(ipv6) = mcast_ip { + // Convert fixed underlay prefix to ipnet and compare + let fixed_underlay: ipnet::Ipv6Net = + DEFAULT_UNDERLAY_MULTICAST_NET + .to_string() + .parse() + .expect("valid fixed underlay admin prefix"); + if fixed_underlay.contains(&ipv6) { + return Err(Error::invalid_request(&format!( + "IPv6 address {ipv6} is within the reserved underlay multicast prefix {}", + fixed_underlay + ))); + } + } + } + + let authz_pool = match ¶ms.pool { + Some(pool_selector) => { + let authz_pool = self + .ip_pool_lookup(opctx, &pool_selector)? + .lookup_for(authz::Action::CreateChild) + .await? + .0; + + // Validate that the pool is of type Multicast + Some( + self.db_datastore + .resolve_pool_for_allocation( + opctx, + Some(authz_pool), + nexus_db_model::IpPoolType::Multicast, + ) + .await?, + ) + } + None => None, + }; + + // Create multicast group (fleet-scoped, uses DEFAULT_MULTICAST_VNI) + let group = self + .db_datastore + .multicast_group_create(opctx, params, authz_pool) + .await?; + + // Activate reconciler to process the new group ("Creating" → "Active") + self.background_tasks.task_multicast_reconciler.activate(); + Ok(group) + } + + /// Fetch a multicast group. + pub(crate) async fn multicast_group_fetch( + &self, + opctx: &OpContext, + group_lookup: &lookup::MulticastGroup<'_>, + ) -> LookupResult { + let (.., group_id) = + group_lookup.lookup_for(authz::Action::Read).await?; + self.db_datastore + .multicast_group_fetch( + opctx, + MulticastGroupUuid::from_untyped_uuid(group_id.id()), + ) + .await + } + + /// Look up multicast group by IP address. + pub(crate) async fn multicast_group_lookup_by_ip( + &self, + opctx: &OpContext, + ip_addr: std::net::IpAddr, + ) -> LookupResult { + self.db_datastore.multicast_group_lookup_by_ip(opctx, ip_addr).await + } + + /// List all multicast groups. + pub(crate) async fn multicast_groups_list( + &self, + opctx: &OpContext, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + opctx + .authorize( + authz::Action::ListChildren, + &authz::MULTICAST_GROUP_LIST, + ) + .await?; + self.db_datastore.multicast_groups_list(opctx, pagparams).await + } + + /// Update a multicast group. + pub(crate) async fn multicast_group_update( + &self, + opctx: &OpContext, + group_lookup: &lookup::MulticastGroup<'_>, + params: ¶ms::MulticastGroupUpdate, + ) -> UpdateResult { + let (.., group_id) = + group_lookup.lookup_for(authz::Action::Modify).await?; + + // Get the current group to check state and get underlay group ID + let current_group = self + .db_datastore + .multicast_group_fetch( + opctx, + MulticastGroupUuid::from_untyped_uuid(group_id.id()), + ) + .await?; + + // Ensure group is in "Active" state (should have `underlay_group_id`) + if current_group.state != db::model::MulticastGroupState::Active { + return Err(Error::invalid_request(&format!( + "cannot update multicast group in state: {state}. 
group must be in \"Active\" state.", + state = current_group.state + ))); + } + + // Ensure the group has an associated underlay group (required for updates) + current_group.underlay_group_id.ok_or_else(|| { + Error::internal_error( + "active multicast group missing `underlay_group_id`", + ) + })?; + + // Validate the new source configuration if provided + if let Some(ref new_source_ips) = params.source_ips { + validate_ssm_configuration( + current_group.multicast_ip.ip(), + new_source_ips, + )?; + } + + // Update the database + let result = self + .db_datastore + .multicast_group_update( + opctx, + MulticastGroupUuid::from_untyped_uuid(group_id.id()), + params, + ) + .await?; + + // Activate RPW to apply changes to DPD (eventually consistent) + // The reconciler will detect drift and launch the UPDATE saga + self.background_tasks.task_multicast_reconciler.activate(); + + Ok(result) + } + + /// Tag a multicast group for deletion. + pub(crate) async fn multicast_group_delete( + &self, + opctx: &OpContext, + group_lookup: &lookup::MulticastGroup<'_>, + ) -> DeleteResult { + let (.., group_id) = + group_lookup.lookup_for(authz::Action::Delete).await?; + + // Mark for deletion via RPW: sets state="Deleting" (not soft-delete). + // RPW cleanup ensures DPD configuration is removed before final deletion. + self.db_datastore + .mark_multicast_group_for_removal( + opctx, + MulticastGroupUuid::from_untyped_uuid(group_id.id()), + ) + .await?; + + // Activate reconciler to process the "Deleting" state + self.background_tasks.task_multicast_reconciler.activate(); + + Ok(()) + } + + /// Add an instance to a multicast group. + pub(crate) async fn multicast_group_member_attach( + self: &Arc, + opctx: &OpContext, + group_lookup: &lookup::MulticastGroup<'_>, + instance_lookup: &lookup::Instance<'_>, + ) -> CreateResult { + // Multicast groups are fleet-scoped - users only need Read permission on the group + // and Modify permission on the instance to attach it + let (.., authz_group) = + group_lookup.lookup_for(authz::Action::Read).await?; + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let member = self + .db_datastore + .multicast_group_member_add( + opctx, + MulticastGroupUuid::from_untyped_uuid(authz_group.id()), + InstanceUuid::from_untyped_uuid(authz_instance.id()), + ) + .await?; + + // Activate reconciler to process the new member ("Joining" → "Joined") + self.background_tasks.task_multicast_reconciler.activate(); + Ok(member) + } + + /// Remove an instance from a multicast group. + pub(crate) async fn multicast_group_member_detach( + self: &Arc, + opctx: &OpContext, + group_lookup: &lookup::MulticastGroup<'_>, + instance_lookup: &lookup::Instance<'_>, + ) -> DeleteResult { + // Multicast groups are fleet-scoped - users only need Read permission on the group + // and Modify permission on the instance to detach it + let (.., authz_group) = + group_lookup.lookup_for(authz::Action::Read).await?; + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + // First, get the member ID by group and instance + // For idempotency, if the member doesn't exist, we consider the removal successful + let member = match self + .db_datastore + .multicast_group_member_get_by_group_and_instance( + opctx, + MulticastGroupUuid::from_untyped_uuid(authz_group.id()), + InstanceUuid::from_untyped_uuid(authz_instance.id()), + ) + .await? 
+ { + Some(member) => member, + None => { + // Member doesn't exist - removal is idempotent, return success + return Ok(()); + } + }; + + self.db_datastore + .multicast_group_member_delete_by_id(opctx, member.id) + .await?; + + // Activate reconciler to process the member removal + self.background_tasks.task_multicast_reconciler.activate(); + Ok(()) + } + + /// List members of a multicast group. + /// + /// ## Authorization + /// + /// This operation only requires "Read" permission on the multicast group + /// itself (fleet-scoped). It does NOT check permissions on the individual + /// instances that are members of the group. + /// + /// This asymmetry is intentional: + /// - **Listing members**: Allows discovery of which instances are in a group + /// (useful for understanding multicast group membership across projects) + /// - **Adding/removing members**: Requires Modify permission on the specific + /// instance (project-scoped), enforcing that users can only manage instances + /// they own + /// + /// Note: When unauthorized users attempt to add/remove instances they don't + /// have access to, the instance lookup fails with 404 (not 403) to prevent + /// information leakage about instances in inaccessible projects. + pub(crate) async fn multicast_group_members_list( + &self, + opctx: &OpContext, + group_lookup: &lookup::MulticastGroup<'_>, + pagparams: &DataPageParams<'_, uuid::Uuid>, + ) -> ListResultVec { + let (.., group_id) = + group_lookup.lookup_for(authz::Action::Read).await?; + self.db_datastore + .multicast_group_members_list( + opctx, + MulticastGroupUuid::from_untyped_uuid(group_id.id()), + pagparams, + ) + .await + } + + /// List all multicast group memberships for an instance. + /// + /// Active-only: returns memberships that have not been soft-deleted + /// (i.e., `time_deleted IS NULL`). For diagnostics that require + /// historical memberships, query the datastore with + /// `include_removed = true`. + pub(crate) async fn instance_list_multicast_groups( + &self, + opctx: &OpContext, + instance_lookup: &lookup::Instance<'_>, + ) -> ListResultVec { + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Read).await?; + let members = self + .db_datastore + .multicast_group_members_list_by_instance( + opctx, + InstanceUuid::from_untyped_uuid(authz_instance.id()), + false, + ) + .await?; + members + .into_iter() + .map(views::MulticastGroupMember::try_from) + .collect::, _>>() + } +} + +/// Validate Source-Specific Multicast (SSM) configuration per RFC 4607: +/// +/// +/// This function validates that: +/// 1. For IPv4 SSM: multicast address is in 232/8 range +/// 2. For IPv6 SSM: multicast address is in FF30::/12 range (covers all FF3x::/32 SSM scopes) +fn validate_ssm_configuration( + multicast_ip: IpAddr, + source_ips: &[IpAddr], +) -> Result<(), omicron_common::api::external::Error> { + let is_ssm_address = match multicast_ip { + IpAddr::V4(addr) => IPV4_SSM_SUBNET.contains(addr), + IpAddr::V6(addr) => IPV6_SSM_SUBNET.contains(addr), + }; + + let has_sources = !source_ips.is_empty(); + + match (is_ssm_address, has_sources) { + (true, false) => Err(external::Error::invalid_request( + "SSM multicast addresses require at least one source IP", + )), + (false, true) => Err(external::Error::invalid_request( + "ASM multicast addresses cannot have sources. 
\ + Use SSM range (232.x.x.x for IPv4, FF3x:: for IPv6) for source-specific multicast", + )), + _ => Ok(()), // (true, true) and (false, false) are valid + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::net::{Ipv4Addr, Ipv6Addr}; + + #[test] + fn test_validate_ssm_configuration() { + // Valid ASM - ASM address with no sources + assert!( + validate_ssm_configuration( + IpAddr::V4(Ipv4Addr::new(224, 1, 1, 1)), + &[] + ) + .is_ok() + ); + + // Valid SSM - SSM address with sources + assert!( + validate_ssm_configuration( + IpAddr::V4(Ipv4Addr::new(232, 1, 1, 1)), + &[IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))] + ) + .is_ok() + ); + + // Valid SSM IPv6 - FF3x::/32 range with sources + assert!( + validate_ssm_configuration( + IpAddr::V6(Ipv6Addr::new(0xff31, 0, 0, 0, 0, 0, 0, 1)), + &[IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1))] + ) + .is_ok() + ); + + // Invalid - ASM address with sources + assert!( + validate_ssm_configuration( + IpAddr::V4(Ipv4Addr::new(224, 1, 1, 1)), + &[IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))] + ) + .is_err() + ); + + // Invalid - SSM address without sources + assert!( + validate_ssm_configuration( + IpAddr::V4(Ipv4Addr::new(232, 1, 1, 1)), + &[] + ) + .is_err() + ); + + // Invalid - IPv6 ASM address with sources + assert!( + validate_ssm_configuration( + IpAddr::V6(Ipv6Addr::new(0xff0e, 0, 0, 0, 0, 0, 0, 1)), + &[IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1))] + ) + .is_err() + ); + + // Invalid - IPv6 SSM address without sources + assert!( + validate_ssm_configuration( + IpAddr::V6(Ipv6Addr::new(0xff31, 0, 0, 0, 0, 0, 0, 1)), + &[] + ) + .is_err() + ); + } +} diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 1058f9797df..43afdd9430f 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -7,7 +7,7 @@ use crate::app::sagas::declare_saga_actions; use crate::app::sagas::disk_create::{self, SagaDiskCreate}; use crate::app::{ MAX_DISKS_PER_INSTANCE, MAX_EXTERNAL_IPS_PER_INSTANCE, - MAX_NICS_PER_INSTANCE, + MAX_MULTICAST_GROUPS_PER_INSTANCE, MAX_NICS_PER_INSTANCE, }; use crate::external_api::params; use nexus_db_lookup::LookupPath; @@ -18,6 +18,7 @@ use nexus_db_queries::db::queries::network_interface::InsertError as InsertNicEr use nexus_db_queries::{authn, authz, db}; use nexus_defaults::DEFAULT_PRIMARY_NIC_NAME; use nexus_types::external_api::params::InstanceDiskAttachment; +use nexus_types::identity::Resource; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; @@ -25,11 +26,12 @@ use omicron_common::api::external::{Error, InternalContext}; use omicron_common::api::internal::shared::SwitchLocation; use omicron_uuid_kinds::{ AffinityGroupUuid, AntiAffinityGroupUuid, GenericUuid, InstanceUuid, + MulticastGroupUuid, }; use ref_cast::RefCast; use serde::Deserialize; use serde::Serialize; -use slog::warn; +use slog::{info, warn}; use std::collections::HashSet; use std::convert::TryFrom; use std::fmt::Debug; @@ -126,6 +128,10 @@ declare_saga_actions! 
{ + sic_set_boot_disk - sic_set_boot_disk_undo } + JOIN_MULTICAST_GROUP -> "joining multicast group" { + + sic_join_instance_multicast_group + - sic_join_instance_multicast_group_undo + } MOVE_TO_STOPPED -> "stopped_instance" { + sic_move_to_stopped } @@ -306,6 +312,32 @@ impl NexusSaga for SagaInstanceCreate { )?; } + // Add the instance to multicast groups, following the same pattern as external IPs + for i in 0..MAX_MULTICAST_GROUPS_PER_INSTANCE { + let repeat_params = NetParams { + saga_params: params.clone(), + which: i, + instance_id, + new_id: Uuid::new_v4(), + }; + let subsaga_name = + SagaName::new(&format!("instance-create-multicast-group{i}")); + + let mut subsaga_builder = DagBuilder::new(subsaga_name); + subsaga_builder.append(Node::action( + format!("multicast-group-{i}").as_str(), + format!("JoinMulticastGroup{i}").as_str(), + JOIN_MULTICAST_GROUP.as_ref(), + )); + subsaga_append( + "multicast_group".into(), + subsaga_builder.build()?, + &mut builder, + repeat_params, + i, + )?; + } + // Build an iterator of all InstanceDiskAttachment entries in the // request; these could either be a boot disk or data disks. As far as // create/attach is concerned, they're all disks and all need to be @@ -1011,6 +1043,136 @@ async fn sic_allocate_instance_external_ip_undo( Ok(()) } +/// Add the instance to a multicast group using the request parameters at +/// index `group_index`, returning Some(()) if a group is joined (or None if +/// no group is specified). +async fn sic_join_instance_multicast_group( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let repeat_saga_params = sagactx.saga_params::()?; + let saga_params = repeat_saga_params.saga_params; + let group_index = repeat_saga_params.which; + let Some(group_name_or_id) = + saga_params.create_params.multicast_groups.get(group_index) + else { + return Ok(None); + }; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &saga_params.serialized_authn, + ); + let instance_id = repeat_saga_params.instance_id; + + // Check if multicast is enabled + if !osagactx.nexus().multicast_enabled() { + debug!(osagactx.log(), + "multicast not enabled, skipping multicast group member attachment"; + "instance_id" => %instance_id, + "group_name_or_id" => ?group_name_or_id); + return Ok(Some(())); + } + + // Look up the multicast group by name or ID using the existing nexus method + let multicast_group_selector = params::MulticastGroupSelector { + multicast_group: group_name_or_id.clone(), + }; + let multicast_group_lookup = osagactx + .nexus() + .multicast_group_lookup(&opctx, &multicast_group_selector) + .map_err(ActionError::action_failed)?; + + // Multicast groups are fleet-scoped - users only need Read permission on the group + // (and implicit permission on the instance being created) + let (.., db_group) = multicast_group_lookup + .fetch_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + // Add the instance as a member of the multicast group in "Joining" state + if let Err(e) = datastore + .multicast_group_member_attach_to_instance( + &opctx, + MulticastGroupUuid::from_untyped_uuid(db_group.id()), + instance_id, + ) + .await + { + match e { + Error::ObjectAlreadyExists { .. 
} => { + debug!( + opctx.log, + "multicast member alredy exists"; + "instance_id" => %instance_id, + ); + return Ok(Some(())); + } + e => return Err(ActionError::action_failed(e)), + } + } + + info!( + osagactx.log(), + "successfully joined instance to multicast group"; + "external_group_id" => %db_group.id(), + "external_group_ip" => %db_group.multicast_ip, + "instance_id" => %instance_id + ); + + Ok(Some(())) +} + +async fn sic_join_instance_multicast_group_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let repeat_saga_params = sagactx.saga_params::()?; + let saga_params = repeat_saga_params.saga_params; + let group_index = repeat_saga_params.which; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &saga_params.serialized_authn, + ); + + // Check if we actually joined a group and get the group name/ID using chain + let Some(group_name_or_id) = + saga_params.create_params.multicast_groups.get(group_index) + else { + return Ok(()); + }; + + // Check if multicast is enabled - if not, no cleanup needed since we didn't attach + if !osagactx.nexus().multicast_enabled() { + debug!(osagactx.log(), + "multicast not enabled, skipping multicast group member undo"; + "group_name_or_id" => ?group_name_or_id); + return Ok(()); + } + + // Look up the multicast group by name or ID using the existing nexus method + let multicast_group_selector = params::MulticastGroupSelector { + multicast_group: group_name_or_id.clone(), + }; + let multicast_group_lookup = osagactx + .nexus() + .multicast_group_lookup(&opctx, &multicast_group_selector)?; + // Undo uses same permission as forward action (Read on multicast group) + let (.., db_group) = + multicast_group_lookup.fetch_for(authz::Action::Read).await?; + + // Delete the record outright. + datastore + .multicast_group_members_delete_by_group( + &opctx, + MulticastGroupUuid::from_untyped_uuid(db_group.id()), + ) + .await?; + + Ok(()) +} + async fn sic_attach_disk_to_instance( sagactx: NexusActionContext, ) -> Result<(), ActionError> { @@ -1361,6 +1523,7 @@ pub mod test { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, boundary_switches: HashSet::from([SwitchLocation::Switch0]), } diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index a5f59bd65af..0edc640cdc5 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -11,8 +11,10 @@ use crate::app::sagas::declare_saga_actions; use nexus_db_lookup::LookupPath; use nexus_db_queries::{authn, authz, db}; use omicron_common::api::internal::shared::SwitchLocation; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use serde::Deserialize; use serde::Serialize; +use slog::{debug, info}; use steno::ActionError; // instance delete saga: input parameters @@ -39,7 +41,10 @@ declare_saga_actions! { DEALLOCATE_EXTERNAL_IP -> "no_result3" { + sid_deallocate_external_ip } - INSTANCE_DELETE_NAT -> "no_result4" { + LEAVE_MULTICAST_GROUPS -> "no_result4" { + + sid_leave_multicast_groups + } + INSTANCE_DELETE_NAT -> "no_result5" { + sid_delete_nat } } @@ -64,6 +69,7 @@ impl NexusSaga for SagaInstanceDelete { builder.append(instance_delete_record_action()); builder.append(delete_network_interfaces_action()); builder.append(deallocate_external_ip_action()); + builder.append(leave_multicast_groups_action()); Ok(builder.build()?) 
} } @@ -132,6 +138,45 @@ async fn sid_delete_nat( Ok(()) } +async fn sid_leave_multicast_groups( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let instance_id = params.authz_instance.id(); + + // Check if multicast is enabled - if not, no members exist to remove + if !osagactx.nexus().multicast_enabled() { + debug!(osagactx.log(), + "multicast not enabled, skipping multicast group member removal"; + "instance_id" => %instance_id); + return Ok(()); + } + + // Mark all multicast group memberships for this instance as deleted + datastore + .multicast_group_members_mark_for_removal( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + ) + .await + .map_err(ActionError::action_failed)?; + + info!( + osagactx.log(), + "Marked multicast members for removal"; + "instance_id" => %instance_id + ); + + Ok(()) +} + async fn sid_deallocate_external_ip( sagactx: NexusActionContext, ) -> Result<(), ActionError> { @@ -240,6 +285,7 @@ mod test { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), } } diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 30bd08fc4a4..955cfa29e5d 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -667,6 +667,7 @@ mod tests { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 881ee8c4e33..72a167bafc5 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -22,7 +22,7 @@ use nexus_db_queries::{authn, authz, db}; use omicron_common::api::external::Error; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; use serde::{Deserialize, Serialize}; -use slog::info; +use slog::{error, info}; use steno::ActionError; /// Parameters to the instance start saga. @@ -97,6 +97,11 @@ declare_saga_actions! { - sis_ensure_registered_undo } + UPDATE_MULTICAST_SLED_ID -> "multicast_sled_id" { + + sis_update_multicast_sled_id + - sis_update_multicast_sled_id_undo + } + // Only account for the instance's resource consumption when the saga is on // the brink of actually starting it. This allows prior steps' undo actions // to change the instance's generation number if warranted (e.g. by moving @@ -111,6 +116,7 @@ declare_saga_actions! { ENSURE_RUNNING -> "ensure_running" { + sis_ensure_running } + } /// Node name for looking up the VMM record once it has been registered with the @@ -141,6 +147,7 @@ impl NexusSaga for SagaInstanceStart { builder.append(dpd_ensure_action()); builder.append(v2p_ensure_action()); builder.append(ensure_registered_action()); + builder.append(update_multicast_sled_id_action()); builder.append(add_virtual_resources_action()); builder.append(ensure_running_action()); Ok(builder.build()?) @@ -696,11 +703,13 @@ async fn sis_ensure_registered_undo( // writing back the state returned from sled agent). Otherwise, try to // reason about the next action from the specific kind of error that was // returned. 
- if let Err(e) = osagactx + let unregister_result = osagactx .nexus() .instance_ensure_unregistered(&propolis_id, &sled_id) - .await - { + .await; + + // Handle the unregister result + if let Err(e) = unregister_result { error!(osagactx.log(), "start saga: failed to unregister instance from sled"; "instance_id" => %instance_id, @@ -773,6 +782,73 @@ async fn sis_ensure_registered_undo( } } +async fn sis_update_multicast_sled_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // Only update multicast members if multicast is enabled + // If disabled, no members exist to update + if !osagactx.nexus().multicast_enabled() { + return Ok(()); + } + + let instance_id = params.db_instance.id(); + let sled_id = sagactx.lookup::("sled_id")?; + + info!(osagactx.log(), "start saga: updating multicast member sled_id"; + "instance_id" => %instance_id, + "sled_id" => %sled_id, + "start_reason" => ?params.reason); + + osagactx + .datastore() + .multicast_group_member_update_sled_id( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + Some(sled_id.into()), + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn sis_update_multicast_sled_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // Only clear multicast members if multicast is enabled + // If disabled, no members exist to clear + if !osagactx.nexus().multicast_enabled() { + return Ok(()); + } + + let instance_id = InstanceUuid::from_untyped_uuid(params.db_instance.id()); + + info!(osagactx.log(), "start saga: clearing multicast member sled_id during undo"; + "instance_id" => %instance_id, + "start_reason" => ?params.reason); + + osagactx + .datastore() + .multicast_group_member_update_sled_id(&opctx, instance_id, None) + .await?; + + Ok(()) +} + async fn sis_ensure_running( sagactx: NexusActionContext, ) -> Result<(), ActionError> { @@ -892,6 +968,7 @@ mod test { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await @@ -938,7 +1015,7 @@ mod test { #[tokio::test] async fn should_start_with_dead_switch() { - let mut cptestctx = nexus_test_utils::test_setup::( + let cptestctx = nexus_test_utils::test_setup::( "should_start_with_dead_switch", 3, ) @@ -1056,11 +1133,15 @@ mod test { .expect("unable to update switch1 settings"); // Shutdown one of the switch daemons - let switch0_dpd = cptestctx + let mut switch0_dpd = cptestctx .dendrite - .get_mut(&SwitchLocation::Switch0) + .write() + .unwrap() + .remove(&SwitchLocation::Switch0) .expect("there should be at least one dendrite running"); + let switch0_port = switch0_dpd.port; + switch0_dpd .cleanup() .await @@ -1068,8 +1149,6 @@ mod test { let log = &opctx.log; - let port = switch0_dpd.port; - let client_state = dpd_client::ClientState { tag: String::from("nexus"), log: log.new(o!( @@ -1080,7 +1159,7 @@ mod test { let addr = std::net::Ipv6Addr::LOCALHOST; let switch_0_dpd_client = dpd_client::Client::new( - &format!("http://[{addr}]:{port}"), + &format!("http://[{addr}]:{switch0_port}"), client_state, ); @@ -1108,11 +1187,13 @@ mod test { assert_eq!(vmm_state, nexus_db_model::VmmState::Running); - 
let port = cptestctx - .dendrite - .get(&SwitchLocation::Switch1) - .expect("two dendrites should be present in test context") - .port; + let port = { + let dendrite_guard = cptestctx.dendrite.read().unwrap(); + dendrite_guard + .get(&SwitchLocation::Switch1) + .expect("two dendrites should be present in test context") + .port + }; let client_state = dpd_client::ClientState { tag: String::from("nexus"), @@ -1130,22 +1211,37 @@ mod test { let log = opctx.log; - // Check to ensure that the nat entry for the address has made it onto switch1 dendrite - let nat_entries = dpd_client - .nat_ipv4_list(&std::net::Ipv4Addr::new(10, 0, 0, 0), None, None) - .await - .unwrap() - .items - .clone(); + // Check to ensure that the nat entry for the address has made it onto switch1 dendrite. + // Note: ipv4_nat_trigger_update() triggers dendrite's RPW asynchronously and returns + // immediately, but dendrite still needs time to process the update and create the NAT + // entries. Tests need to poll/wait for entries rather than checking immediately, or + // they'll be flaky. + let expected_nat_entries = 1; // Instance has 1 external IP + let nat_subnet = std::net::Ipv4Addr::new(10, 0, 0, 0); + let poll_interval = Duration::from_millis(100); + let poll_max = Duration::from_secs(60); // Allow time for RPW to process - assert_eq!(nat_entries.len(), 1); + poll::wait_for_condition( + async || { + let result = + dpd_client.nat_ipv4_list(&nat_subnet, None, None).await; - let port = cptestctx - .dendrite - .get(&SwitchLocation::Switch0) - .expect("two dendrites should be present in test context") - .port; + let data = + result.map_err(|_| poll::CondCheckError::<()>::NotYet)?; + if data.items.len() == expected_nat_entries { + Ok(()) + } else { + Err(poll::CondCheckError::<()>::NotYet) + } + }, + &poll_interval, + &poll_max, + ) + .await + .expect("NAT entry should appear on switch1"); + + // Reuse the port number from the removed Switch0 to start a new dendrite instance let nexus_address = cptestctx.internal_client.bind_address; let mgs = cptestctx.gateway.get(&SwitchLocation::Switch0).unwrap(); let mgs_address = @@ -1155,14 +1251,18 @@ mod test { // Start a new dendrite instance for switch0 let new_switch0 = omicron_test_utils::dev::dendrite::DendriteInstance::start( - port, + switch0_port, Some(nexus_address), Some(mgs_address), ) .await .unwrap(); - cptestctx.dendrite.insert(SwitchLocation::Switch0, new_switch0); + cptestctx + .dendrite + .write() + .unwrap() + .insert(SwitchLocation::Switch0, new_switch0); // Ensure that the nat entry for the address has made it onto the new switch0 dendrite. // This might take some time while the new dendrite comes online. diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index af4d0c528b6..191d61d77d5 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1223,6 +1223,36 @@ async fn siu_commit_instance_updates( nexus.background_tasks.task_v2p_manager.activate(); nexus.vpc_needed_notify_sleds(); + + // If this network config update was due to instance migration (sled change), + // update multicast member sled_id for faster convergence + if let Some(NetworkConfigUpdate::Update { new_sled_id, .. 
}) = + &update.network_config + { + if nexus.multicast_enabled() { + if let Err(e) = osagactx + .datastore() + .multicast_group_member_update_sled_id( + &opctx, + InstanceUuid::from_untyped_uuid(instance_id), + Some((*new_sled_id).into()), + ) + .await + { + // The reconciler will fix this later + info!(log, + "instance update: failed to update multicast member sled_id after migration, reconciler will fix"; + "instance_id" => %instance_id, + "new_sled_id" => %new_sled_id, + "error" => ?e); + } else { + info!(log, + "instance update: updated multicast member sled_id after migration"; + "instance_id" => %instance_id, + "new_sled_id" => %new_sled_id); + } + } + } } Ok(()) @@ -1582,6 +1612,7 @@ mod test { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index c7d3298ccb5..642c7a3947f 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -36,6 +36,8 @@ pub mod instance_ip_detach; pub mod instance_migrate; pub mod instance_start; pub mod instance_update; +pub mod multicast_group_dpd_ensure; +pub mod multicast_group_dpd_update; pub mod project_create; pub mod region_replacement_drive; pub mod region_replacement_finish; @@ -184,7 +186,9 @@ fn make_action_registry() -> ActionRegistry { region_snapshot_replacement_step::SagaRegionSnapshotReplacementStep, region_snapshot_replacement_step_garbage_collect::SagaRegionSnapshotReplacementStepGarbageCollect, region_snapshot_replacement_finish::SagaRegionSnapshotReplacementFinish, - image_create::SagaImageCreate + image_create::SagaImageCreate, + multicast_group_dpd_ensure::SagaMulticastGroupDpdEnsure, + multicast_group_dpd_update::SagaMulticastGroupDpdUpdate ]; #[cfg(test)] diff --git a/nexus/src/app/sagas/multicast_group_dpd_ensure.rs b/nexus/src/app/sagas/multicast_group_dpd_ensure.rs new file mode 100644 index 00000000000..365d1615c6e --- /dev/null +++ b/nexus/src/app/sagas/multicast_group_dpd_ensure.rs @@ -0,0 +1,534 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Saga for applying multicast dataplane configuration via DPD. +//! +//! Atomically applies external and underlay multicast configuration via DPD. +//! Either both are successfully applied on all switches, or partial changes +//! are rolled back. +//! +//! Triggered by RPW reconciler when a multicast group is in "Creating" state +//! and needs dataplane updates. 
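//!
//! # Example (illustrative sketch, not part of this change)
//!
//! Roughly how a caller such as the reconciler might launch this saga; the
//! `nexus`, `opctx`, and group bindings are assumed to already be in scope:
//!
//! ```ignore
//! let params = Params {
//!     serialized_authn: authn::saga::Serialized::for_opctx(&opctx),
//!     external_group_id: external_group.id(),
//!     underlay_group_id: underlay_group.id,
//! };
//! nexus.sagas.saga_execute::<SagaMulticastGroupDpdEnsure>(params).await?;
//! ```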
+ +use anyhow::Context; +use serde::{Deserialize, Serialize}; +use slog::{debug, warn}; +use steno::{ActionError, DagBuilder, Node}; +use uuid::Uuid; + +use dpd_client::types::{ + MulticastGroupExternalResponse, MulticastGroupUnderlayResponse, +}; + +use nexus_db_lookup::LookupDataStore; +use nexus_db_model::{MulticastGroup, UnderlayMulticastGroup}; +use nexus_db_queries::authn; +use nexus_types::identity::Resource; +use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid}; + +use super::{ActionRegistry, NexusActionContext, NexusSaga, SagaInitError}; +use crate::app::multicast::dataplane::MulticastDataplaneClient; +use crate::app::sagas::declare_saga_actions; + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub(crate) struct Params { + /// Authentication context + pub serialized_authn: authn::saga::Serialized, + /// External multicast group to program + pub external_group_id: Uuid, + /// Underlay multicast group to program + pub underlay_group_id: Uuid, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct DataplaneUpdateResponse { + underlay: MulticastGroupUnderlayResponse, + external: MulticastGroupExternalResponse, +} + +declare_saga_actions! { + multicast_group_dpd_ensure; + + FETCH_GROUP_DATA -> "group_data" { + + mgde_fetch_group_data + } + UPDATE_DATAPLANE -> "update_responses" { + + mgde_update_dataplane + - mgde_rollback_dataplane + } + UPDATE_GROUP_STATE -> "state_updated" { + + mgde_update_group_state + } +} + +#[derive(Debug)] +pub struct SagaMulticastGroupDpdEnsure; +impl NexusSaga for SagaMulticastGroupDpdEnsure { + const NAME: &'static str = "multicast-group-dpd-ensure"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + multicast_group_dpd_ensure_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: DagBuilder, + ) -> Result { + builder.append(Node::action( + "group_data", + "FetchGroupData", + FETCH_GROUP_DATA.as_ref(), + )); + + builder.append(Node::action( + "update_responses", + "UpdateDataplane", + UPDATE_DATAPLANE.as_ref(), + )); + + builder.append(Node::action( + "state_updated", + "UpdateGroupState", + UPDATE_GROUP_STATE.as_ref(), + )); + + Ok(builder.build()?) + } +} + +/// Fetch multicast group data from database. 
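///
/// Both records are fetched on one connection for a consistent view and
/// stored under the `"group_data"` saga node; later actions read them back
/// with a lookup of this shape (illustrative):
///
/// ```ignore
/// let (external_group, underlay_group) = sagactx
///     .lookup::<(MulticastGroup, UnderlayMulticastGroup)>("group_data")?;
/// ```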
+async fn mgde_fetch_group_data( + sagactx: NexusActionContext, +) -> Result<(MulticastGroup, UnderlayMulticastGroup), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + debug!( + osagactx.log(), + "fetching multicast group data"; + "external_group_id" => %params.external_group_id, + "underlay_group_id" => %params.underlay_group_id + ); + + let conn = osagactx + .datastore() + .pool_connection_authorized(&opctx) + .await + .map_err(ActionError::action_failed)?; + + // Fetch both groups on same connection for consistent state view + // (sequential fetches since using same connection) + let external_group = osagactx + .datastore() + .multicast_group_fetch_on_conn(&conn, params.external_group_id) + .await + .map_err(ActionError::action_failed)?; + + let underlay_group = osagactx + .datastore() + .underlay_multicast_group_fetch_on_conn(&conn, params.underlay_group_id) + .await + .map_err(ActionError::action_failed)?; + + // Validate groups are in correct state + match external_group.state { + nexus_db_model::MulticastGroupState::Creating => {} + other_state => { + warn!( + osagactx.log(), + "external group not in 'Creating' state for DPD"; + "external_group_id" => %params.external_group_id, + "external_group_name" => external_group.name().as_str(), + "current_state" => ?other_state + ); + return Err(ActionError::action_failed(format!( + "External group {} is in state {other_state:?}, expected 'Creating'", + params.external_group_id + ))); + } + } + + debug!( + osagactx.log(), + "fetched multicast group data"; + "external_group_id" => %external_group.id(), + "external_group_name" => external_group.name().as_str(), + "external_ip" => %external_group.multicast_ip, + "underlay_group_id" => %underlay_group.id, + "underlay_ip" => %underlay_group.multicast_ip, + "vni" => %u32::from(external_group.vni.0) + ); + + Ok((external_group, underlay_group)) +} + +/// Apply external and underlay groups in dataplane atomically. 
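///
/// If programming fails partway, the saga unwinds through
/// `mgde_rollback_dataplane`, which removes whatever was applied, keyed by
/// the group name as the tag. Roughly (illustrative):
///
/// ```ignore
/// let tag = external_group.name().to_string();
/// dataplane.remove_groups(&tag).await?;
/// ```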
+async fn mgde_update_dataplane( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let (external_group, underlay_group) = sagactx + .lookup::<(MulticastGroup, UnderlayMulticastGroup)>("group_data")?; + + // Use MulticastDataplaneClient for consistent DPD operations + let dataplane = MulticastDataplaneClient::new( + osagactx.nexus().resolver().clone(), + osagactx.log().clone(), + ) + .await + .map_err(ActionError::action_failed)?; + + debug!( + osagactx.log(), + "applying multicast configuration via DPD"; + "switch_count" => %dataplane.switch_count(), + "external_group_id" => %external_group.id(), + "external_group_name" => external_group.name().as_str(), + "external_ip" => %external_group.multicast_ip, + "underlay_group_id" => %underlay_group.id, + "underlay_ip" => %underlay_group.multicast_ip, + ); + + let (underlay_response, external_response) = dataplane + .create_groups(&external_group, &underlay_group) + .await + .map_err(ActionError::action_failed)?; + + debug!( + osagactx.log(), + "applied multicast configuration via DPD"; + "external_group_id" => %external_group.id(), + "external_group_name" => external_group.name().as_str(), + "underlay_group_id" => %underlay_group.id, + "external_ip" => %external_group.multicast_ip, + "underlay_ip" => %underlay_group.multicast_ip + ); + + Ok(DataplaneUpdateResponse { + underlay: underlay_response, + external: external_response, + }) +} + +async fn mgde_rollback_dataplane( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let (external_group, _) = sagactx + .lookup::<(MulticastGroup, UnderlayMulticastGroup)>("group_data")?; + + let multicast_tag = external_group.name().to_string(); + + // Use MulticastDataplaneClient for consistent cleanup + let dataplane = MulticastDataplaneClient::new( + osagactx.nexus().resolver().clone(), + osagactx.log().clone(), + ) + .await + .map_err(ActionError::action_failed)?; + + debug!( + osagactx.log(), + "rolling back multicast additions"; + "external_group_id" => %params.external_group_id, + "underlay_group_id" => %params.underlay_group_id, + "tag" => %multicast_tag, + "external_group_name" => external_group.name().as_str(), + ); + + dataplane + .remove_groups(&multicast_tag) + .await + .context("failed to cleanup multicast groups during saga rollback")?; + + debug!( + osagactx.log(), + "completed rollback of multicast configuration"; + "tag" => %multicast_tag + ); + + Ok(()) +} + +/// Update multicast group state to "Active" after applying DPD configuration. 
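///
/// This is the final step of the create path: the group was created via the
/// API in "Creating" state, and this action records that the dataplane now
/// matches. The transition is a single datastore call (illustrative; the
/// same call appears in the body below):
///
/// ```ignore
/// datastore
///     .multicast_group_set_state(
///         &opctx,
///         MulticastGroupUuid::from_untyped_uuid(params.external_group_id),
///         nexus_db_model::MulticastGroupState::Active,
///     )
///     .await?;
/// ```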
+async fn mgde_update_group_state( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let (external_group, _) = sagactx + .lookup::<(MulticastGroup, UnderlayMulticastGroup)>("group_data")?; + + debug!( + osagactx.log(), + "updating multicast group state to 'Active'"; + "external_group_id" => %params.external_group_id, + "current_state" => ?external_group.state + ); + + // Transition the group from "Creating" -> "Active" + osagactx + .datastore() + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(params.external_group_id), + nexus_db_model::MulticastGroupState::Active, + ) + .await + .map_err(ActionError::action_failed)?; + + debug!( + osagactx.log(), + "transitioned multicast group to 'Active'"; + "external_group_id" => %params.external_group_id + ); + + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + + use std::net::{IpAddr, Ipv4Addr}; + + use omicron_uuid_kinds::GenericUuid; + + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; + use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, link_ip_pool, object_create, + }; + use nexus_test_utils_macros::nexus_test; + use nexus_types::external_api::params::{ + IpPoolCreate, MulticastGroupCreate, + }; + use nexus_types::external_api::shared::{IpRange, Ipv4Range}; + use nexus_types::external_api::views::{IpPool, IpPoolRange, IpVersion}; + use omicron_common::api::external::{ + IdentityMetadataCreateParams, NameOrId, + }; + + use crate::app::saga::create_saga_dag; + use crate::app::sagas::test_helpers; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + fn new_test_params(opctx: &nexus_db_queries::context::OpContext) -> Params { + Params { + serialized_authn: Serialized::for_opctx(opctx), + external_group_id: Uuid::new_v4(), + underlay_group_id: Uuid::new_v4(), + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + // Test that repeated rollback attempts don't cause issues + let nexus = &cptestctx.server.server_context().nexus; + let opctx = test_helpers::test_opctx(cptestctx); + + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + external_group_id: Uuid::new_v4(), + underlay_group_id: Uuid::new_v4(), + }; + + // Run the saga multiple times to test idempotent rollback + for _i in 1..=3 { + let result = nexus + .sagas + .saga_execute::(params.clone()) + .await; + + // Each attempt should fail consistently + assert!(result.is_err()); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_params_serialization(cptestctx: &ControlPlaneTestContext) { + let opctx = test_helpers::test_opctx(cptestctx); + let params = new_test_params(&opctx); + + // Test that parameters can be serialized and deserialized + let serialized = serde_json::to_string(¶ms).unwrap(); + let deserialized: Params = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(params.external_group_id, deserialized.external_group_id); + assert_eq!(params.underlay_group_id, deserialized.underlay_group_id); + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_dag_structure(cptestctx: &ControlPlaneTestContext) { + let opctx = test_helpers::test_opctx(cptestctx); + let params = new_test_params(&opctx); + let dag = + 
create_saga_dag::(params).unwrap(); + + // Verify the DAG has the expected structure + let nodes: Vec<_> = dag.get_nodes().collect(); + assert!(nodes.len() >= 2); // Should have at least our 2 main actions + + // Verify expected node labels exist + let node_labels: std::collections::HashSet<_> = + nodes.iter().map(|node| node.label()).collect(); + + assert!(node_labels.contains("FetchGroupData")); + assert!(node_labels.contains("UpdateDataplane")); + } + + /// Verify saga handles missing groups gracefully when executed with + /// non-existent group IDs. + #[nexus_test(server = crate::Server)] + async fn test_saga_handles_missing_groups( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let opctx = test_helpers::test_opctx(cptestctx); + + // Create params with non-existent UUIDs + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + external_group_id: Uuid::new_v4(), // Non-existent + underlay_group_id: Uuid::new_v4(), // Non-existent + }; + + // Execute the saga - should fail gracefully when fetching non-existent groups + let result = nexus + .sagas + .saga_execute::(params) + .await; + + // Saga should fail (groups don't exist) + assert!( + result.is_err(), + "Saga should fail when groups don't exist in database" + ); + } + + /// Test that the saga rejects external groups that are not in "Creating" state. + /// + /// The saga validates that external groups are in "Creating" state before applying + /// DPD configuration. This test verifies that validation works correctly. + #[nexus_test(server = crate::Server)] + async fn test_saga_rejects_non_creating_state( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_helpers::test_opctx(cptestctx); + + // Setup: Create IP pools + create_default_ip_pool(client).await; + + // Create multicast IP pool + let pool_name = "saga-state-pool"; + let pool_params = IpPoolCreate::new_multicast( + IdentityMetadataCreateParams { + name: pool_name.parse().unwrap(), + description: "Multicast IP pool for saga test".to_string(), + }, + IpVersion::V4, + ); + object_create::<_, IpPool>(client, "/v1/system/ip-pools", &pool_params) + .await; + + // Add multicast IP range + let asm_range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 70, 0, 1), + Ipv4Addr::new(224, 70, 0, 255), + ) + .unwrap(), + ); + let range_url = format!("/v1/system/ip-pools/{}/ranges/add", pool_name); + object_create::<_, IpPoolRange>(client, &range_url, &asm_range).await; + + // Link pool to silo + link_ip_pool(client, pool_name, &DEFAULT_SILO.id(), false).await; + + // Create multicast group via API (starts in Creating state) + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "saga-reject-test".parse().unwrap(), + description: "Test saga state validation".to_string(), + }, + multicast_ip: Some(IpAddr::V4(Ipv4Addr::new(224, 70, 0, 100))), + source_ips: None, + pool: Some(NameOrId::Name("saga-state-pool".parse().unwrap())), + mvlan: None, + }; + + let group: nexus_types::external_api::views::MulticastGroup = + object_create(client, "/v1/multicast-groups", &group_params).await; + + // Fetch the external group from database to get full model + let group_id = + omicron_uuid_kinds::MulticastGroupUuid::from_untyped_uuid( + group.identity.id, + ); + let external_group = datastore + .multicast_group_fetch(&opctx, group_id) + .await + 
.expect("Failed to fetch external group"); + + // Manually create underlay group (normally done by reconciler) + let underlay_group = datastore + .ensure_underlay_multicast_group( + &opctx, + external_group.clone(), + "ff04::1:2:3:4".parse().unwrap(), + ) + .await + .expect("Failed to create underlay group"); + + // Manually transition the group to "Active" state in the database + datastore + .multicast_group_set_state( + &opctx, + MulticastGroupUuid::from_untyped_uuid(group.identity.id), + nexus_db_model::MulticastGroupState::Active, + ) + .await + .expect("Failed to set group to Active state"); + + // Try to run saga on Active group - should fail + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + external_group_id: group.identity.id, + underlay_group_id: underlay_group.id, + }; + + let result = nexus + .sagas + .saga_execute::(params) + .await; + + // Saga should reject Active group + assert!(result.is_err(), "Saga should reject group in Active state"); + + // Cleanup + nexus_test_utils::resource_helpers::object_delete( + client, + &format!("/v1/multicast-groups/{}", group.identity.name), + ) + .await; + } +} diff --git a/nexus/src/app/sagas/multicast_group_dpd_update.rs b/nexus/src/app/sagas/multicast_group_dpd_update.rs new file mode 100644 index 00000000000..33d9717b2e3 --- /dev/null +++ b/nexus/src/app/sagas/multicast_group_dpd_update.rs @@ -0,0 +1,271 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Saga for updating multicast group state in dataplane via DPD. +//! +//! Handles atomic updates of external and underlay multicast groups in DPD. +//! Reads current state from database and applies to all switches. +//! +//! Idempotent saga can be called multiple times safely. If group state hasn't +//! changed, DPD-update is effectively a no-op. + +use anyhow::Context; +use serde::{Deserialize, Serialize}; +use slog::{debug, info}; +use steno::{ActionError, DagBuilder, Node}; +use uuid::Uuid; + +use dpd_client::types::{ + MulticastGroupExternalResponse, MulticastGroupUnderlayResponse, +}; + +use nexus_db_model::{MulticastGroup, UnderlayMulticastGroup}; +use nexus_db_queries::authn; +use nexus_types::identity::Resource; +use omicron_uuid_kinds::{GenericUuid, MulticastGroupUuid}; + +use super::{ActionRegistry, NexusActionContext, NexusSaga, SagaInitError}; +use crate::app::multicast::dataplane::{ + GroupUpdateParams, MulticastDataplaneClient, +}; +use crate::app::sagas::declare_saga_actions; + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub(crate) struct Params { + /// Authentication context + pub serialized_authn: authn::saga::Serialized, + /// External multicast group to update + pub external_group_id: Uuid, + /// Underlay multicast group to update + pub underlay_group_id: Uuid, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct DataplaneUpdateResponse { + underlay: MulticastGroupUnderlayResponse, + external: MulticastGroupExternalResponse, +} + +declare_saga_actions! 
{ + multicast_group_dpd_update; + + FETCH_GROUP_DATA -> "group_data" { + + mgu_fetch_group_data + } + UPDATE_DATAPLANE -> "update_responses" { + + mgu_update_dataplane + - mgu_rollback_dataplane + } +} + +#[derive(Debug)] +pub struct SagaMulticastGroupDpdUpdate; +impl NexusSaga for SagaMulticastGroupDpdUpdate { + const NAME: &'static str = "multicast-group-dpd-update"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + multicast_group_dpd_update_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: DagBuilder, + ) -> Result { + builder.append(Node::action( + "group_data", + "FetchGroupData", + FETCH_GROUP_DATA.as_ref(), + )); + + builder.append(Node::action( + "update_responses", + "UpdateDataplane", + UPDATE_DATAPLANE.as_ref(), + )); + + Ok(builder.build()?) + } +} + +/// Fetch multicast group data from database. +async fn mgu_fetch_group_data( + sagactx: NexusActionContext, +) -> Result<(MulticastGroup, UnderlayMulticastGroup), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + debug!( + osagactx.log(), + "fetching multicast group data for DPD-update"; + "external_group_id" => %params.external_group_id, + "underlay_group_id" => %params.underlay_group_id + ); + + // Fetch external multicast group + let external_group = osagactx + .datastore() + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(params.external_group_id), + ) + .await + .map_err(ActionError::action_failed)?; + + // Fetch underlay multicast group + let underlay_group = osagactx + .datastore() + .underlay_multicast_group_fetch(&opctx, params.underlay_group_id) + .await + .map_err(ActionError::action_failed)?; + + debug!( + osagactx.log(), + "successfully fetched multicast group data for DPD-update"; + "external_group_id" => %external_group.id(), + "external_group_name" => external_group.name().as_str(), + "external_ip" => %external_group.multicast_ip, + "underlay_group_id" => %underlay_group.id, + "underlay_ip" => %underlay_group.multicast_ip, + "sources" => ?external_group.source_ips + ); + + Ok((external_group, underlay_group)) +} + +/// Update external and underlay groups in dataplane atomically. 
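///
/// The change is expressed through `GroupUpdateParams`, built from the
/// freshly fetched database state, so re-running the saga with unchanged
/// state is effectively a no-op on the switches. The call has this shape
/// (illustrative; mirrors the body below):
///
/// ```ignore
/// dataplane
///     .update_groups(GroupUpdateParams {
///         external_group: &external_group,
///         underlay_group: &underlay_group,
///         new_name: external_group.name().as_str(),
///         new_sources: &external_group.source_ips,
///     })
///     .await?;
/// ```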
+async fn mgu_update_dataplane( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let (external_group, underlay_group) = sagactx + .lookup::<(MulticastGroup, UnderlayMulticastGroup)>("group_data")?; + + // Use MulticastDataplaneClient for consistent DPD operations + let dataplane = MulticastDataplaneClient::new( + osagactx.nexus().resolver().clone(), + osagactx.log().clone(), + ) + .await + .map_err(ActionError::action_failed)?; + + debug!( + osagactx.log(), + "updating multicast group in DPD across switches (idempotent)"; + "switch_count" => %dataplane.switch_count(), + "external_group_id" => %external_group.id(), + "external_group_name" => external_group.name().as_str(), + "external_ip" => %external_group.multicast_ip, + "underlay_ip" => %underlay_group.multicast_ip, + "sources" => ?external_group.source_ips, + ); + + let (underlay_response, external_response) = dataplane + .update_groups(GroupUpdateParams { + external_group: &external_group, + underlay_group: &underlay_group, + new_name: external_group.name().as_str(), + new_sources: &external_group.source_ips, + }) + .await + .map_err(ActionError::action_failed)?; + + info!( + osagactx.log(), + "successfully updated multicast groups in DPD across switches"; + "external_group_id" => %external_group.id(), + "underlay_group_id" => %underlay_group.id, + "group_name" => external_group.name().as_str() + ); + + Ok(DataplaneUpdateResponse { + underlay: underlay_response, + external: external_response, + }) +} + +/// Roll back multicast group updates by removing groups from DPD. +async fn mgu_rollback_dataplane( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let (external_group, _) = sagactx + .lookup::<(MulticastGroup, UnderlayMulticastGroup)>("group_data")?; + + let multicast_tag = external_group.name().to_string(); + + let dataplane = MulticastDataplaneClient::new( + osagactx.nexus().resolver().clone(), + osagactx.log().clone(), + ) + .await + .map_err(ActionError::action_failed)?; + + debug!( + osagactx.log(), + "rolling back multicast additions"; + "external_group_id" => %params.external_group_id, + "underlay_group_id" => %params.underlay_group_id, + "tag" => %multicast_tag, + "external_group_name" => external_group.name().as_str(), + ); + + dataplane + .remove_groups(&multicast_tag) + .await + .context("failed to cleanup multicast groups during saga rollback")?; + + debug!( + osagactx.log(), + "completed rollback of multicast configuration"; + "tag" => %multicast_tag + ); + + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::app::saga::create_saga_dag; + use crate::app::sagas::test_helpers; + use nexus_db_queries::authn::saga::Serialized; + use nexus_test_utils_macros::nexus_test; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + fn new_test_params(opctx: &nexus_db_queries::context::OpContext) -> Params { + Params { + serialized_authn: Serialized::for_opctx(opctx), + external_group_id: Uuid::new_v4(), + underlay_group_id: Uuid::new_v4(), + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_dag_structure(cptestctx: &ControlPlaneTestContext) { + let opctx = test_helpers::test_opctx(cptestctx); + let params = new_test_params(&opctx); + let dag = + create_saga_dag::(params).unwrap(); + + // Verify the DAG has the expected structure + let nodes: Vec<_> = dag.get_nodes().collect(); + assert!(nodes.len() >= 2); // Should have at least our 2 
main actions + + // Verify expected node labels exist + let node_labels: std::collections::HashSet<_> = + nodes.iter().map(|node| node.label()).collect(); + + assert!(node_labels.contains("FetchGroupData")); + assert!(node_labels.contains("UpdateDataplane")); + } +} diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 4b76bd2edb6..fd6035bd439 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -2132,6 +2132,7 @@ mod test { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await; diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index f956924c51a..75e1bfdea76 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -92,6 +92,14 @@ impl super::Nexus { // the control plane. if was_modified { self.activate_inventory_collection(); + + // Signal multicast cache invalidation since sled topology changed. + // The reconciler will be activated via its inventory watchers. + if let Some(flag) = + &self.background_tasks_internal.multicast_invalidate_cache + { + flag.store(true, std::sync::atomic::Ordering::SeqCst); + } } Ok(()) @@ -123,6 +131,15 @@ impl super::Nexus { // for the next periodic activation before they can be cleaned up. self.background_tasks.task_instance_watcher.activate(); + // Signal multicast cache invalidation since sled topology changed. + // Inventory collection will be triggered automatically, which will + // activate the reconciler via its inventory watchers. + if let Some(flag) = + &self.background_tasks_internal.multicast_invalidate_cache + { + flag.store(true, std::sync::atomic::Ordering::SeqCst); + } + Ok(prev_policy) } diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index 8c841ea51d3..6d261e16f4d 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -63,6 +63,12 @@ pub trait TestInterfaces { async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result; fn set_samael_max_issue_delay(&self, max_issue_delay: chrono::Duration); + + /// Manually invalidate multicast caches and activate reconciler. + /// + /// This simulates topology changes that would require cache invalidation, + /// such as backplane configuration changes or sled movements. 
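    ///
    /// A test would typically drive it like this (illustrative; assumes the
    /// `TestInterfaces` trait is in scope):
    ///
    /// ```ignore
    /// let nexus = &cptestctx.server.server_context().nexus;
    /// nexus.invalidate_multicast_caches();
    /// ```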
+ fn invalidate_multicast_caches(&self); } #[async_trait] @@ -164,4 +170,14 @@ impl TestInterfaces for super::Nexus { let mut mid = self.samael_max_issue_delay.lock().unwrap(); *mid = Some(max_issue_delay); } + + fn invalidate_multicast_caches(&self) { + if let Some(flag) = + &self.background_tasks_internal.multicast_invalidate_cache + { + flag.store(true, std::sync::atomic::Ordering::SeqCst); + self.background_tasks + .activate(&self.background_tasks.task_multicast_reconciler); + } + } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a13a83d7ba9..2c08c457fb0 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -8,8 +8,8 @@ use super::{ console_api, params, views::{ self, Certificate, FloatingIp, Group, IdentityProvider, Image, IpPool, - IpPoolRange, PhysicalDisk, Project, Rack, Silo, SiloQuotas, - SiloUtilization, Sled, Snapshot, SshKey, User, UserBuiltin, + IpPoolRange, MulticastGroup, PhysicalDisk, Project, Rack, Silo, + SiloQuotas, SiloUtilization, Sled, Snapshot, SshKey, User, UserBuiltin, Utilization, Vpc, VpcRouter, VpcSubnet, }, }; @@ -1748,7 +1748,7 @@ impl NexusExternalApi for NexusExternalApiImpl { // like we do for update, delete, associate. let (.., pool) = nexus.ip_pool_lookup(&opctx, &pool_selector)?.fetch().await?; - Ok(HttpResponseOk(IpPool::from(pool))) + Ok(HttpResponseOk(pool.into())) }; apictx .context @@ -2355,6 +2355,318 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + // Multicast Groups + + async fn multicast_group_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let groups = + nexus.multicast_groups_list(&opctx, &paginated_by).await?; + let results_page = ScanByNameOrId::results_page( + &query, + groups + .into_iter() + .map(views::MulticastGroup::try_from) + .collect::, _>>()?, + &marker_for_name_or_id, + )?; + Ok(HttpResponseOk(results_page)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn multicast_group_create( + rqctx: RequestContext, + group_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let create_params = group_params.into_inner(); + + let group = + nexus.multicast_group_create(&opctx, &create_params).await?; + Ok(HttpResponseCreated(views::MulticastGroup::try_from(group)?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn multicast_group_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group.clone(), + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, 
&group_selector)?; + let group = + nexus.multicast_group_fetch(&opctx, &group_lookup).await?; + Ok(HttpResponseOk(views::MulticastGroup::try_from(group)?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn multicast_group_update( + rqctx: RequestContext, + path_params: Path, + updated_group: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let updated_group_params = updated_group.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group.clone(), + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, &group_selector)?; + let group = nexus + .multicast_group_update( + &opctx, + &group_lookup, + &updated_group_params, + ) + .await?; + Ok(HttpResponseOk(views::MulticastGroup::try_from(group)?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn multicast_group_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group.clone(), + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, &group_selector)?; + nexus.multicast_group_delete(&opctx, &group_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn lookup_multicast_group_by_ip( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + + let ip_addr = path.address; + + // System endpoint requires fleet-level read authorization + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + let group = + nexus.multicast_group_lookup_by_ip(&opctx, ip_addr).await?; + Ok(HttpResponseOk(views::MulticastGroup::try_from(group)?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + // Multicast Group Member Management + + async fn multicast_group_member_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group, + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, &group_selector)?; + + let members = nexus + .multicast_group_members_list( + &opctx, + &group_lookup, + &pag_params, + ) + .await?; + + let results = members + .into_iter() + .map(views::MulticastGroupMember::try_from) + .collect::, _>>()?; + + Ok(HttpResponseOk(ScanById::results_page( + &query, + results, + 
&|_, member: &views::MulticastGroupMember| member.identity.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn multicast_group_member_add( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + member_params: TypedBody, + ) -> Result, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let member_params = member_params.into_inner(); + + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group, + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, &group_selector)?; + + let instance_lookup = nexus.instance_lookup( + &opctx, + params::InstanceSelector { + project: query.project, + instance: member_params.instance, + }, + )?; + + let member = nexus + .multicast_group_member_attach( + &opctx, + &group_lookup, + &instance_lookup, + ) + .await?; + + Ok(HttpResponseCreated(views::MulticastGroupMember::try_from( + member, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn multicast_group_member_remove( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group, + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, &group_selector)?; + + let instance_lookup = nexus.instance_lookup( + &opctx, + params::InstanceSelector { + project: query.project, + instance: path.instance, + }, + )?; + + nexus + .multicast_group_member_detach( + &opctx, + &group_lookup, + &instance_lookup, + ) + .await?; + + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + // Disks async fn disk_list( @@ -5415,6 +5727,132 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + // Instance Multicast Groups + + async fn instance_multicast_group_list( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let memberships = nexus + .instance_list_multicast_groups(&opctx, &instance_lookup) + .await?; + Ok(HttpResponseOk(ResultsPage { + items: memberships, + next_page: None, + })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn instance_multicast_group_join( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let opctx = + 
crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + + let instance_selector = params::InstanceSelector { + project: query.project.clone(), + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group, + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, &group_selector)?; + + let member = nexus + .multicast_group_member_attach( + &opctx, + &group_lookup, + &instance_lookup, + ) + .await?; + + Ok(HttpResponseCreated(views::MulticastGroupMember::try_from( + member, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn instance_multicast_group_leave( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + + let instance_selector = params::InstanceSelector { + project: query.project.clone(), + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + + let group_selector = params::MulticastGroupSelector { + multicast_group: path.multicast_group, + }; + let group_lookup = + nexus.multicast_group_lookup(&opctx, &group_selector)?; + + nexus + .multicast_group_member_detach( + &opctx, + &group_lookup, + &instance_lookup, + ) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + // Snapshots async fn snapshot_list( diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index e2994ba1422..4bfce83f4fb 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -15,6 +15,7 @@ camino-tempfile.workspace = true chrono.workspace = true crucible-agent-client.workspace = true dns-server.workspace = true +dpd-client.workspace = true dns-service-client.workspace = true dropshot.workspace = true futures.workspace = true diff --git a/nexus/test-utils/src/background.rs b/nexus/test-utils/src/background.rs index 7b39f69daa8..6733ab9daae 100644 --- a/nexus/test-utils/src/background.rs +++ b/nexus/test-utils/src/background.rs @@ -54,9 +54,29 @@ pub async fn wait_background_task( /// Given the name of a background task, activate it, then wait for it to /// complete. Return the `BackgroundTask` object from this invocation. +/// +/// The `timeout` parameter controls how long to wait for the task to go idle +/// before activating it, and how long to wait for it to complete after +/// activation. Defaults to 10 seconds if not specified. pub async fn activate_background_task( lockstep_client: &ClientTestContext, task_name: &str, +) -> BackgroundTask { + activate_background_task_with_timeout( + lockstep_client, + task_name, + Duration::from_secs(10), + ) + .await +} + +/// Like `activate_background_task`, but with a configurable timeout. +/// +/// Use this variant when you need a longer timeout. 
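///
/// For example (illustrative; the task name here is only a placeholder):
///
/// ```ignore
/// let task = activate_background_task_with_timeout(
///     &lockstep_client,
///     "multicast_reconciler",
///     Duration::from_secs(120),
/// )
/// .await;
/// ```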
+pub async fn activate_background_task_with_timeout( + lockstep_client: &ClientTestContext, + task_name: &str, + timeout: Duration, ) -> BackgroundTask { // If it is running, wait for an existing task to complete - this function // has to wait for _this_ activation to finish. @@ -83,7 +103,7 @@ pub async fn activate_background_task( Err(CondCheckError::<()>::NotYet) }, &Duration::from_millis(50), - &Duration::from_secs(10), + &timeout, ) .await .expect("task never went to idle"); @@ -163,7 +183,7 @@ pub async fn activate_background_task( } }, &Duration::from_millis(50), - &Duration::from_secs(60), + &timeout, ) .await .unwrap(); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index b7a9e776565..2b801ba6694 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -109,7 +109,7 @@ use std::collections::HashMap; use std::fmt::Debug; use std::iter::{once, repeat, zip}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use std::time::Duration; use uuid::Uuid; @@ -186,7 +186,8 @@ pub struct ControlPlaneTestContext { pub oximeter: Oximeter, pub producer: ProducerServer, pub gateway: BTreeMap, - pub dendrite: HashMap, + pub dendrite: + RwLock>, pub mgd: HashMap, pub external_dns_zone_name: String, pub external_dns: dns_server::TransientServer, @@ -285,6 +286,22 @@ impl ControlPlaneTestContext { ) } + /// Stop a Dendrite instance for testing failure scenarios. + pub async fn stop_dendrite( + &self, + switch_location: omicron_common::api::external::SwitchLocation, + ) { + use slog::debug; + let log = &self.logctx.log; + debug!(log, "Stopping Dendrite for {switch_location}"); + + let dendrite_opt = + { self.dendrite.write().unwrap().remove(&switch_location) }; + if let Some(mut dendrite) = dendrite_opt { + dendrite.cleanup().await.unwrap(); + } + } + pub async fn teardown(mut self) { self.server.close().await; self.database.cleanup().await.unwrap(); @@ -299,7 +316,7 @@ impl ControlPlaneTestContext { for (_, gateway) in self.gateway { gateway.teardown().await; } - for (_, mut dendrite) in self.dendrite { + for (_, mut dendrite) in self.dendrite.into_inner().unwrap() { dendrite.cleanup().await.unwrap(); } for (_, mut mgd) in self.mgd { @@ -457,7 +474,8 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { pub oximeter: Option, pub producer: Option, pub gateway: BTreeMap, - pub dendrite: HashMap, + pub dendrite: + RwLock>, pub mgd: HashMap, // NOTE: Only exists after starting Nexus, until external Nexus is @@ -516,7 +534,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { oximeter: None, producer: None, gateway: BTreeMap::new(), - dendrite: HashMap::new(), + dendrite: RwLock::new(HashMap::new()), mgd: HashMap::new(), nexus_internal: None, nexus_internal_addr: None, @@ -749,7 +767,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .await .unwrap(); let port = dendrite.port; - self.dendrite.insert(switch_location, dendrite); + self.dendrite.write().unwrap().insert(switch_location, dendrite); let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, port, 0, 0); @@ -794,11 +812,16 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .host_zone_switch( sled_id, Ipv6Addr::LOCALHOST, - self.dendrite.get(&switch_location).unwrap().port, + self.dendrite + .read() + .unwrap() + .get(&switch_location) + .unwrap() + .port, self.gateway.get(&switch_location).unwrap().port, self.mgd.get(&switch_location).unwrap().port, ) - .unwrap(); + 
.unwrap() } pub async fn start_oximeter(&mut self) { @@ -1550,7 +1573,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { producer: self.producer.unwrap(), logctx: self.logctx, gateway: self.gateway, - dendrite: self.dendrite, + dendrite: RwLock::new(self.dendrite.into_inner().unwrap()), mgd: self.mgd, external_dns_zone_name: self.external_dns_zone_name.unwrap(), external_dns: self.external_dns.unwrap(), @@ -1587,7 +1610,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { for (_, gateway) in self.gateway { gateway.teardown().await; } - for (_, mut dendrite) in self.dendrite { + for (_, mut dendrite) in self.dendrite.into_inner().unwrap() { dendrite.cleanup().await.unwrap(); } for (_, mut mgd) in self.mgd { @@ -2079,13 +2102,19 @@ pub async fn start_sled_agent( sim_mode: sim::SimMode, simulated_upstairs: &Arc, ) -> Result { - let config = sim::Config::for_testing( + // Generate a baseboard serial number that matches the SP configuration + // (SimGimlet00, SimGimlet01, etc.) so that inventory can link sled agents + // to their corresponding SPs via baseboard_id. + let baseboard_serial = format!("SimGimlet{:02}", sled_index); + + let config = sim::Config::for_testing_with_baseboard( id, sim_mode, Some(nexus_address), Some(update_directory), sim::ZpoolConfig::None, SledCpuFamily::AmdMilan, + Some(baseboard_serial), ); start_sled_agent_with_config(log, &config, sled_index, simulated_upstairs) .await @@ -2325,3 +2354,31 @@ async fn wait_for_producer_impl( .await .expect("Failed to find producer within time limit"); } + +/// Build a DPD client for test validation using the first running dendrite instance +pub fn dpd_client( + cptestctx: &ControlPlaneTestContext, +) -> dpd_client::Client { + // Get the first available dendrite instance and extract the values we need + let dendrite_guard = cptestctx.dendrite.read().unwrap(); + let (switch_location, dendrite_instance) = dendrite_guard + .iter() + .next() + .expect("No dendrite instances running for test"); + + // Copy the values we need while the guard is still alive + let switch_location = *switch_location; + let port = dendrite_instance.port; + drop(dendrite_guard); + + let client_state = dpd_client::ClientState { + tag: String::from("nexus-test"), + log: cptestctx.logctx.log.new(slog::o!( + "component" => "DpdClient", + "switch" => switch_location.to_string() + )), + }; + + let addr = Ipv6Addr::LOCALHOST; + dpd_client::Client::new(&format!("http://[{addr}]:{port}"), client_state) +} diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index b78adb25542..b15d673ef99 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -294,6 +294,44 @@ pub async fn create_ip_pool( (pool, range) } +/// Create a multicast IP pool with a multicast range for testing. +/// +/// The multicast IP range may be specified if it's important for testing specific +/// multicast addresses, or a default multicast range (224.1.0.0 - 224.1.255.255) +/// will be provided if the `ip_range` argument is `None`. 
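///
/// For example, to create a pool covering the default range (illustrative):
///
/// ```ignore
/// let (pool, range) =
///     create_multicast_ip_pool(&client, "mcast-pool", None).await;
/// ```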
+pub async fn create_multicast_ip_pool( + client: &ClientTestContext, + pool_name: &str, + ip_range: Option, +) -> (IpPool, IpPoolRange) { + let pool = object_create( + client, + "/v1/system/ip-pools", + ¶ms::IpPoolCreate::new_multicast( + IdentityMetadataCreateParams { + name: pool_name.parse().unwrap(), + description: String::from("a multicast ip pool"), + }, + ip_range + .map(|r| r.version()) + .unwrap_or_else(|| views::IpVersion::V4), + ), + ) + .await; + + let ip_range = ip_range.unwrap_or_else(|| { + use std::net::Ipv4Addr; + IpRange::try_from(( + Ipv4Addr::new(224, 1, 0, 0), + Ipv4Addr::new(224, 1, 255, 255), + )) + .unwrap() + }); + let url = format!("/v1/system/ip-pools/{}/ranges/add", pool_name); + let range = object_create(client, &url, &ip_range).await; + (pool, range) +} + pub async fn link_ip_pool( client: &ClientTestContext, pool_name: &str, @@ -662,6 +700,8 @@ pub async fn create_instance( true, Default::default(), None, + // Multicast groups= + Vec::::new(), ) .await } @@ -679,6 +719,7 @@ pub async fn create_instance_with( start: bool, auto_restart_policy: Option, cpu_platform: Option, + multicast_groups: Vec, ) -> Instance { let url = format!("/v1/instances?project={}", project_name); @@ -705,6 +746,7 @@ pub async fn create_instance_with( start, auto_restart_policy, anti_affinity_groups: Vec::new(), + multicast_groups, }, ) .await diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 3997fe9543a..e9ad1149e4b 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -201,6 +201,14 @@ fm.sitrep_load_period_secs = 15 # only necessary to ensure that it always happens eventually. fm.sitrep_gc_period_secs = 600 probe_distributor.period_secs = 60 +multicast_reconciler.period_secs = 60 +# Use shorter TTLs for tests to ensure cache invalidation logic is exercised +multicast_reconciler.sled_cache_ttl_secs = 60 +multicast_reconciler.backplane_cache_ttl_secs = 120 + +[multicast] +# Enable multicast functionality for tests (disabled by default in production) +enabled = true [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/tests/integration_tests/affinity.rs b/nexus/tests/integration_tests/affinity.rs index 15640868627..a786979410b 100644 --- a/nexus/tests/integration_tests/affinity.rs +++ b/nexus/tests/integration_tests/affinity.rs @@ -73,6 +73,8 @@ impl ProjectScopedApiHelper<'_, T> { None, // Instance CPU platform= None, + // Multicast groups= + Vec::new(), ) .await } diff --git a/nexus/tests/integration_tests/audit_log.rs b/nexus/tests/integration_tests/audit_log.rs index 3d84852ee73..4577f75827c 100644 --- a/nexus/tests/integration_tests/audit_log.rs +++ b/nexus/tests/integration_tests/audit_log.rs @@ -326,6 +326,7 @@ async fn test_audit_log_create_delete_ops(ctx: &ControlPlaneTestContext) { false, // start=false, so instance is created in stopped state None::, None::, + Vec::new(), ) .await; let _disk = create_disk(client, "test-project", "test-disk").await; diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index dd2ebc92b33..eca4777fbbf 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -662,6 +662,7 @@ pub static DEMO_INSTANCE_CREATE: LazyLock = start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }); pub static DEMO_STOPPED_INSTANCE_CREATE: LazyLock = LazyLock::new(|| 
params::InstanceCreate { @@ -684,6 +685,7 @@ pub static DEMO_STOPPED_INSTANCE_CREATE: LazyLock = start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }); pub static DEMO_INSTANCE_UPDATE: LazyLock = LazyLock::new(|| params::InstanceUpdate { @@ -692,6 +694,7 @@ pub static DEMO_INSTANCE_UPDATE: LazyLock = auto_restart_policy: Nullable(None), ncpus: InstanceCpuCount(1), memory: ByteCount::from_gibibytes_u32(16), + multicast_groups: None, }); // The instance needs a network interface, too. @@ -745,6 +748,74 @@ pub static DEMO_CERTIFICATE_CREATE: LazyLock = service: shared::ServiceUsingCertificate::ExternalApi, }); +// Multicast groups and members +// Multicast groups are fleet-scoped (like IP pools), not project-scoped +pub static DEMO_MULTICAST_GROUP_NAME: LazyLock = + LazyLock::new(|| "demo-multicast-group".parse().unwrap()); +pub static MULTICAST_GROUPS_URL: LazyLock = + LazyLock::new(|| "/v1/multicast-groups".to_string()); +pub static DEMO_MULTICAST_GROUP_URL: LazyLock = LazyLock::new(|| { + format!("/v1/multicast-groups/{}", *DEMO_MULTICAST_GROUP_NAME) +}); +pub static DEMO_MULTICAST_GROUP_MEMBERS_URL: LazyLock = + LazyLock::new(|| { + format!( + "/v1/multicast-groups/{}/members?project={}", + *DEMO_MULTICAST_GROUP_NAME, *DEMO_PROJECT_NAME + ) + }); +pub static DEMO_MULTICAST_GROUP_MEMBER_URL: LazyLock = + LazyLock::new(|| { + format!( + "/v1/multicast-groups/{}/members/{}?project={}", + *DEMO_MULTICAST_GROUP_NAME, *DEMO_INSTANCE_NAME, *DEMO_PROJECT_NAME + ) + }); +pub static DEMO_INSTANCE_MULTICAST_GROUPS_URL: LazyLock = + LazyLock::new(|| { + format!( + "/v1/instances/{}/multicast-groups?project={}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_NAME + ) + }); +pub static DEMO_INSTANCE_MULTICAST_GROUP_JOIN_URL: LazyLock = + LazyLock::new(|| { + format!( + "/v1/instances/{}/multicast-groups/{}?project={}", + *DEMO_INSTANCE_NAME, *DEMO_MULTICAST_GROUP_NAME, *DEMO_PROJECT_NAME + ) + }); +pub static DEMO_MULTICAST_GROUP_BY_IP_URL: LazyLock = + LazyLock::new(|| { + "/v1/system/multicast-groups/by-ip/224.0.1.100".to_string() + }); +pub static DEMO_MULTICAST_GROUP_CREATE: LazyLock = + LazyLock::new(|| params::MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: DEMO_MULTICAST_GROUP_NAME.clone(), + description: String::from("demo multicast group"), + }, + multicast_ip: Some("224.0.1.100".parse().unwrap()), + pool: Some(DEMO_MULTICAST_IP_POOL_NAME.clone().into()), + source_ips: Some(Vec::new()), + mvlan: None, + }); +pub static DEMO_MULTICAST_GROUP_UPDATE: LazyLock = + LazyLock::new(|| params::MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: Some("updated description".to_string()), + }, + source_ips: Some(Vec::new()), + mvlan: None, + }); +pub static DEMO_MULTICAST_MEMBER_ADD: LazyLock< + params::MulticastGroupMemberAdd, +> = LazyLock::new(|| params::MulticastGroupMemberAdd { + instance: DEMO_INSTANCE_NAME.clone().into(), +}); + +// Switch port settings and status pub const DEMO_SWITCH_PORT_URL: &'static str = "/v1/system/hardware/switch-port"; pub static DEMO_SWITCH_PORT_SETTINGS_APPLY_URL: LazyLock = @@ -954,6 +1025,43 @@ pub static DEMO_IP_POOL_UPDATE: LazyLock = description: Some(String::from("a new IP pool")), }, }); + +// Multicast IP Pool +pub static DEMO_MULTICAST_IP_POOL_NAME: LazyLock = + LazyLock::new(|| "default-multicast".parse().unwrap()); +pub static DEMO_MULTICAST_IP_POOL_CREATE: LazyLock = + LazyLock::new(|| { + params::IpPoolCreate::new_multicast( + 
IdentityMetadataCreateParams { + name: DEMO_MULTICAST_IP_POOL_NAME.clone(), + description: String::from("a multicast IP pool"), + }, + IpVersion::V4, + ) + }); +pub static DEMO_MULTICAST_IP_POOL_URL: LazyLock = LazyLock::new(|| { + format!("/v1/system/ip-pools/{}", *DEMO_MULTICAST_IP_POOL_NAME) +}); +pub static DEMO_MULTICAST_IP_POOL_SILOS_URL: LazyLock = + LazyLock::new(|| format!("{}/silos", *DEMO_MULTICAST_IP_POOL_URL)); +pub static DEMO_MULTICAST_IP_POOL_RANGE: LazyLock = + LazyLock::new(|| { + IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 0, 1, 100), + Ipv4Addr::new(224, 0, 1, 200), + ) + .unwrap(), + ) + }); +pub static DEMO_MULTICAST_IP_POOL_RANGES_ADD_URL: LazyLock = + LazyLock::new(|| format!("{}/ranges/add", *DEMO_MULTICAST_IP_POOL_URL)); +pub static DEMO_MULTICAST_IP_POOL_SILOS_BODY: LazyLock = + LazyLock::new(|| params::IpPoolLinkSilo { + silo: NameOrId::Id(DEFAULT_SILO.identity().id), + is_default: false, // multicast pool is not the default + }); + pub static DEMO_IP_POOL_SILOS_URL: LazyLock = LazyLock::new(|| format!("{}/silos", *DEMO_IP_POOL_URL)); pub static DEMO_IP_POOL_SILOS_BODY: LazyLock = @@ -971,8 +1079,8 @@ pub static DEMO_IP_POOL_SILO_UPDATE_BODY: LazyLock = pub static DEMO_IP_POOL_RANGE: LazyLock = LazyLock::new(|| { IpRange::V4( Ipv4Range::new( - std::net::Ipv4Addr::new(10, 0, 0, 0), - std::net::Ipv4Addr::new(10, 0, 0, 255), + Ipv4Addr::new(10, 0, 0, 0), + Ipv4Addr::new(10, 0, 0, 255), ) .unwrap(), ) @@ -1062,7 +1170,7 @@ pub static DEMO_FLOAT_IP_CREATE: LazyLock = name: DEMO_FLOAT_IP_NAME.clone(), description: String::from("a new IP pool"), }, - ip: Some(std::net::Ipv4Addr::new(10, 0, 0, 141).into()), + ip: Some(Ipv4Addr::new(10, 0, 0, 141).into()), pool: None, }); @@ -3050,6 +3158,86 @@ pub static VERIFY_ENDPOINTS: LazyLock> = LazyLock::new( unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![AllowedMethod::Get], }, + + // Multicast groups + + // Multicast groups are fleet-scoped and allow any authenticated user + // (including unprivileged) to create, read, modify, and delete groups + // to enable cross-project and cross-silo multicast communication. + VerifyEndpoint { + url: &MULTICAST_GROUPS_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::Full, + allowed_methods: vec![ + AllowedMethod::Get, + AllowedMethod::Post( + serde_json::to_value(&*DEMO_MULTICAST_GROUP_CREATE).unwrap(), + ), + ], + }, + VerifyEndpoint { + url: &DEMO_MULTICAST_GROUP_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::Full, + allowed_methods: vec![ + AllowedMethod::Get, + AllowedMethod::Put( + serde_json::to_value(&*DEMO_MULTICAST_GROUP_UPDATE).unwrap(), + ), + AllowedMethod::Delete, + ], + }, + // Multicast member endpoints have asymmetric authorization: + // - GET operations only check fleet-scoped group Read permission (accessible to all authenticated users) + // - POST/DELETE operations require project-scoped instance Modify permission + // + // When unprivileged users try to add/remove instances from inaccessible projects, + // the instance lookup fails with 404 (not 403) to prevent information leakage. + // This is correct security behavior. 
+ // + // Configuration: Protected + ReadOnly + // - GET: Not tested for unprivileged access here (verified in authorization.rs tests) + // - POST/DELETE: Correctly expect 404 when instance is in inaccessible project + VerifyEndpoint { + url: &DEMO_MULTICAST_GROUP_MEMBERS_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::ReadOnly, + allowed_methods: vec![ + AllowedMethod::GetVolatile, + AllowedMethod::Post( + serde_json::to_value(&*DEMO_MULTICAST_MEMBER_ADD).unwrap(), + ), + ], + }, + VerifyEndpoint { + url: &DEMO_MULTICAST_GROUP_MEMBER_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::ReadOnly, + allowed_methods: vec![ + AllowedMethod::Delete, + ], + }, + VerifyEndpoint { + url: &DEMO_INSTANCE_MULTICAST_GROUPS_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::ReadOnly, + allowed_methods: vec![AllowedMethod::GetVolatile], + }, + VerifyEndpoint { + url: &DEMO_INSTANCE_MULTICAST_GROUP_JOIN_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Put(serde_json::to_value(()).unwrap()), + AllowedMethod::Delete, + ], + }, + VerifyEndpoint { + url: &DEMO_MULTICAST_GROUP_BY_IP_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![AllowedMethod::Get], + }, // Audit log VerifyEndpoint { url: &AUDIT_LOG_URL, diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index b8183eb9ad9..83594fe8184 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -1044,6 +1044,7 @@ async fn test_floating_ip_attach_fail_between_projects( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, StatusCode::BAD_REQUEST, ) @@ -1377,6 +1378,7 @@ async fn instance_for_external_ips( start, Default::default(), None, + Vec::new(), ) .await } diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 6986222680d..228569faa4a 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -249,6 +249,7 @@ async fn test_create_instance_with_bad_hostname_impl( ssh_public_keys: None, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let mut body: serde_json::Value = serde_json::from_str(&serde_json::to_string(¶ms).unwrap()).unwrap(); @@ -357,6 +358,7 @@ async fn test_instances_create_reboot_halt( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), })) .expect_status(Some(StatusCode::BAD_REQUEST)), ) @@ -768,6 +770,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { true, Default::default(), None, + Vec::new(), ) .await; let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); @@ -942,6 +945,7 @@ async fn test_instance_migrate_v2p_and_routes( true, Default::default(), None, + Vec::new(), ) .await; let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); @@ -1157,6 +1161,7 @@ async fn test_instance_migration_compatible_cpu_platforms( true, Default::default(), Some(InstanceCpuPlatform::AmdMilan), + Vec::new(), ) .await; let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); @@ -1346,6 +1351,7 @@ async fn 
test_instance_migration_incompatible_cpu_platforms( true, Default::default(), Some(InstanceCpuPlatform::AmdTurin), + Vec::new(), ) .await; let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); @@ -1423,6 +1429,7 @@ async fn test_instance_migration_unknown_sled_type( true, Default::default(), None, + Vec::new(), ) .await; let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); @@ -1680,6 +1687,7 @@ async fn test_instance_failed_when_on_expunged_sled( true, Some(auto_restart), None, + Vec::new(), ) .await; let instance_id = @@ -2030,6 +2038,7 @@ async fn make_forgotten_instance( true, Some(auto_restart), None, + Vec::new(), ) .await; let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); @@ -2260,6 +2269,7 @@ async fn test_instance_metrics_with_migration( true, Default::default(), None, + Vec::new(), ) .await; let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); @@ -2428,6 +2438,7 @@ async fn test_instances_create_stopped_start( boot_disk: None, cpu_platform: None, start: false, + multicast_groups: Vec::new(), auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }, @@ -2615,6 +2626,7 @@ async fn test_instance_using_image_from_other_project_fails( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), })) .expect_status(Some(StatusCode::BAD_REQUEST)), ) @@ -2683,6 +2695,7 @@ async fn test_instance_create_saga_removes_instance_database_record( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let response = NexusRequest::objects_post( client, @@ -2715,6 +2728,7 @@ async fn test_instance_create_saga_removes_instance_database_record( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let _ = NexusRequest::objects_post( client, @@ -2811,6 +2825,7 @@ async fn test_instance_with_single_explicit_ip_address( auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let response = NexusRequest::objects_post( client, @@ -2932,6 +2947,7 @@ async fn test_instance_with_new_custom_network_interfaces( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let response = NexusRequest::objects_post( client, @@ -3051,6 +3067,7 @@ async fn test_instance_create_delete_network_interface( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let response = NexusRequest::objects_post( client, @@ -3306,6 +3323,7 @@ async fn test_instance_update_network_interfaces( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let response = NexusRequest::objects_post( client, @@ -3673,6 +3691,7 @@ async fn test_instance_update_network_interface_transit_ips( false, Default::default(), None, + Vec::new(), ) .await; @@ -3943,6 +3962,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = RequestBuilder::new(client, http::Method::POST, &get_instances_url()) @@ -4017,6 +4037,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { start: true, auto_restart_policy: Default::default(), 
anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4109,6 +4130,7 @@ async fn test_instance_create_attach_disks( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4208,6 +4230,7 @@ async fn test_instance_create_attach_disks_undo( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4293,6 +4316,7 @@ async fn test_attach_eight_disks_to_instance( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4382,6 +4406,7 @@ async fn test_cannot_attach_nine_disks_to_instance( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let url_instances = format!("/v1/instances?project={}", project_name); @@ -4485,6 +4510,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4577,6 +4603,7 @@ async fn test_disks_detached_when_instance_destroyed( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4676,6 +4703,7 @@ async fn test_disks_detached_when_instance_destroyed( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4761,6 +4789,7 @@ async fn test_duplicate_disk_attach_requests_ok( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4806,6 +4835,7 @@ async fn test_duplicate_disk_attach_requests_ok( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -4862,6 +4892,7 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { cpu_platform: None, disks: Vec::new(), start: false, + multicast_groups: Vec::new(), auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -4926,6 +4957,7 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }, ) .await; @@ -5000,6 +5032,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -5031,6 +5064,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }, http::StatusCode::CONFLICT, ) @@ -5052,6 +5086,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }, ) .await; @@ -5075,6 +5110,7 @@ async fn test_updating_missing_instance_is_not_found( cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(0).unwrap(), memory: ByteCount::from_gibibytes_u32(0), + multicast_groups: None, }, http::StatusCode::NOT_FOUND, ) @@ 
-5168,6 +5204,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { // Start out with None auto_restart_policy: None, anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -5194,6 +5231,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { cpu_platform: Nullable(None), ncpus: initial_ncpus, memory: initial_memory, + multicast_groups: None, }; // Resizing the instance immediately will error; the instance is running. @@ -5203,6 +5241,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { ncpus: new_ncpus, memory: new_memory, + multicast_groups: None, ..base_update.clone() }, StatusCode::CONFLICT, @@ -5224,6 +5263,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { ncpus: new_ncpus, memory: new_memory, + multicast_groups: None, ..base_update.clone() }, ) @@ -5238,6 +5278,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { ncpus: initial_ncpus, memory: new_memory, + multicast_groups: None, ..base_update.clone() }, ) @@ -5251,6 +5292,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { ncpus: initial_ncpus, memory: initial_memory, + multicast_groups: None, ..base_update.clone() }, ) @@ -5268,6 +5310,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { ncpus: InstanceCpuCount(MAX_VCPU_PER_INSTANCE + 1), memory: instance.memory, + multicast_groups: None, ..base_update.clone() }, StatusCode::BAD_REQUEST, @@ -5288,6 +5331,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { ncpus: instance.ncpus, memory: ByteCount::from_mebibytes_u32(0), + multicast_groups: None, ..base_update.clone() }, StatusCode::BAD_REQUEST, @@ -5303,6 +5347,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { ncpus: instance.ncpus, memory: ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE - 1) .unwrap(), + multicast_groups: None, ..base_update.clone() }, StatusCode::BAD_REQUEST, @@ -5320,6 +5365,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { memory: ByteCount::from_mebibytes_u32( (max_mib + 1024).try_into().unwrap(), ), + multicast_groups: None, ..base_update.clone() }, StatusCode::BAD_REQUEST, @@ -5339,6 +5385,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { ncpus: new_ncpus, memory: new_memory, + multicast_groups: None, ..base_update.clone() }, StatusCode::NOT_FOUND, @@ -5375,6 +5422,7 @@ async fn test_auto_restart_policy_can_be_changed( // Start out with None auto_restart_policy: None, anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -5402,6 +5450,7 @@ async fn test_auto_restart_policy_can_be_changed( cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }), ) .await; @@ -5448,6 +5497,7 @@ async fn test_cpu_platform_can_be_changed(cptestctx: &ControlPlaneTestContext) { start: false, auto_restart_policy: None, anti_affinity_groups: Vec::new(), + multicast_groups: vec![], }; let builder = @@ -5475,6 +5525,7 @@ async fn test_cpu_platform_can_be_changed(cptestctx: &ControlPlaneTestContext) { cpu_platform: Nullable(cpu_platform), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + 
multicast_groups: None, }), ) .await; @@ -5543,6 +5594,7 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -5570,6 +5622,7 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }, ) .await; @@ -5615,6 +5668,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let builder = @@ -5639,6 +5693,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }, http::StatusCode::CONFLICT, ) @@ -5673,6 +5728,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { cpu_platform: Nullable(None), ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }, ) .await; @@ -5710,6 +5766,7 @@ async fn test_instances_memory_rejected_less_than_min_memory_size( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let error = NexusRequest::new( @@ -5764,6 +5821,7 @@ async fn test_instances_memory_not_divisible_by_min_memory_size( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let error = NexusRequest::new( @@ -5818,6 +5876,7 @@ async fn test_instances_memory_greater_than_max_size( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let error = NexusRequest::new( @@ -5916,6 +5975,7 @@ async fn test_instance_create_with_anti_affinity_groups( memory: ByteCount::from_gibibytes_u32(4), ssh_public_keys: None, start: false, + multicast_groups: Vec::new(), hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -5986,6 +6046,7 @@ async fn test_instance_create_with_duplicate_anti_affinity_groups( memory: ByteCount::from_gibibytes_u32(4), ssh_public_keys: None, start: false, + multicast_groups: Vec::new(), hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -6057,6 +6118,7 @@ async fn test_instance_create_with_anti_affinity_groups_that_do_not_exist( memory: ByteCount::from_gibibytes_u32(4), ssh_public_keys: None, start: false, + multicast_groups: Vec::new(), hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -6141,6 +6203,7 @@ async fn test_instance_create_with_ssh_keys( // By default should transfer all profile keys ssh_public_keys: None, start: false, + multicast_groups: Vec::new(), hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -6191,6 +6254,7 @@ async fn test_instance_create_with_ssh_keys( // Should only transfer the first key ssh_public_keys: Some(vec![user_keys[0].identity.name.clone().into()]), start: false, + 
multicast_groups: Vec::new(), hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -6240,6 +6304,7 @@ async fn test_instance_create_with_ssh_keys( // Should transfer no keys ssh_public_keys: Some(vec![]), start: false, + multicast_groups: Vec::new(), hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -6390,6 +6455,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( boot_disk: None, cpu_platform: None, start: false, + multicast_groups: Vec::new(), auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -6450,6 +6516,7 @@ async fn test_cannot_provision_instance_beyond_cpu_limit( boot_disk: None, cpu_platform: None, start: false, + multicast_groups: Vec::new(), auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -6507,6 +6574,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( boot_disk: None, cpu_platform: None, start: false, + multicast_groups: Vec::new(), auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -6612,6 +6680,7 @@ async fn test_can_start_instance_with_cpu_platform( start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: vec![], }; let url_instances = get_instances_url(); @@ -6652,6 +6721,7 @@ async fn test_can_start_instance_with_cpu_platform( cpu_platform: Nullable(Some(InstanceCpuPlatform::AmdTurin)), ncpus: InstanceCpuCount::try_from(1).unwrap(), memory: ByteCount::from_gibibytes_u32(4), + multicast_groups: None, }, ) .await; @@ -6725,6 +6795,7 @@ async fn test_cannot_start_instance_with_unsatisfiable_cpu_platform( start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: vec![], }; let url_instances = get_instances_url(); @@ -7022,6 +7093,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let error = object_create_error( client, @@ -7093,6 +7165,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; // instance create 404s @@ -7158,6 +7231,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let url = format!("/v1/instances?project={}", PROJECT_NAME); @@ -7225,6 +7299,7 @@ async fn test_instance_attach_several_external_ips( true, Default::default(), None, + Vec::new(), ) .await; @@ -7300,6 +7375,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let error = object_create_error( client, @@ -7332,6 +7408,7 @@ async fn create_instance_with_pool( true, Default::default(), None, + Vec::new(), ) .await } @@ -7437,6 +7514,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; let url_instances = format!("/v1/instances?project={}", PROJECT_NAME); NexusRequest::objects_post(client, &url_instances, &instance_params) @@ -7615,6 +7693,7 @@ async fn 
test_instance_create_with_cross_project_subnet( vec![if0_params], ), external_ips: vec![], + multicast_groups: vec![], disks: vec![], boot_disk: None, cpu_platform: None, @@ -7743,6 +7822,7 @@ async fn test_silo_limited_collaborator_cross_project_subnet( vec![if_same_project], ), external_ips: vec![], + multicast_groups: vec![], disks: vec![], boot_disk: None, cpu_platform: None, @@ -7809,6 +7889,7 @@ async fn test_silo_limited_collaborator_cross_project_subnet( vec![if_cross_project], ), external_ips: vec![], + multicast_groups: vec![], disks: vec![], boot_disk: None, cpu_platform: None, diff --git a/nexus/tests/integration_tests/internet_gateway.rs b/nexus/tests/integration_tests/internet_gateway.rs index 4c9550640a8..57e044ddb57 100644 --- a/nexus/tests/integration_tests/internet_gateway.rs +++ b/nexus/tests/integration_tests/internet_gateway.rs @@ -388,6 +388,7 @@ async fn test_setup(c: &ClientTestContext) { true, None, None, + Vec::new(), ) .await; diff --git a/nexus/tests/integration_tests/inventory_matching.rs b/nexus/tests/integration_tests/inventory_matching.rs new file mode 100644 index 00000000000..3faab873551 --- /dev/null +++ b/nexus/tests/integration_tests/inventory_matching.rs @@ -0,0 +1,116 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Test that inventory matching works correctly between sled agents and SPs.
+
+use nexus_db_queries::context::OpContext;
+use nexus_test_utils_macros::nexus_test;
+use nexus_types::identity::Asset;
+
+type ControlPlaneTestContext =
+ nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
+
+/// Test that simulated sleds and SPs have matching baseboard identifiers
+/// so inventory can properly map sleds to switch ports.
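+///
+/// The match is made on baseboard identity (serial number plus part number)
+/// against the SPs recorded in the latest inventory collection; the matching
+/// SP's `sp_slot` is what ties a sled to a cubby/switch port, so the test
+/// also sanity-checks that the slot value is plausible.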
+#[nexus_test] +async fn test_sled_sp_inventory_matching(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Get the latest inventory collection + let inventory = datastore + .inventory_get_latest_collection(&opctx) + .await + .expect("failed to get inventory collection") + .expect("no inventory collection available"); + + // Get all sleds + let sleds = datastore + .sled_list_all_batched( + &opctx, + nexus_types::deployment::SledFilter::InService, + ) + .await + .expect("failed to list sleds"); + + // Verify we have at least one sled + assert!(!sleds.is_empty(), "expected at least one sled"); + + // Track whether we found matching SP data for any sled + let mut found_matching_sp = false; + + // Check each sled for matching SP data + for sled in sleds { + let sled_serial = sled.serial_number(); + let sled_part = sled.part_number(); + + // Look for matching SP in inventory + let sp_match = inventory.sps.iter().find(|(bb, _sp)| { + bb.serial_number == sled_serial && bb.part_number == sled_part + }); + + if let Some((_bb, sp)) = sp_match { + found_matching_sp = true; + + // Verify the SP has a valid sp_slot for switch port mapping + assert!( + sp.sp_slot < 32, + "SP slot {} is unexpectedly large", + sp.sp_slot + ); + } else { + eprintln!( + "No exact SP match found for sled {} (serial={sled_serial}, part={sled_part})", + sled.id() + ); + + // Check if there's a serial-only match (indicating part number mismatch) + let serial_only_match = inventory + .sps + .iter() + .find(|(bb, _sp)| bb.serial_number == sled_serial); + + if let Some((bb, _sp)) = serial_only_match { + eprintln!( + "Found SP with same serial but different part: SP has part={}", + bb.part_number + ); + } + } + } + + assert!(found_matching_sp, "No sleds had matching SP data in inventory"); +} + +/// Verify that the baseboard model is correctly set to "i86pc" for simulated +/// hardware. +#[nexus_test] +async fn test_simulated_baseboard_model(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Get all sleds + let sleds = datastore + .sled_list_all_batched( + &opctx, + nexus_types::deployment::SledFilter::InService, + ) + .await + .expect("failed to list sleds"); + + for sled in sleds { + // Simulated sleds should use "i86pc" as the model to match SP simulator + assert_eq!( + sled.part_number(), + "i86pc", + "Sled {} has incorrect model '{}', expected 'i86pc'", + sled.id(), + sled.part_number() + ); + } +} diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index e8eec6b9fdf..586764bb2a2 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -1195,6 +1195,42 @@ async fn test_ip_pool_range_rejects_v6(cptestctx: &ControlPlaneTestContext) { assert_eq!(error.message, "IPv6 ranges are not allowed yet"); } +// Support for IPv6 multicast ranges not enabled yet. +// Delete this test when we support IPv6 multicast ranges. 
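+// Until then, multicast pools reject IPv6 ranges with the same error as
+// unicast pools ("IPv6 ranges are not allowed yet"), which is what this
+// test asserts.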
+#[nexus_test] +async fn test_ip_pool_multicast_range_rejects_v6( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + // Create a multicast pool + let pool_params = IpPoolCreate::new_multicast( + IdentityMetadataCreateParams { + name: "mcast-p0".parse().unwrap(), + description: "Multicast pool for IPv6 rejection test".to_string(), + }, + IpVersion::V4, + ); + object_create::<_, IpPool>(client, "/v1/system/ip-pools", &pool_params) + .await; + + // Try to add an IPv6 multicast range (ff30::/12 is SSM) + let range = IpRange::V6( + Ipv6Range::new( + std::net::Ipv6Addr::new(0xff30, 0, 0, 0, 0, 0, 0, 10), + std::net::Ipv6Addr::new(0xff30, 0, 0, 0, 0, 0, 0, 20), + ) + .unwrap(), + ); + + let add_url = "/v1/system/ip-pools/mcast-p0/ranges/add"; + let error = + object_create_error(client, add_url, &range, StatusCode::BAD_REQUEST) + .await; + + assert_eq!(error.message, "IPv6 ranges are not allowed yet"); +} + #[nexus_test] async fn test_ip_pool_range_pagination(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 4980d57ef4d..01fbcb0bc5d 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -27,9 +27,11 @@ mod images; mod initialization; mod instances; mod internet_gateway; +mod inventory_matching; mod ip_pools; mod metrics; mod metrics_querier; +mod multicast; mod oximeter; mod pantry; mod password_login; diff --git a/nexus/tests/integration_tests/multicast/api.rs b/nexus/tests/integration_tests/multicast/api.rs new file mode 100644 index 00000000000..bcda0eafe3a --- /dev/null +++ b/nexus/tests/integration_tests/multicast/api.rs @@ -0,0 +1,329 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// Copyright 2025 Oxide Computer Company + +//! Tests for multicast API behavior and functionality. +//! +//! This module tests various aspects of multicast group membership APIs, including: +//! +//! - Stopped instance handling +//! - Idempotency behavior +//! - API consistency + +use http::{Method, StatusCode}; +use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_project, object_create, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params::{ + InstanceCreate, InstanceNetworkInterfaceAttachment, MulticastGroupCreate, + MulticastGroupMemberAdd, +}; +use nexus_types::external_api::views::{MulticastGroup, MulticastGroupMember}; +use omicron_common::api::external::{ + ByteCount, IdentityMetadataCreateParams, Instance, InstanceCpuCount, + NameOrId, +}; + +use super::*; + +/// Test various multicast API behaviors and scenarios. 
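+///
+/// Exercised below: instances that are stopped when they join a group (the
+/// reconciler should park them in the "Left" state), idempotent member adds,
+/// and UUID-based (projectless) join/leave/list operations.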
+#[nexus_test] +async fn test_multicast_api_behavior(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let project_name = "api-edge-cases-project"; + let group_name = "api-edge-cases-group"; + + // Setup in parallel + let (_, _, mcast_pool) = ops::join3( + create_project(client, project_name), + create_default_ip_pool(client), + create_multicast_ip_pool(client, "api-edge-pool"), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for API edge case testing".to_string(), + }, + multicast_ip: None, // Test with auto-assigned IP + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + // Case: Stopped instances (all APIs should handle stopped instances + // identically) + + // API Path: Instance created stopped with multicast group + let instance1_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: "edge-case-1".parse().unwrap(), + description: "Stopped instance with multicast group".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: "edge-case-1".parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + multicast_groups: vec![NameOrId::Name(group_name.parse().unwrap())], + disks: vec![], + boot_disk: None, + start: false, // Create stopped + cpu_platform: None, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let instance_url = format!("/v1/instances?project={project_name}"); + let instance1: Instance = + object_create(client, &instance_url, &instance1_params).await; + + // API Path: Instance created stopped, then added to group + let instance2_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: "edge-case-2".parse().unwrap(), + description: "Stopped instance, group added later".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: "edge-case-2".parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + multicast_groups: vec![], // No groups at creation + disks: vec![], + boot_disk: None, + start: false, // Create stopped + cpu_platform: None, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + let instance2: Instance = + object_create(client, &instance_url, &instance2_params).await; + + // Add to group after creation + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name("edge-case-2".parse().unwrap()), + }; + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Verify both stopped instances are in identical "Left" state + // + // State transition: "Joining" → "Left" (reconciler detects invalid instance) + // Create saga creates member with state="Joining", sled_id=NULL + // Reconciler runs, sees instance_valid=false (stopped/no VMM) + // Reconciler immediately transitions 
"Joining"→"Left" (no DPD programming) + // + // This verifies the reconciler correctly handles stopped instances without + // requiring inventory/DPD readiness (unlike running instances). + for (i, instance) in [&instance1, &instance2].iter().enumerate() { + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + + assert_eq!( + instance.runtime.run_state, + InstanceState::Stopped, + "Instance {} should be stopped", + i + 1 + ); + } + + // Case: Idempotency test (adding already-existing member should be + // safe for all APIs) + + // Try to add instance1 again using group member add (should be idempotent) + let duplicate_member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name("edge-case-1".parse().unwrap()), + }; + + // This should succeed idempotently + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &member_add_url) + .body(Some(&duplicate_member_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Idempotent member add should succeed"); + + // Final verification: member count should still be 2 (no duplicates) + let final_members = list_multicast_group_members(client, group_name).await; + assert_eq!( + final_members.len(), + 2, + "Should have exactly 2 members (no duplicates from idempotency test)" + ); + + // Case: UUID-based API access (without project names) + // Since multicast groups are fleet-scoped, UUID-based operations should work + // without requiring project parameter + + let instance3_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: "edge-case-3".parse().unwrap(), + description: "Instance for UUID-based access".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: "edge-case-3".parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + multicast_groups: vec![], + disks: vec![], + boot_disk: None, + start: false, // Create stopped to test UUID operations on non-running instances + cpu_platform: None, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let (instance3, group) = ops::join2( + object_create::<_, Instance>(client, &instance_url, &instance3_params), + get_multicast_group(client, group_name), + ) + .await; + let instance_uuid = instance3.identity.id; + let group_uuid = group.identity.id; + + // Join using UUIDs (no project parameter) + let join_url_uuid = + format!("/v1/instances/{instance_uuid}/multicast-groups/{group_uuid}"); + let member_uuid: MulticastGroupMember = NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &join_url_uuid) + .body(Some(&())) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("UUID-based join should succeed") + .parsed_body() + .expect( + "Failed to parse MulticastGroupMember from UUID-based join response", + ); + + assert_eq!(member_uuid.instance_id, instance_uuid); + // Instance is stopped (start: false), so reconciler transitions "Joining"→"Left" + wait_for_member_state( + cptestctx, + group_name, + instance_uuid, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + + // Verify membership via UUID-based instance group list (no project parameter) + let instance_groups_url = + 
format!("/v1/instances/{instance_uuid}/multicast-groups"); + let uuid_memberships: Vec = + NexusRequest::iter_collection_authn( + client, + &instance_groups_url, + "", + None, + ) + .await + .expect("UUID-based instance group list should succeed") + .all_items; + + assert_eq!( + uuid_memberships.len(), + 1, + "UUID-based list should show 1 membership" + ); + assert_eq!(uuid_memberships[0].instance_id, instance_uuid); + + // Verify UUID-based group member listing + let group_members_url_uuid = + mcast_group_members_url(&group_uuid.to_string()); + let uuid_based_members: Vec = + NexusRequest::iter_collection_authn( + client, + &group_members_url_uuid, + "", + None, + ) + .await + .expect("UUID-based group member list should succeed") + .all_items; + + assert_eq!( + uuid_based_members.len(), + 3, + "Should show 3 members via UUID-based group list" + ); + + // Leave using UUIDs (no project parameter) + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, &join_url_uuid) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("UUID-based leave should succeed"); + + wait_for_member_count(client, group_name, 2).await; + + // Verify instance3 was actually removed + let final_members_after_leave = + list_multicast_group_members(client, group_name).await; + assert!( + !final_members_after_leave + .iter() + .any(|m| m.instance_id == instance_uuid), + "instance3 should not be in the group after UUID-based leave" + ); + + // Negative test: invalid UUID should fail with 400 Bad Request + let invalid_join_url = + format!("/v1/instances/not-a-uuid/multicast-groups/{group_uuid}"); + NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &invalid_join_url) + .body(Some(&())) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Invalid UUID should return 400 Bad Request"); + + // Cleanup - instance3 has already left the group above + cleanup_instances( + cptestctx, + client, + project_name, + &["edge-case-1", "edge-case-2", "edge-case-3"], + ) + .await; + cleanup_multicast_groups(client, &[group_name]).await; +} diff --git a/nexus/tests/integration_tests/multicast/authorization.rs b/nexus/tests/integration_tests/multicast/authorization.rs new file mode 100644 index 00000000000..5247e4fe2a6 --- /dev/null +++ b/nexus/tests/integration_tests/multicast/authorization.rs @@ -0,0 +1,1094 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Authorization tests for fleet-scoped multicast groups. +//! +//! Multicast groups are fleet-scoped resources with explicit permissions granted +//! to any authenticated user in the fleet (defined in nexus/auth/src/authz/omicron.polar). +//! +//! **Authorization model (intentionally deviates from standard Oxide IAM):** +//! - **Read/List**: Any authenticated user can read and list multicast groups in their fleet +//! (no Fleet::Viewer role required) +//! - **Create**: Any authenticated user can create multicast groups in their fleet +//! (no Fleet::Admin role required) +//! - **Modify/Delete**: Any authenticated user can modify and delete multicast groups in their fleet +//! (no Fleet::Admin role required) +//! - **Member operations**: Users can add/remove instances they own (requires instance permissions) +//! +//! 
This enables cross-project and cross-silo multicast communication. Users +//! with ONLY project-level roles (e.g., Project::Collaborator) and NO +//! silo-level roles can still access multicast groups, because the only +//! requirement is being an authenticated user in a silo within the fleet. + +use std::net::{IpAddr, Ipv4Addr}; + +use http::StatusCode; + +use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; +use nexus_test_utils::resource_helpers::test_params::UserPassword; +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_instance, create_local_user, create_project, + grant_iam, link_ip_pool, object_get, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params::{ + InstanceCreate, InstanceNetworkInterfaceAttachment, MulticastGroupCreate, + MulticastGroupMemberAdd, MulticastGroupUpdate, ProjectCreate, +}; +use nexus_types::external_api::shared::{ProjectRole, SiloRole}; +use nexus_types::external_api::views::{ + MulticastGroup, MulticastGroupMember, Silo, +}; +use omicron_common::api::external::{ + ByteCount, Hostname, IdentityMetadataCreateParams, + IdentityMetadataUpdateParams, Instance, InstanceCpuCount, NameOrId, +}; +use omicron_common::vlan::VlanID; + +use super::*; + +/// Test that silo users can create and modify multicast groups in their fleet. +/// This verifies the authorization model where any authenticated silo user +/// can manage multicast groups. +#[nexus_test] +async fn test_silo_users_can_create_and_modify_multicast_groups( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_default_ip_pool(&client).await; + + // Get current silo info + let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name); + let silo: Silo = object_get(client, &silo_url).await; + + // Create multicast IP pool (as fleet admin) + create_multicast_ip_pool(&client, "mcast-pool").await; + link_ip_pool(&client, "mcast-pool", &silo.identity.id, false).await; + + // Create a regular silo user (collaborator) + let user = create_local_user( + client, + &silo, + &"test-user".parse().unwrap(), + UserPassword::LoginDisallowed, + ) + .await; + + // Grant collaborator role to the user + grant_iam( + client, + &silo_url, + SiloRole::Collaborator, + user.id, + AuthnMode::PrivilegedUser, + ) + .await; + + // Create multicast group as the silo user - should SUCCEED + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 101)); + let group_url = "/v1/multicast-groups"; + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "user-group".parse().unwrap(), + description: "Group created by silo user".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())), + mvlan: None, + }; + + // Silo user can create multicast group + let group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&group_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::SiloUser(user.id)) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + assert_eq!(group.identity.name.as_str(), "user-group"); + assert_eq!(group.multicast_ip, multicast_ip); + + // Wait for group to become active before updating + wait_for_group_active(client, "user-group").await; + + // Silo user can also modify the multicast group they created + let update_url = mcast_group_url(&group.identity.name.to_string()); + let update_params = 
MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: Some(group.identity.name.clone()), + description: Some("Updated description by silo user".to_string()), + }, + source_ips: None, + mvlan: None, + }; + + let updated_group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::PUT, &update_url) + .body(Some(&update_params)) + .expect_status(Some(StatusCode::OK)), + ) + .authn_as(AuthnMode::SiloUser(user.id)) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + assert_eq!( + updated_group.identity.description, + "Updated description by silo user" + ); + + // Fleet admin can also create multicast groups + let admin_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "admin-group".parse().unwrap(), + description: "Group created by fleet admin".to_string(), + }, + multicast_ip: Some(IpAddr::V4(Ipv4Addr::new(224, 0, 1, 102))), + source_ips: None, + pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())), + mvlan: None, + }; + + let admin_group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&admin_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + assert_eq!(admin_group.identity.name.as_str(), "admin-group"); +} + +/// Test that silo users can attach their own instances to fleet-scoped +/// multicast groups (including groups created by other users or fleet admins). +#[nexus_test] +async fn test_silo_users_can_attach_instances_to_multicast_groups( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_default_ip_pool(&client).await; + + // Get current silo info + let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name); + let silo: Silo = object_get(client, &silo_url).await; + + // Create multicast pool and link to silo + create_multicast_ip_pool(&client, "mcast-pool").await; + link_ip_pool(&client, "default", &silo.identity.id, true).await; + link_ip_pool(&client, "mcast-pool", &silo.identity.id, false).await; + + // Create a regular silo user + let user = create_local_user( + client, + &silo, + &"test-user".parse().unwrap(), + UserPassword::LoginDisallowed, + ) + .await; + + grant_iam( + client, + &silo_url, + SiloRole::Collaborator, + user.id, + AuthnMode::PrivilegedUser, + ) + .await; + + // Create project as the silo user + let project_url = "/v1/projects"; + let project_params = ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "user-project".parse().unwrap(), + description: "Project created by silo user".to_string(), + }, + }; + NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, project_url) + .body(Some(&project_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::SiloUser(user.id)) + .execute() + .await + .unwrap(); + + // Fleet admin creates multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 100)); + let group_url = "/v1/multicast-groups"; + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "shared-group".parse().unwrap(), + description: "Fleet-scoped multicast group".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())), + mvlan: None, + }; + let group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + 
.body(Some(&group_params))
+ .expect_status(Some(StatusCode::CREATED)),
+ )
+ .authn_as(AuthnMode::PrivilegedUser)
+ .execute()
+ .await
+ .unwrap()
+ .parsed_body()
+ .unwrap();
+
+ // Silo user creates instance in their project
+ let instance_url = "/v1/instances?project=user-project";
+ let instance_params = InstanceCreate {
+ identity: IdentityMetadataCreateParams {
+ name: "user-instance".parse().unwrap(),
+ description: "Instance created by silo user".to_string(),
+ },
+ ncpus: InstanceCpuCount::try_from(1).unwrap(),
+ memory: ByteCount::from_gibibytes_u32(1),
+ hostname: "user-instance".parse::<Hostname>().unwrap(),
+ user_data: vec![],
+ ssh_public_keys: None,
+ network_interfaces: InstanceNetworkInterfaceAttachment::Default,
+ external_ips: vec![],
+ multicast_groups: vec![],
+ disks: vec![],
+ boot_disk: None,
+ cpu_platform: None,
+ start: false,
+ auto_restart_policy: Default::default(),
+ anti_affinity_groups: Vec::new(),
+ };
+
+ let instance: Instance = NexusRequest::new(
+ RequestBuilder::new(client, http::Method::POST, &instance_url)
+ .body(Some(&instance_params))
+ .expect_status(Some(StatusCode::CREATED)),
+ )
+ .authn_as(AuthnMode::SiloUser(user.id))
+ .execute()
+ .await
+ .unwrap()
+ .parsed_body()
+ .unwrap();
+
+ // Silo user can attach their instance to the fleet-scoped multicast group
+ let member_params = MulticastGroupMemberAdd {
+ instance: NameOrId::Id(instance.identity.id),
+ };
+ let member_add_url = mcast_group_member_add_url(
+ &group.identity.name.to_string(),
+ &member_params.instance,
+ "user-project",
+ );
+
+ let member: MulticastGroupMember = NexusRequest::new(
+ RequestBuilder::new(client, http::Method::POST, &member_add_url)
+ .body(Some(&member_params))
+ .expect_status(Some(StatusCode::CREATED)),
+ )
+ .authn_as(AuthnMode::SiloUser(user.id))
+ .execute()
+ .await
+ .unwrap()
+ .parsed_body()
+ .unwrap();
+
+ assert_eq!(member.instance_id, instance.identity.id);
+ assert_eq!(member.multicast_group_id, group.identity.id);
+}
+
+/// Test that authenticated silo users can read multicast groups without
+/// requiring Fleet::Viewer role (verifies the Polar policy for read permission).
+#[nexus_test]
+async fn test_authenticated_users_can_read_multicast_groups(
+ cptestctx: &ControlPlaneTestContext,
+) {
+ let client = &cptestctx.external_client;
+ create_default_ip_pool(&client).await;
+
+ // Get current silo info
+ let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name);
+ let silo: Silo = object_get(client, &silo_url).await;
+
+ // Create multicast pool and link to silo
+ create_multicast_ip_pool(&client, "mcast-pool").await;
+ link_ip_pool(&client, "mcast-pool", &silo.identity.id, false).await;
+
+ // Create a regular silo user with NO special roles (not even viewer)
+ let user = create_local_user(
+ client,
+ &silo,
+ &"regular-user".parse().unwrap(),
+ UserPassword::LoginDisallowed,
+ )
+ .await;
+
+ // Fleet admin creates a multicast group
+ let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 100));
+ let group_url = "/v1/multicast-groups";
+ let group_params = MulticastGroupCreate {
+ identity: IdentityMetadataCreateParams {
+ name: "readable-group".parse().unwrap(),
+ description: "Group that should be readable by all silo users"
+ .to_string(),
+ },
+ multicast_ip: Some(multicast_ip),
+ source_ips: None,
+ pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())),
+ mvlan: Some(VlanID::new(100).unwrap()),
+ };
+ let group: MulticastGroup = NexusRequest::new(
+ RequestBuilder::new(client, http::Method::POST, &group_url)
+ .body(Some(&group_params))
+ .expect_status(Some(StatusCode::CREATED)),
+ )
+ .authn_as(AuthnMode::PrivilegedUser)
+ .execute()
+ .await
+ .unwrap()
+ .parsed_body()
+ .unwrap();
+
+ // Wait for group to become active
+ wait_for_group_active(client, "readable-group").await;
+
+ // Regular silo user (with no Fleet roles) can GET the multicast group
+ let get_group_url = mcast_group_url(&group.identity.name.to_string());
+ let read_group: MulticastGroup = NexusRequest::new(
+ RequestBuilder::new(client, http::Method::GET, &get_group_url)
+ .expect_status(Some(StatusCode::OK)),
+ )
+ .authn_as(AuthnMode::SiloUser(user.id))
+ .execute()
+ .await
+ .expect("Silo user should be able to read multicast group")
+ .parsed_body()
+ .unwrap();
+
+ assert_eq!(read_group.identity.id, group.identity.id);
+ assert_eq!(read_group.identity.name, group.identity.name);
+ assert_eq!(read_group.multicast_ip, multicast_ip);
+ assert_eq!(read_group.mvlan, Some(VlanID::new(100).unwrap()));
+
+ // Regular silo user can also LIST multicast groups
+ let list_groups: Vec<MulticastGroup> = NexusRequest::iter_collection_authn(
+ client,
+ "/v1/multicast-groups",
+ "",
+ None,
+ )
+ .await
+ .expect("Silo user should be able to list multicast groups")
+ .all_items;
+
+ assert!(
+ list_groups.iter().any(|g| g.identity.id == group.identity.id),
+ "Multicast group should appear in list for silo user"
+ );
+}
+
+/// Test that instances from different projects can attach to the same
+/// fleet-scoped multicast group (no cross-project isolation).
+#[nexus_test] +async fn test_cross_project_instance_attachment_allowed( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + // Create pools and projects + let (_, _project1, _project2, mcast_pool) = ops::join4( + create_default_ip_pool(&client), + create_project(client, "project1"), + create_project(client, "project2"), + create_multicast_ip_pool(&client, "mcast-pool"), + ) + .await; + + // Fleet admin creates a multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 100)); + let group_url = "/v1/multicast-groups"; + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "cross-project-group".parse().unwrap(), + description: "Fleet-scoped group for cross-project test" + .to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + let group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&group_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create instances in both projects + let instance1 = create_instance(client, "project1", "instance1").await; + let instance2 = create_instance(client, "project2", "instance2").await; + + // Attach instance from project1 to the group + let member_params1 = MulticastGroupMemberAdd { + instance: NameOrId::Id(instance1.identity.id), + }; + let member_add_url1 = mcast_group_member_add_url( + &group.identity.name.to_string(), + &member_params1.instance, + "project1", + ); + let member1: MulticastGroupMember = + object_create(client, &member_add_url1, &member_params1).await; + + // Attach instance from project2 to the SAME group - should succeed + let member_params2 = MulticastGroupMemberAdd { + instance: NameOrId::Id(instance2.identity.id), + }; + let member_add_url2 = mcast_group_member_add_url( + &group.identity.name.to_string(), + &member_params2.instance, + "project2", + ); + let member2: MulticastGroupMember = + object_create(client, &member_add_url2, &member_params2).await; + + // Both instances should be members of the same group + assert_eq!(member1.multicast_group_id, group.identity.id); + assert_eq!(member2.multicast_group_id, group.identity.id); + assert_eq!(member1.instance_id, instance1.identity.id); + assert_eq!(member2.instance_id, instance2.identity.id); +} + +/// Verify that unauthenticated users cannot list multicast groups without +/// proper authentication for the list endpoint. 
+#[nexus_test] +async fn test_unauthenticated_cannot_list_multicast_groups( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_default_ip_pool(&client).await; + + // Get current silo info + let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name); + let silo: Silo = object_get(client, &silo_url).await; + + // Create multicast pool and link to silo + create_multicast_ip_pool(&client, "mcast-pool").await; + link_ip_pool(&client, "mcast-pool", &silo.identity.id, false).await; + + // Fleet admin creates a multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 150)); + let group_url = "/v1/multicast-groups"; + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "test-group".parse().unwrap(), + description: "Group for auth test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())), + mvlan: None, + }; + + NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&group_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Try to list multicast groups without authentication - should get 401 Unauthorized + RequestBuilder::new(client, http::Method::GET, &group_url) + .expect_status(Some(StatusCode::UNAUTHORIZED)) + .execute() + .await + .expect("Expected 401 Unauthorized for unauthenticated list request"); +} + +/// Verify that unauthenticated users cannot access member operations. +/// This tests that member endpoints (list/add/remove) require authentication. +#[nexus_test] +async fn test_unauthenticated_cannot_access_member_operations( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_default_ip_pool(&client).await; + + // Get current silo info + let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name); + let silo: Silo = object_get(client, &silo_url).await; + + // Create multicast pool and link to silo + create_multicast_ip_pool(&client, "mcast-pool").await; + link_ip_pool(&client, "default", &silo.identity.id, true).await; + link_ip_pool(&client, "mcast-pool", &silo.identity.id, false).await; + + // Create project and instance + let project = create_project(client, "test-project").await; + let instance = + create_instance(client, "test-project", "test-instance").await; + + // Fleet admin creates multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 150)); + let group_url = "/v1/multicast-groups"; + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "auth-test-group".parse().unwrap(), + description: "Group for auth test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())), + mvlan: None, + }; + let group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&group_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Try to LIST members without authentication - should get 401 + let members_url = mcast_group_members_url(&group.identity.name.to_string()); + RequestBuilder::new(client, http::Method::GET, &members_url) + .expect_status(Some(StatusCode::UNAUTHORIZED)) + .execute() + .await + .expect("Expected 401 Unauthorized 
for unauthenticated list members request"); + + // Try to ADD member without authentication - should get 401 + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Id(instance.identity.id), + }; + let member_add_url = mcast_group_member_add_url( + &group.identity.name.to_string(), + &member_params.instance, + project.identity.name.as_str(), + ); + RequestBuilder::new(client, http::Method::POST, &member_add_url) + .body(Some(&member_params)) + .expect_status(Some(StatusCode::UNAUTHORIZED)) + .execute() + .await + .expect( + "Expected 401 Unauthorized for unauthenticated add member request", + ); + + // Try to REMOVE member without authentication - should get 401 + let member_delete_url = format!( + "{}/{}?project={}", + mcast_group_members_url(&group.identity.name.to_string()), + instance.identity.name, + project.identity.name.as_str() + ); + RequestBuilder::new(client, http::Method::DELETE, &member_delete_url) + .expect_status(Some(StatusCode::UNAUTHORIZED)) + .execute() + .await + .expect("Expected 401 Unauthorized for unauthenticated remove member request"); +} + +/// Test the asymmetric authorization behavior: unprivileged users CAN list +/// group members even though they don't have access to the member instances. +/// +/// This validates that listing members only requires Read permission on the +/// multicast group (fleet-scoped), NOT permissions on individual instances. +#[nexus_test] +async fn test_unprivileged_users_can_list_group_members( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_default_ip_pool(&client).await; + + // Get current silo info + let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name); + let silo: Silo = object_get(client, &silo_url).await; + + // Create multicast pool and link to silo + create_multicast_ip_pool(&client, "mcast-pool").await; + link_ip_pool(&client, "default", &silo.identity.id, true).await; + link_ip_pool(&client, "mcast-pool", &silo.identity.id, false).await; + + // Create two regular silo users + let privileged_user = create_local_user( + client, + &silo, + &"privileged-user".parse().unwrap(), + UserPassword::LoginDisallowed, + ) + .await; + + let unprivileged_user = create_local_user( + client, + &silo, + &"unprivileged-user".parse().unwrap(), + UserPassword::LoginDisallowed, + ) + .await; + + // Grant Silo Collaborator only to privileged user so they can create projects + grant_iam( + client, + &silo_url, + SiloRole::Collaborator, + privileged_user.id, + AuthnMode::PrivilegedUser, + ) + .await; + + // Privileged user creates their own project + let project_url = "/v1/projects"; + let project_params = ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "privileged-project".parse().unwrap(), + description: "Project owned by privileged user".to_string(), + }, + }; + NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, project_url) + .body(Some(&project_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::SiloUser(privileged_user.id)) + .execute() + .await + .unwrap(); + + // Fleet admin creates multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 200)); + let group_url = "/v1/multicast-groups"; + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "asymmetric-test-group".parse().unwrap(), + description: "Group for testing asymmetric authorization" + .to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: 
Some(NameOrId::Name("mcast-pool".parse().unwrap())),
+        mvlan: None,
+    };
+    let group: MulticastGroup = NexusRequest::new(
+        RequestBuilder::new(client, http::Method::POST, &group_url)
+            .body(Some(&group_params))
+            .expect_status(Some(StatusCode::CREATED)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+
+    // Privileged user creates instance in their project
+    let instance_url = "/v1/instances?project=privileged-project";
+    let instance_params = InstanceCreate {
+        identity: IdentityMetadataCreateParams {
+            name: "privileged-instance".parse().unwrap(),
+            description: "Instance in privileged user's project".to_string(),
+        },
+        ncpus: InstanceCpuCount::try_from(1).unwrap(),
+        memory: ByteCount::from_gibibytes_u32(1),
+        hostname: "privileged-instance".parse().unwrap(),
+        user_data: vec![],
+        ssh_public_keys: None,
+        network_interfaces: InstanceNetworkInterfaceAttachment::Default,
+        external_ips: vec![],
+        multicast_groups: vec![],
+        disks: vec![],
+        boot_disk: None,
+        cpu_platform: None,
+        start: false,
+        auto_restart_policy: Default::default(),
+        anti_affinity_groups: Vec::new(),
+    };
+
+    let instance: Instance = NexusRequest::new(
+        RequestBuilder::new(client, http::Method::POST, &instance_url)
+            .body(Some(&instance_params))
+            .expect_status(Some(StatusCode::CREATED)),
+    )
+    .authn_as(AuthnMode::SiloUser(privileged_user.id))
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+
+    // Privileged user adds their instance to the group
+    let member_params = MulticastGroupMemberAdd {
+        instance: NameOrId::Id(instance.identity.id),
+    };
+    let member_add_url = mcast_group_member_add_url(
+        &group.identity.name.to_string(),
+        &member_params.instance,
+        "privileged-project",
+    );
+
+    NexusRequest::new(
+        RequestBuilder::new(client, http::Method::POST, &member_add_url)
+            .body(Some(&member_params))
+            .expect_status(Some(StatusCode::CREATED)),
+    )
+    .authn_as(AuthnMode::SiloUser(privileged_user.id))
+    .execute()
+    .await
+    .unwrap();
+
+    // Unprivileged user (who does NOT have access to
+    // privileged-project or privileged-instance) CAN list the group members
+    let members_url = mcast_group_members_url(&group.identity.name.to_string());
+    let members_response: dropshot::ResultsPage<MulticastGroupMember> =
+        NexusRequest::object_get(client, &members_url)
+            .authn_as(AuthnMode::SiloUser(unprivileged_user.id))
+            .execute()
+            .await
+            .expect(
+                "Unprivileged user should be able to list group members (asymmetric authorization)",
+            )
+            .parsed_body()
+            .unwrap();
+
+    let members = members_response.items;
+
+    // Verify unprivileged user can see the member that they don't own
+    assert_eq!(
+        members.len(),
+        1,
+        "Should see 1 member in the group (even though unprivileged user doesn't own it)"
+    );
+    assert_eq!(
+        members[0].instance_id, instance.identity.id,
+        "Should see the privileged user's instance ID in member list"
+    );
+    assert_eq!(
+        members[0].multicast_group_id, group.identity.id,
+        "Member should be associated with the correct group"
+    );
+
+    // Also verify privileged user can list too (sanity check)
+    let privileged_response: dropshot::ResultsPage<MulticastGroupMember> =
+        NexusRequest::object_get(client, &members_url)
+            .authn_as(AuthnMode::SiloUser(privileged_user.id))
+            .execute()
+            .await
+            .expect("Privileged user should also be able to list members")
+            .parsed_body()
+            .unwrap();
+
+    let privileged_members = privileged_response.items;
+    assert_eq!(privileged_members.len(), 1);
+    assert_eq!(privileged_members[0].instance_id, instance.identity.id);
+    assert_eq!(privileged_members[0].multicast_group_id, group.identity.id);
+
+    // Unprivileged user should get 404 (NOT 403) when trying to add/remove
+    // instances from inaccessible projects
+
+    // Try to ADD the instance (should get 404 because unprivileged user
+    // can't see the instance, not 403 which would leak its existence)
+    NexusRequest::new(
+        RequestBuilder::new(client, http::Method::POST, &member_add_url)
+            .body(Some(&member_params))
+            .expect_status(Some(StatusCode::NOT_FOUND)),
+    )
+    .authn_as(AuthnMode::SiloUser(unprivileged_user.id))
+    .execute()
+    .await
+    .expect(
+        "Should get 404 when trying to add instance from inaccessible project",
+    );
+
+    // Try to REMOVE the instance (should get 404, not 403)
+    let member_delete_url = format!(
+        "{}/{}?project=privileged-project",
+        mcast_group_members_url(&group.identity.name.to_string()),
+        instance.identity.name
+    );
+
+    NexusRequest::new(
+        RequestBuilder::new(client, http::Method::DELETE, &member_delete_url)
+            .expect_status(Some(StatusCode::NOT_FOUND)),
+    )
+    .authn_as(AuthnMode::SiloUser(unprivileged_user.id))
+    .execute()
+    .await
+    .expect("Should get 404 when trying to remove instance from inaccessible project");
+
+    // Verify the member still exists (unauthorized operations didn't modify anything)
+    let final_members: dropshot::ResultsPage<MulticastGroupMember> =
+        NexusRequest::object_get(client, &members_url)
+            .authn_as(AuthnMode::PrivilegedUser)
+            .execute()
+            .await
+            .unwrap()
+            .parsed_body()
+            .unwrap();
+
+    assert_eq!(
+        final_members.items.len(),
+        1,
+        "Member should still exist after failed unauthorized operations"
+    );
+}
+
+/// Test that authenticated silo users with ONLY project-level roles (no
+/// silo-level roles) can still access multicast groups fleet-wide. This verifies
+/// that being an authenticated SiloUser is sufficient - multicast group access
+/// does not depend on having any specific silo-level or project-level roles.
+#[nexus_test]
+async fn test_project_only_users_can_access_multicast_groups(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+    // create_default_ip_pool already links "default" pool to the DEFAULT_SILO
+    create_default_ip_pool(&client).await;
+
+    // Create multicast pool (fleet-scoped, no per-silo linking needed)
+    create_multicast_ip_pool(&client, "mcast-pool").await;
+
+    // Get the DEFAULT silo (same silo as the privileged test user)
+    // This ensures that when we create a project using AuthnMode::PrivilegedUser,
+    // it will be created in the same silo as our project_user
+    use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO;
+    let silo_url = format!("/v1/system/silos/{}", DEFAULT_SILO.identity().name);
+    let silo: Silo = object_get(client, &silo_url).await;
+
+    // Create a user with NO silo-level roles (only project-level roles)
+    let project_user = create_local_user(
+        client,
+        &silo,
+        &"project-only-user".parse().unwrap(),
+        UserPassword::LoginDisallowed,
+    )
+    .await;
+
+    // Create a project using AuthnMode::PrivilegedUser, which creates it in DEFAULT_SILO
+    // (the same silo where we created project_user above)
+    let project = create_project(client, "project-only").await;
+
+    // Grant ONLY project-level role (Project::Collaborator), NO silo roles
+    // Users with project-level roles can work within that project even without
+    // silo-level roles, as long as they reference the project by ID
+    let project_url = format!("/v1/projects/{}", project.identity.name);
+    grant_iam(
+        client,
+        &project_url,
+        ProjectRole::Collaborator,
+        project_user.id,
+        AuthnMode::PrivilegedUser,
+    )
+    .await;
+
+    // Fleet admin creates a multicast group
+    let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 250));
+    let group_url = "/v1/multicast-groups";
+    let group_params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: "project-user-test".parse().unwrap(),
+            description: "Group for testing project-only user access"
+                .to_string(),
+        },
+        multicast_ip: Some(multicast_ip),
+        source_ips: None,
+        pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())),
+        mvlan: None,
+    };
+    let group: MulticastGroup = NexusRequest::new(
+        RequestBuilder::new(client, http::Method::POST, &group_url)
+            .body(Some(&group_params))
+            .expect_status(Some(StatusCode::CREATED)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+
+    // Project-only user CAN LIST multicast groups (no silo roles needed)
+    let list_response: dropshot::ResultsPage<MulticastGroup> =
+        NexusRequest::object_get(client, "/v1/multicast-groups")
+            .authn_as(AuthnMode::SiloUser(project_user.id))
+            .execute()
+            .await
+            .expect("Project-only user should be able to list multicast groups")
+            .parsed_body()
+            .unwrap();
+
+    let list_groups = list_response.items;
+
+    assert!(
+        list_groups.iter().any(|g| g.identity.id == group.identity.id),
+        "Project-only user should see multicast groups in list"
+    );
+
+    // Project-only user CAN READ individual multicast group
+    let get_group_url = mcast_group_url(&group.identity.name.to_string());
+    let read_group: MulticastGroup = NexusRequest::new(
+        RequestBuilder::new(client, http::Method::GET, &get_group_url)
+            .expect_status(Some(StatusCode::OK)),
+    )
+    .authn_as(AuthnMode::SiloUser(project_user.id))
+    .execute()
+    .await
+    .expect("Project-only user should be able to read multicast group")
+    .parsed_body()
+    .unwrap();
+
+    assert_eq!(read_group.identity.id, group.identity.id);
+
+    // Project-only user CAN 
CREATE a multicast group + let user_group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "created-by-project-user".parse().unwrap(), + description: "Group created by project-only user".to_string(), + }, + multicast_ip: Some(IpAddr::V4(Ipv4Addr::new(224, 0, 1, 251))), + source_ips: None, + pool: Some(NameOrId::Name("mcast-pool".parse().unwrap())), + mvlan: None, + }; + + let user_created_group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&user_group_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::SiloUser(project_user.id)) + .execute() + .await + .expect("Project-only user should be able to create multicast group") + .parsed_body() + .unwrap(); + + assert_eq!( + user_created_group.identity.name.as_str(), + "created-by-project-user" + ); + + // Wait for group to become active before modifying + wait_for_group_active(client, "created-by-project-user").await; + + // Project-only user CAN MODIFY multicast groups (including ones they created) + let update_url = + mcast_group_url(&user_created_group.identity.name.to_string()); + let update_params = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: Some(user_created_group.identity.name.clone()), + description: Some("Updated by project-only user".to_string()), + }, + source_ips: None, + mvlan: None, + }; + + let updated_group: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, http::Method::PUT, &update_url) + .body(Some(&update_params)) + .expect_status(Some(StatusCode::OK)), + ) + .authn_as(AuthnMode::SiloUser(project_user.id)) + .execute() + .await + .expect("Project-only user should be able to modify multicast group") + .parsed_body() + .unwrap(); + + assert_eq!( + updated_group.identity.description, + "Updated by project-only user" + ); + + // Project-only user CAN CREATE an instance in the project (Project::Collaborator) + // Must use project ID (not name) since user has no silo-level roles + let instance_name = "project-user-instance"; + let instances_url = + format!("/v1/instances?project={}", project.identity.id); + let instance_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: "Instance created by project-only user".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + boot_disk: None, + cpu_platform: None, + start: false, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), + }; + let instance: Instance = NexusRequest::objects_post( + client, + &instances_url, + &instance_params, + ) + .authn_as(AuthnMode::SiloUser(project_user.id)) + .execute() + .await + .expect( + "Project-only user should be able to create an instance in the project", + ) + .parsed_body() + .expect("Should parse created instance"); + + // Project-only user CAN ATTACH the instance they own to a fleet-scoped group + let member_add_url = format!( + "{}?project={}", + mcast_group_members_url(&group.identity.name.to_string()), + project.identity.name + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + let member: MulticastGroupMember = 
NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &member_add_url) + .body(Some(&member_params)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::SiloUser(project_user.id)) + .execute() + .await + .expect("Project-only user should be able to attach their instance to the group") + .parsed_body() + .unwrap(); + + // Verify the member was created successfully + assert_eq!(member.instance_id, instance.identity.id); + assert_eq!(member.multicast_group_id, group.identity.id); +} diff --git a/nexus/tests/integration_tests/multicast/cache_invalidation.rs b/nexus/tests/integration_tests/multicast/cache_invalidation.rs new file mode 100644 index 00000000000..e50da7d2539 --- /dev/null +++ b/nexus/tests/integration_tests/multicast/cache_invalidation.rs @@ -0,0 +1,615 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Integration test for multicast reconciler cache invalidation. + +use std::net::IpAddr; + +use gateway_client::types::{PowerState, RotState, SpState}; +use nexus_db_queries::context::OpContext; +use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_project, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::SledFilter; +use nexus_types::external_api::params::MulticastGroupCreate; +use nexus_types::inventory::SpType; +use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_nexus::Server; +use omicron_nexus::TestInterfaces; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid, MulticastGroupUuid}; + +use super::*; + +/// Test that multicast operations can handle physical sled movement. 
+///
+/// This test simulates a sled being physically moved to a different rack slot:
+/// - Create a multicast group and instance, wait for member to join
+/// - Verify the member is programmed on the correct rear port (based on original `sp_slot`)
+/// - Insert a new inventory collection with a different `sp_slot` for the same sled
+/// - Trigger cache invalidation and reconciler activation
+/// - Verify DPD now uses the new rear port matching the new `sp_slot`
+#[nexus_test(server = Server)]
+async fn test_sled_move_updates_multicast_port_mapping(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    const PROJECT_NAME: &str = "test-project";
+    const GROUP_NAME: &str = "sled-move-test-group";
+    const INSTANCE_NAME: &str = "sled-move-test-instance";
+
+    ensure_multicast_test_ready(cptestctx).await;
+
+    let client = &cptestctx.external_client;
+    let nexus = &cptestctx.server.server_context().nexus;
+    let datastore = nexus.datastore();
+    let log = &cptestctx.logctx.log;
+    let opctx = OpContext::for_tests(log.clone(), datastore.clone());
+
+    // Create project and multicast IP pool
+    create_default_ip_pool(client).await;
+    create_project(client, PROJECT_NAME).await;
+    let pool = create_multicast_ip_pool(client, "sled-move-pool").await;
+
+    // Create multicast group
+    let params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: GROUP_NAME.parse().unwrap(),
+            description: "Group for sled move test".to_string(),
+        },
+        multicast_ip: Some("224.0.1.200".parse::<IpAddr>().unwrap()),
+        source_ips: None,
+        pool: Some(omicron_common::api::external::NameOrId::Name(
+            pool.identity.name.clone(),
+        )),
+        mvlan: None,
+    };
+
+    object_create::<_, nexus_types::external_api::views::MulticastGroup>(
+        client,
+        &super::mcast_groups_url(),
+        &params,
+    )
+    .await;
+
+    // Create instance and attach to multicast group
+    let instance = instance_for_multicast_groups(
+        cptestctx,
+        PROJECT_NAME,
+        INSTANCE_NAME,
+        true,
+        &[GROUP_NAME],
+    )
+    .await;
+
+    let instance_uuid = InstanceUuid::from_untyped_uuid(instance.identity.id);
+
+    // Wait for member to join
+    wait_for_member_state(
+        cptestctx,
+        GROUP_NAME,
+        instance.identity.id,
+        nexus_db_model::MulticastGroupMemberState::Joined,
+    )
+    .await;
+
+    // Verify initial port mapping (based on current inventory `sp_slot`)
+    verify_inventory_based_port_mapping(cptestctx, &instance_uuid)
+        .await
+        .expect("initial port mapping verification");
+
+    // Assert that the member is in Joined state
+    let members_before = list_multicast_group_members(client, GROUP_NAME).await;
+    assert_eq!(members_before.len(), 1, "should have exactly one member");
+    assert_eq!(
+        members_before[0].state, "Joined",
+        "member should be in Joined state before sled move"
+    );
+
+    // Get the sled this instance is running on
+    let sled_id = nexus
+        .active_instance_info(&instance_uuid, None)
+        .await
+        .expect("active_instance_info call succeeds")
+        .expect("instance should be on a sled")
+        .sled_id;
+
+    // Get sled baseboard information
+    let sleds = datastore
+        .sled_list_all_batched(&opctx, SledFilter::InService)
+        .await
+        .expect("list in-service sleds");
+    let sled = sleds
+        .into_iter()
+        .find(|s| s.id() == sled_id)
+        .expect("found sled in database");
+
+    // Get current inventory to see the original sp_slot
+    let original_inventory = datastore
+        .inventory_get_latest_collection(&opctx)
+        .await
+        .expect("fetch latest inventory collection")
+        .expect("inventory collection should exist");
+
+    let original_sp = original_inventory
+        .sps
+        .iter()
+        .find(|(bb, _)| bb.serial_number == sled.serial_number())
+        .map(|(_, sp)| sp)
+        .expect("found SP for sled in original inventory");
+
+    let original_slot = original_sp.sp_slot;
+    let sled_serial = sled.serial_number().to_string();
+    let sled_part_number = sled.part_number().to_string();
+
+    // Verify DPD has the original port before the move
+    let dpd = nexus_test_utils::dpd_client(cptestctx);
+    let original_port_id = dpd_client::types::PortId::Rear(
+        dpd_client::types::Rear::try_from(format!("rear{original_slot}"))
+            .expect("valid rear port string"),
+    );
+
+    // Determine a valid target slot by querying DPD's backplane map.
+    // Prefer a different slot if available; otherwise fall back to the same.
+    let backplane =
+        dpd.backplane_map().await.expect("fetch backplane map").into_inner();
+    let mut valid_slots: Vec<_> = backplane
+        .keys()
+        .filter_map(|k| {
+            k.strip_prefix("rear").and_then(|s| s.parse().ok())
+        })
+        .collect();
+    valid_slots.sort_unstable();
+    valid_slots.dedup();
+    let new_slot = valid_slots
+        .iter()
+        .copied()
+        .find(|s| *s != original_slot)
+        .unwrap_or(original_slot);
+
+    // Build a new inventory collection with the sled in a different slot
+    let mut builder = nexus_inventory::CollectionBuilder::new("sled-move-test");
+    builder.found_sp_state(
+        "test-sp",
+        SpType::Sled,
+        new_slot,
+        SpState {
+            serial_number: sled_serial,
+            model: sled_part_number,
+            power_state: PowerState::A0,
+            revision: 0,
+            base_mac_address: [0; 6],
+            hubris_archive_id: "test-hubris".to_string(),
+            rot: RotState::CommunicationFailed {
+                message: "test-rot-state".to_string(),
+            },
+        },
+    );
+
+    let new_collection = builder.build();
+
+    // Insert the new inventory collection
+    datastore
+        .inventory_insert_collection(&opctx, &new_collection)
+        .await
+        .expect("insert new inventory collection");
+
+    // Invalidate multicast caches to force refresh from new inventory
+    nexus.invalidate_multicast_caches();
+
+    // Wait for reconciler to process the cache invalidation and refresh mappings
+    wait_for_multicast_reconciler(&cptestctx.lockstep_client).await;
+
+    // Verify that DPD now uses the new rear port (matching new `sp_slot`)
+    // This helper reads the latest inventory and asserts DPD has a member
+    // on rear{`sp_slot`}, so it will verify the new mapping is right
+    verify_inventory_based_port_mapping(cptestctx, &instance_uuid)
+        .await
+        .expect("port mapping should be updated after cache invalidation");
+
+    // Assert that the member is still in "Joined" state after the move
+    let members_after = list_multicast_group_members(client, GROUP_NAME).await;
+    assert_eq!(members_after.len(), 1, "should still have exactly one member");
+    assert_eq!(
+        members_after[0].state, "Joined",
+        "member should still be in Joined state after sled move"
+    );
+    assert_eq!(
+        members_after[0].instance_id, instance.identity.id,
+        "member should still reference the same instance"
+    );
+
+    // Verify stale port cleanup: fetch DPD state and ensure old port was removed
+    let members = datastore
+        .multicast_group_members_list_by_instance(&opctx, instance_uuid, false)
+        .await
+        .expect("list multicast members for instance");
+    let member = members
+        .first()
+        .expect("instance should have at least one multicast membership");
+
+    let external_group = datastore
+        .multicast_group_fetch(
+            &opctx,
+            MulticastGroupUuid::from_untyped_uuid(member.external_group_id),
+        )
+        .await
+        .expect("fetch external multicast group");
+    let underlay_group_id = external_group
+        .underlay_group_id
+        .expect("external group should have underlay_group_id");
+
+    let underlay_group = datastore
+        .underlay_multicast_group_fetch(&opctx, underlay_group_id)
+        .await
+        .expect("fetch underlay multicast group");
+
+    let dpd_client = nexus_test_utils::dpd_client(cptestctx);
+    let underlay_group_response = dpd_client
+        .multicast_group_get(&underlay_group.multicast_ip.ip())
+        .await
+        .expect("DPD multicast_group_get succeeds")
+        .into_inner();
+
+    let dpd_members = match underlay_group_response {
+        dpd_client::types::MulticastGroupResponse::Underlay {
+            members, ..
+        } => members,
+        dpd_client::types::MulticastGroupResponse::External { .. } => {
+            panic!("Expected Underlay group, got External");
+        }
+    };
+
+    // Verify that the old port membership has been removed (stale port cleanup)
+    let has_old_port_member = dpd_members.iter().any(|m| {
+        matches!(m.direction, dpd_client::types::Direction::Underlay)
+            && m.port_id == original_port_id
+    });
+
+    assert!(
+        !has_old_port_member,
+        "Old underlay member with rear{original_slot} should have been removed after sled move"
+    );
+}
+
+/// Test that cache TTL expiry automatically refreshes sled-to-port mappings:
+///
+/// - Start test server with sled_cache_ttl = 2 seconds
+/// - Create multicast group and instance, wait for member to join
+/// - Insert new inventory with different `sp_slot` (simulating sled move)
+/// - Wait for TTL to expire (sleep 3 seconds)
+/// - Activate reconciler (which should refresh cache due to TTL)
+/// - Verify DPD uses the new rear port
+#[tokio::test]
+async fn test_cache_ttl_driven_refresh() {
+    const PROJECT_NAME: &str = "ttl-test-project";
+    const GROUP_NAME: &str = "ttl-test-group";
+    const INSTANCE_NAME: &str = "ttl-test-instance";
+
+    // Load default test config and customize TTLs
+    let mut config = nexus_test_utils::load_test_config();
+
+    // Set short cache TTLs for testing (2 seconds for sled cache)
+    config.pkg.background_tasks.multicast_reconciler.sled_cache_ttl_secs =
+        chrono::TimeDelta::seconds(2).to_std().unwrap();
+    config.pkg.background_tasks.multicast_reconciler.backplane_cache_ttl_secs =
+        chrono::TimeDelta::seconds(1).to_std().unwrap();
+
+    // Ensure multicast is enabled
+    config.pkg.multicast.enabled = true;
+
+    // Start test server with custom config
+    let cptestctx =
+        nexus_test_utils::test_setup_with_config::<Server>(
+            "test_cache_ttl_driven_refresh",
+            &mut config,
+            omicron_sled_agent::sim::SimMode::Explicit,
+            None,
+            0,
+            gateway_test_utils::setup::DEFAULT_SP_SIM_CONFIG.into(),
+        )
+        .await;
+
+    ensure_multicast_test_ready(&cptestctx).await;
+
+    // Local handles for DB and opctx
+    let nexus = &cptestctx.server.server_context().nexus;
+    let datastore = nexus.datastore();
+    let opctx =
+        OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone());
+
+    let client = &cptestctx.external_client;
+
+    // Create project and multicast IP pool
+    create_default_ip_pool(client).await;
+    create_project(client, PROJECT_NAME).await;
+    let pool = create_multicast_ip_pool(client, "ttl-test-pool").await;
+
+    // Create multicast group
+    let params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: GROUP_NAME.parse().unwrap(),
+            description: "Group for TTL refresh test".to_string(),
+        },
+        multicast_ip: Some("224.0.1.210".parse::<IpAddr>().unwrap()),
+        source_ips: None,
+        pool: Some(omicron_common::api::external::NameOrId::Name(
+            pool.identity.name.clone(),
+        )),
+        mvlan: None,
+    };
+
+    object_create::<_, nexus_types::external_api::views::MulticastGroup>(
+        client,
+        &super::mcast_groups_url(),
+        &params,
+    )
+    .await;
+
+    // Create instance and attach to 
multicast group + let instance = instance_for_multicast_groups( + &cptestctx, + PROJECT_NAME, + INSTANCE_NAME, + true, + &[GROUP_NAME], + ) + .await; + + let instance_uuid = InstanceUuid::from_untyped_uuid(instance.identity.id); + + // Wait for member to join + wait_for_member_state( + &cptestctx, + GROUP_NAME, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Verify initial port mapping (this populates the cache) + verify_inventory_based_port_mapping(&cptestctx, &instance_uuid) + .await + .expect("initial port mapping verification"); + + // Get the sled this instance is running on + let sled_id = nexus + .active_instance_info(&instance_uuid, None) + .await + .expect("active_instance_info call succeeds") + .expect("instance should be on a sled") + .sled_id; + + // Get sled baseboard information + let sleds = datastore + .sled_list_all_batched(&opctx, SledFilter::InService) + .await + .expect("list in-service sleds"); + let sled = sleds + .into_iter() + .find(|s| s.id() == sled_id) + .expect("found sled in database"); + + // Get current inventory to see the original sp_slot + let original_inventory = datastore + .inventory_get_latest_collection(&opctx) + .await + .expect("fetch latest inventory collection") + .expect("inventory collection should exist"); + + let original_sp = original_inventory + .sps + .iter() + .find(|(bb, _)| bb.serial_number == sled.serial_number()) + .map(|(_, sp)| sp) + .expect("found SP for sled in original inventory"); + + let original_slot = original_sp.sp_slot; + let sled_serial = sled.serial_number().to_string(); + let sled_part_number = sled.part_number().to_string(); + + // Determine a valid target slot by querying DPD's backplane map. + // Prefer a different slot if available; otherwise fall back to the same. 
+    let dpd = nexus_test_utils::dpd_client(&cptestctx);
+    let backplane =
+        dpd.backplane_map().await.expect("fetch backplane map").into_inner();
+    let mut valid_slots: Vec<_> = backplane
+        .keys()
+        .filter_map(|k| {
+            k.strip_prefix("rear").and_then(|s| s.parse().ok())
+        })
+        .collect();
+    valid_slots.sort_unstable();
+    valid_slots.dedup();
+    let new_slot = valid_slots
+        .iter()
+        .copied()
+        .find(|s| *s != original_slot)
+        .unwrap_or(original_slot);
+
+    // Build a new inventory collection with the sled in a different slot
+    let mut builder =
+        nexus_inventory::CollectionBuilder::new("ttl-refresh-test");
+    builder.found_sp_state(
+        "test-sp",
+        SpType::Sled,
+        new_slot,
+        SpState {
+            serial_number: sled_serial,
+            model: sled_part_number,
+            power_state: PowerState::A0,
+            revision: 0,
+            base_mac_address: [0; 6],
+            hubris_archive_id: "test-hubris".to_string(),
+            rot: RotState::CommunicationFailed {
+                message: "test-rot-state".to_string(),
+            },
+        },
+    );
+
+    let new_collection = builder.build();
+
+    // Insert the new inventory collection
+    datastore
+        .inventory_insert_collection(&opctx, &new_collection)
+        .await
+        .expect("insert new inventory collection");
+
+    // Wait for cache TTL to expire (sled_cache_ttl = 2 seconds)
+    // Sleep for 3 seconds to ensure TTL has expired
+    tokio::time::sleep(std::time::Duration::from_secs(3)).await;
+
+    wait_for_condition_with_reconciler(
+        &cptestctx.lockstep_client,
+        || async {
+            // Try to verify the inventory-based port mapping
+            // This will succeed once DPD has been updated with the new rear port
+            match verify_inventory_based_port_mapping(
+                &cptestctx,
+                &instance_uuid,
+            )
+            .await
+            {
+                Ok(()) => Ok(()),
+                Err(_) => {
+                    // Not yet updated, reconciler needs another cycle
+                    Err(CondCheckError::<()>::NotYet)
+                }
+            }
+        },
+        &POLL_INTERVAL,
+        &MULTICAST_OPERATION_TIMEOUT,
+    )
+    .await
+    .expect("DPD should be updated with new rear port after TTL expiry");
+
+    cptestctx.teardown().await;
+}
+
+/// Test that backplane cache TTL expiry triggers automatic refresh from DPD.
+///
+/// This test verifies that the backplane map cache expires independently from
+/// the sled mapping cache and continues to work correctly after TTL expiry:
+///
+/// - Start test server with backplane_cache_ttl = 1 second (shorter than sled cache)
+/// - Create multicast group and instance, wait for member to join (populates both caches)
+/// - Verify initial port mapping works
+/// - Wait for backplane TTL to expire (sleep 2 seconds)
+/// - Trigger reconciler (which refreshes expired backplane cache from DPD)
+/// - Verify port mapping still works (confirms cache refresh succeeded)
+#[tokio::test]
+async fn test_backplane_cache_ttl_expiry() {
+    const PROJECT_NAME: &str = "backplane-ttl-project";
+    const GROUP_NAME: &str = "backplane-ttl-group";
+    const INSTANCE_NAME: &str = "backplane-ttl-instance";
+
+    // Load default test config and customize TTLs
+    let mut config = nexus_test_utils::load_test_config();
+
+    // Set backplane cache TTL to 1 second (shorter than sled cache to test independently)
+    config.pkg.background_tasks.multicast_reconciler.backplane_cache_ttl_secs =
+        chrono::TimeDelta::seconds(1).to_std().unwrap();
+    // Keep sled cache TTL longer to ensure we're testing backplane cache expiry
+    config.pkg.background_tasks.multicast_reconciler.sled_cache_ttl_secs =
+        chrono::TimeDelta::seconds(10).to_std().unwrap();
+
+    // Ensure multicast is enabled
+    config.pkg.multicast.enabled = true;
+
+    // Start test server with custom config
+    let cptestctx =
+        nexus_test_utils::test_setup_with_config::<Server>(
+            "test_backplane_cache_ttl_expiry",
+            &mut config,
+            omicron_sled_agent::sim::SimMode::Explicit,
+            None,
+            0,
+            gateway_test_utils::setup::DEFAULT_SP_SIM_CONFIG.into(),
+        )
+        .await;
+
+    ensure_multicast_test_ready(&cptestctx).await;
+
+    let client = &cptestctx.external_client;
+
+    // Create project and multicast IP pool
+    create_default_ip_pool(client).await;
+    create_project(client, PROJECT_NAME).await;
+    let pool = create_multicast_ip_pool(client, "backplane-ttl-pool").await;
+
+    // Create multicast group
+    let params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: GROUP_NAME.parse().unwrap(),
+            description: "Group for backplane TTL test".to_string(),
+        },
+        multicast_ip: Some("224.0.1.230".parse::<IpAddr>().unwrap()),
+        source_ips: None,
+        pool: Some(omicron_common::api::external::NameOrId::Name(
+            pool.identity.name.clone(),
+        )),
+        mvlan: None,
+    };
+
+    object_create::<_, nexus_types::external_api::views::MulticastGroup>(
+        client,
+        &super::mcast_groups_url(),
+        &params,
+    )
+    .await;
+
+    // Create instance and attach to multicast group
+    let instance = instance_for_multicast_groups(
+        &cptestctx,
+        PROJECT_NAME,
+        INSTANCE_NAME,
+        true,
+        &[GROUP_NAME],
+    )
+    .await;
+
+    let instance_uuid = InstanceUuid::from_untyped_uuid(instance.identity.id);
+
+    // Wait for member to join (this populates both caches)
+    wait_for_member_state(
+        &cptestctx,
+        GROUP_NAME,
+        instance.identity.id,
+        nexus_db_model::MulticastGroupMemberState::Joined,
+    )
+    .await;
+
+    // Verify initial port mapping (confirms both caches are populated)
+    verify_inventory_based_port_mapping(&cptestctx, &instance_uuid)
+        .await
+        .expect("initial port mapping verification");
+
+    // Wait for backplane cache TTL to expire (1 second) but not sled cache (10 seconds)
+    // Sleep for 2 seconds to ensure backplane TTL has expired
+    tokio::time::sleep(std::time::Duration::from_secs(2)).await;
+
+    // Force cache access by triggering reconciler
+    // This will cause the reconciler to check backplane cache, find 
it expired, + // and refresh from DPD. The sled cache should still be valid. + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify member is still on the right port after backplane cache refresh + verify_inventory_based_port_mapping(&cptestctx, &instance_uuid) + .await + .expect("port mapping after backplane cache TTL expiry"); + + // Verify member is still in "Joined" state + let members = list_multicast_group_members(client, GROUP_NAME).await; + assert_eq!(members.len(), 1, "should still have exactly one member"); + assert_eq!( + members[0].state, "Joined", + "member should remain in Joined state after backplane cache refresh" + ); + assert_eq!( + members[0].instance_id, instance.identity.id, + "member should still reference the same instance" + ); + + cptestctx.teardown().await; +} diff --git a/nexus/tests/integration_tests/multicast/enablement.rs b/nexus/tests/integration_tests/multicast/enablement.rs new file mode 100644 index 00000000000..d8cf90d2440 --- /dev/null +++ b/nexus/tests/integration_tests/multicast/enablement.rs @@ -0,0 +1,237 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests for multicast enablement functionality. +//! +//! TODO: Remove once we have full multicast support in PROD. + +use std::net::IpAddr; + +use gateway_test_utils::setup::DEFAULT_SP_SIM_CONFIG; +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_project, object_create, object_get, +}; +use nexus_test_utils::{load_test_config, test_setup_with_config}; +use nexus_types::external_api::params::MulticastGroupCreate; +use nexus_types::external_api::views::MulticastGroup; +use omicron_common::api::external::{ + IdentityMetadataCreateParams, Instance, InstanceState, NameOrId, +}; +use omicron_sled_agent::sim; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; + +use super::*; +use crate::integration_tests::instances::{ + instance_simulate, instance_wait_for_state, +}; + +const PROJECT_NAME: &str = "multicast-enablement-test"; +const GROUP_NAME: &str = "test-group"; + +/// Test that when multicast is disabled, instance lifecycle operations +/// and group attachment APIs skip multicast operations but complete successfully, +/// and no multicast members are ever created. 
+#[tokio::test]
+async fn test_multicast_enablement() {
+    // Create custom config with multicast disabled (simulating PROD, for now)
+    let mut config = load_test_config();
+    config.pkg.multicast.enabled = false;
+
+    let cptestctx = test_setup_with_config::<omicron_nexus::Server>(
+        "test_multicast_enablement",
+        &mut config,
+        sim::SimMode::Explicit,
+        None,
+        0,
+        DEFAULT_SP_SIM_CONFIG.into(),
+    )
+    .await;
+
+    let client = &cptestctx.external_client;
+
+    // Set up project and multicast infrastructure
+    create_default_ip_pool(&client).await;
+    create_project(client, PROJECT_NAME).await;
+    let _pool = create_multicast_ip_pool(client, "test-pool").await;
+
+    // Create a multicast group
+    let group_params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: GROUP_NAME.parse().unwrap(),
+            description: "Test group for enablement testing".to_string(),
+        },
+        multicast_ip: Some("224.0.1.100".parse::<IpAddr>().unwrap()),
+        source_ips: None,
+        pool: Some(NameOrId::Name("test-pool".parse().unwrap())),
+        mvlan: None,
+    };
+
+    let group_url = "/v1/multicast-groups".to_string();
+    object_create::<_, MulticastGroup>(client, &group_url, &group_params).await;
+
+    // Create instance with multicast groups specified
+    // This should succeed even with multicast disabled
+    let instance = instance_for_multicast_groups(
+        &cptestctx,
+        PROJECT_NAME,
+        "test-instance-lifecycle",
+        false, // don't start initially
+        &[GROUP_NAME],
+    )
+    .await;
+
+    // Verify instance was created successfully
+    assert_eq!(instance.identity.name, "test-instance-lifecycle");
+
+    // Verify NO multicast members were created (since multicast is disabled)
+    let members = list_multicast_group_members(client, GROUP_NAME).await;
+    assert_eq!(
+        members.len(),
+        0,
+        "No multicast members should be created when disabled"
+    );
+
+    // Start the instance - this should also succeed
+    let start_url = format!(
+        "/v1/instances/test-instance-lifecycle/start?project={PROJECT_NAME}"
+    );
+    nexus_test_utils::http_testing::NexusRequest::new(
+        nexus_test_utils::http_testing::RequestBuilder::new(
+            client,
+            http::Method::POST,
+            &start_url,
+        )
+        .body(None as Option<&serde_json::Value>)
+        .expect_status(Some(http::StatusCode::ACCEPTED)),
+    )
+    .authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("Instance start should succeed even with multicast disabled");
+
+    // Simulate the instance to complete the start transition
+    let get_url_for_start_sim =
+        format!("/v1/instances/test-instance-lifecycle?project={PROJECT_NAME}");
+    let instance_for_start_sim: Instance =
+        object_get(client, &get_url_for_start_sim).await;
+    let instance_id_for_start_sim =
+        InstanceUuid::from_untyped_uuid(instance_for_start_sim.identity.id);
+    instance_simulate(
+        &cptestctx.server.server_context().nexus,
+        &instance_id_for_start_sim,
+    )
+    .await;
+
+    // Still no multicast members should exist
+    let members = list_multicast_group_members(client, GROUP_NAME).await;
+    assert_eq!(
+        members.len(),
+        0,
+        "No multicast members should be created during start when disabled"
+    );
+
+    // Stop the instance - this should also succeed
+    let stop_url = format!(
+        "/v1/instances/test-instance-lifecycle/stop?project={PROJECT_NAME}"
+    );
+    nexus_test_utils::http_testing::NexusRequest::new(
+        nexus_test_utils::http_testing::RequestBuilder::new(
+            client,
+            http::Method::POST,
+            &stop_url,
+        )
+        .body(None as Option<&serde_json::Value>)
+        .expect_status(Some(http::StatusCode::ACCEPTED)),
+    )
.authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Instance stop should succeed even with multicast disabled"); + + let get_url_for_sim = + format!("/v1/instances/test-instance-lifecycle?project={PROJECT_NAME}"); + + let instance_for_sim: Instance = object_get(client, &get_url_for_sim).await; + let instance_id_for_sim = + InstanceUuid::from_untyped_uuid(instance_for_sim.identity.id); + // Simulate the instance to complete the stop transition + instance_simulate( + &cptestctx.server.server_context().nexus, + &instance_id_for_sim, + ) + .await; + + // Still no multicast members should exist + let members = list_multicast_group_members(client, GROUP_NAME).await; + assert_eq!( + members.len(), + 0, + "No multicast members should be created during stop when disabled" + ); + + // Wait for instance to be fully stopped before attempting deletion + let get_url = + format!("/v1/instances/test-instance-lifecycle?project={PROJECT_NAME}"); + let stopped_instance: Instance = object_get(client, &get_url).await; + let instance_id = + InstanceUuid::from_untyped_uuid(stopped_instance.identity.id); + + // Wait for the instance to be stopped + instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; + + // Delete the instance - this should now succeed + let delete_url = + format!("/v1/instances/test-instance-lifecycle?project={PROJECT_NAME}"); + nexus_test_utils::resource_helpers::object_delete(client, &delete_url) + .await; + + // Verify no multicast state was ever created + let members = list_multicast_group_members(client, GROUP_NAME).await; + assert_eq!( + members.len(), + 0, + "No multicast members should exist after instance deletion when disabled" + ); + + // Test API-level group attachment when disabled + + // Create another instance without multicast groups initially + instance_for_multicast_groups( + &cptestctx, + PROJECT_NAME, + "test-instance-api", + false, + &[], // No groups initially + ) + .await; + + // Try to attach to multicast group via API - should succeed + let attach_url = format!( + "/v1/instances/test-instance-api/multicast-groups/{GROUP_NAME}?project={PROJECT_NAME}" + ); + + nexus_test_utils::http_testing::NexusRequest::new( + nexus_test_utils::http_testing::RequestBuilder::new( + client, + http::Method::PUT, + &attach_url, + ) + .expect_status(Some(http::StatusCode::CREATED)), + ) + .authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Multicast group attach should succeed even when disabled"); + + // Verify that direct API calls DO create member records even when disabled + // (This is correct behavior for experimental APIs - they handle config management) + let members = list_multicast_group_members(client, GROUP_NAME).await; + assert_eq!( + members.len(), + 1, + "Direct API calls should create member records even when disabled (experimental API behavior)" + ); + + cptestctx.teardown().await; +} diff --git a/nexus/tests/integration_tests/multicast/failures.rs b/nexus/tests/integration_tests/multicast/failures.rs new file mode 100644 index 00000000000..a47b4b01991 --- /dev/null +++ b/nexus/tests/integration_tests/multicast/failures.rs @@ -0,0 +1,612 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// Copyright 2025 Oxide Computer Company + +//! Integration tests for multicast group failure scenarios. +//! 
+//! Tests DPD communication failures, reconciler resilience, and saga rollback +//! scenarios. + +use std::net::{IpAddr, Ipv4Addr}; + +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_instance, create_project, object_create, + object_delete, object_get, objects_list_page_authz, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params::{ + MulticastGroupCreate, MulticastGroupMemberAdd, +}; +use nexus_types::external_api::views::{MulticastGroup, MulticastGroupMember}; +use omicron_common::api::external::{ + IdentityMetadataCreateParams, NameOrId, SwitchLocation, +}; + +use super::*; + +#[nexus_test] +async fn test_multicast_group_dpd_communication_failure_recovery( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "dpd-failure-group"; + let instance_name = "dpd-failure-instance"; + + // Setup: project, pools, group with member - parallelize creation + let (_, _, mcast_pool) = ops::join3( + create_project(&client, project_name), + create_default_ip_pool(&client), + create_multicast_ip_pool(&client, "mcast-pool"), + ) + .await; + + // Create group that will experience DPD communication failure + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 250)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for DPD communication failure test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + // Stop DPD BEFORE reconciler runs to test failure recovery + cptestctx.stop_dendrite(SwitchLocation::Switch0).await; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + // Group should start in "Creating" state + assert_eq!( + created_group.state, "Creating", + "New multicast group should start in Creating state" + ); + + // Add member to make group programmable + create_instance(client, project_name, instance_name).await; + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + let member_add_url = mcast_group_member_add_url( + group_name, + &member_params.instance, + project_name, + ); + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Verify group remains in "Creating" state since DPD is unavailable + // The reconciler can't progress the group to Active without DPD communication + let group_get_url = mcast_group_url(group_name); + let fetched_group: MulticastGroup = + object_get(client, &group_get_url).await; + + assert_eq!( + fetched_group.state, "Creating", + "Group should remain in Creating state when DPD is unavailable, found: {}", + fetched_group.state + ); + + // Verify group properties are maintained despite DPD issues + // The group should remain accessible and in "Creating" state since DPD is down + assert_eq!(fetched_group.identity.name, group_name); + assert_eq!(fetched_group.multicast_ip, multicast_ip); + assert_eq!(fetched_group.identity.id, created_group.identity.id); +} + +#[nexus_test] +async fn test_multicast_reconciler_state_consistency_validation( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + + // Create multiple groups to test reconciler 
batch processing with failures + let (_, _, mcast_pool) = ops::join3( + create_project(&client, project_name), + create_default_ip_pool(&client), + create_multicast_ip_pool(&client, "mcast-pool"), + ) + .await; + + // Stop DPD BEFORE reconciler runs to test failure recovery + cptestctx.stop_dendrite(SwitchLocation::Switch0).await; + + // Create groups that will test different failure scenarios using helper functions + let group_specs = &[ + MulticastGroupForTest { + name: "consistency-group-1", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 220)), + description: Some("Group for state consistency test".to_string()), + }, + MulticastGroupForTest { + name: "consistency-group-2", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 221)), + description: Some("Group for state consistency test".to_string()), + }, + MulticastGroupForTest { + name: "consistency-group-3", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 222)), + description: Some("Group for state consistency test".to_string()), + }, + ]; + + // Create all groups rapidly to stress test reconciler + let created_groups = + create_multicast_groups(client, &mcast_pool, group_specs).await; + let group_names: Vec<&str> = group_specs.iter().map(|g| g.name).collect(); + + // Create instances and attach to groups in parallel (now that double-delete bug is fixed) + let instance_names: Vec<_> = group_names + .iter() + .map(|&group_name| format!("instance-{group_name}")) + .collect(); + + // Create all instances in parallel + let create_futures = instance_names.iter().map(|instance_name| { + create_instance(client, project_name, instance_name) + }); + ops::join_all(create_futures).await; + + // Attach instances to their respective groups in parallel + let attach_futures = instance_names.iter().zip(&group_names).map( + |(instance_name, &group_name)| { + multicast_group_attach( + cptestctx, + project_name, + instance_name, + group_name, + ) + }, + ); + ops::join_all(attach_futures).await; + + // Verify each group is in a consistent state (DPD failure prevents reconciliation) + for (i, group_name) in group_names.iter().enumerate() { + let original_group = &created_groups[i]; + let group_get_url = mcast_group_url(group_name); + let fetched_group: MulticastGroup = + object_get(client, &group_get_url).await; + + // Critical consistency checks + assert_eq!(fetched_group.identity.id, original_group.identity.id); + assert_eq!(fetched_group.multicast_ip, original_group.multicast_ip); + + // State should be Creating since all DPD processes were stopped + // The reconciler cannot activate groups without DPD communication + assert_eq!( + fetched_group.state, "Creating", + "Group {group_name} should remain in Creating state when DPD is unavailable, found: {}", + fetched_group.state + ); + } + + // Clean up all groups - test reconciler's ability to handle batch deletions + cleanup_multicast_groups(client, &group_names).await; +} + +#[nexus_test] +async fn test_dpd_failure_during_creating_state( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "creating-dpd-fail-group"; + let instance_name = "creating-fail-instance"; + + // Setup: project, pools, group with member - parallelize creation + let (_, _, mcast_pool) = ops::join3( + create_project(&client, project_name), + create_default_ip_pool(&client), + create_multicast_ip_pool(&client, "mcast-pool"), + ) + .await; + + // Create group (IP within pool range 224.0.1.10 to 224.0.1.255) + let multicast_ip = 
IpAddr::V4(Ipv4Addr::new(224, 0, 1, 210)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for DPD failure during Creating state test" + .to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + // Stop DPD before object creation of groups. + cptestctx.stop_dendrite(SwitchLocation::Switch0).await; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + // Group should start in "Creating" state + assert_eq!( + created_group.state, "Creating", + "New multicast group should start in Creating state" + ); + + // Add member to make group programmable + create_instance(client, project_name, instance_name).await; + + let member_add_url = format!( + "/v1/multicast-groups/{group_name}/members?project={project_name}" + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Stop DPD process BEFORE reconciler runs to test Creating→Creating failure + + // Wait for reconciler to process - tests DPD communication handling during "Creating" state + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Check group state after reconciler processes with DPD unavailable + let group_get_url = mcast_group_url(group_name); + let fetched_group: MulticastGroup = + object_get(client, &group_get_url).await; + + // Critical assertion: Group should remain in "Creating" state since DPD is unavailable + // The reconciler cannot transition Creating→Active without DPD communication + assert_eq!( + fetched_group.state, "Creating", + "Group should remain in Creating state when DPD is unavailable during activation, found: {}", + fetched_group.state + ); + + // Verify group properties are maintained + assert_eq!(fetched_group.identity.name, group_name); + assert_eq!(fetched_group.multicast_ip, multicast_ip); + assert_eq!(fetched_group.identity.id, created_group.identity.id); + + // Test cleanup - should work regardless of DPD state + object_delete(client, &group_get_url).await; + + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; +} + +#[nexus_test] +async fn test_dpd_failure_during_active_state( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "active-dpd-fail-group"; + let instance_name = "active-fail-instance"; + + // Setup: project, pools, group with member + create_project(&client, project_name).await; + create_default_ip_pool(&client).await; + + let mcast_pool = create_multicast_ip_pool(&client, "mcast-pool").await; + + // Create group that will become active first + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 211)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for DPD failure during Active state test" + .to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + 
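+    // Groups always begin in Creating; Dendrite is still running at this
+    // point, so the reconciler can activate this group once a member is added.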
assert_eq!(created_group.state, "Creating"); + + // Add member to make group programmable + create_instance(client, project_name, instance_name).await; + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + let member_add_url = mcast_group_member_add_url( + group_name, + &member_params.instance, + project_name, + ); + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // First, let the group activate normally with DPD running + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify group is now Active (or at least not Creating anymore) + let group_get_url = mcast_group_url(group_name); + let active_group: MulticastGroup = object_get(client, &group_get_url).await; + + // Group should be Active or at least no longer Creating + assert!( + active_group.state == "Active" || active_group.state == "Creating", + "Group should be Active or Creating before DPD failure test, found: {}", + active_group.state + ); + + // Only proceed with failure test if group successfully activated + if active_group.state == "Active" { + // Now stop DPD while group is "Active" to test "Active" state resilience + cptestctx.stop_dendrite(SwitchLocation::Switch0).await; + + // Wait for reconciler to process - tests DPD communication handling during "Active" state + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Check group state after reconciler processes with DPD unavailable + let fetched_group: MulticastGroup = + object_get(client, &group_get_url).await; + + // Group should remain "Active" - existing "Active" groups shouldn't change state due to DPD failures + // The reconciler should handle temporary DPD communication issues gracefully + assert_eq!( + fetched_group.state, "Active", + "Active group should remain Active despite DPD communication failure, found: {}", + fetched_group.state + ); + + // Verify group properties are maintained + assert_eq!(fetched_group.identity.name, group_name); + assert_eq!(fetched_group.multicast_ip, multicast_ip); + assert_eq!(fetched_group.identity.id, created_group.identity.id); + } + + // Test cleanup - should work regardless of DPD state + object_delete(client, &group_get_url).await; + + // Wait for reconciler to process the deletion + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; +} + +#[nexus_test] +async fn test_dpd_failure_during_deleting_state( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "deleting-dpd-fail-group"; + let instance_name = "deleting-fail-instance"; + + // Setup: project, pools, group with member + create_project(&client, project_name).await; + create_default_ip_pool(&client).await; + + let mcast_pool = create_multicast_ip_pool(&client, "mcast-pool").await; + + // Create group that we'll delete while DPD is down + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 212)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for DPD failure during Deleting state test" + .to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + 
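+    // As in the other failure tests, the group starts out in Creating; DPD is
+    // still up here, so wait_for_group_active below can drive it to Active
+    // before we exercise deletion.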
assert_eq!(created_group.state, "Creating");
+
+    // Add member and let group activate
+    create_instance(client, project_name, instance_name).await;
+    let member_add_url = format!(
+        "/v1/multicast-groups/{group_name}/members?project={project_name}"
+    );
+    let member_params = MulticastGroupMemberAdd {
+        instance: NameOrId::Name(instance_name.parse().unwrap()),
+    };
+    object_create::<_, MulticastGroupMember>(
+        client,
+        &member_add_url,
+        &member_params,
+    )
+    .await;
+
+    // Wait for group to reach "Active" state before testing deletion
+    wait_for_group_active(client, group_name).await;
+
+    // Now delete the group to put it in "Deleting" state
+    let group_delete_url = mcast_group_url(group_name);
+    object_delete(client, &group_delete_url).await;
+
+    // Stop DPD AFTER deletion but BEFORE the reconciler processes the deletion
+    cptestctx.stop_dendrite(SwitchLocation::Switch0).await;
+
+    // The group should now be in "Deleting" state while DPD is down. Check the
+    // state before the reconciler runs - the group should still be accessible
+    // via GET requests.
+    let get_result = objects_list_page_authz::<MulticastGroup>(
+        client,
+        "/v1/multicast-groups",
+    )
+    .await;
+
+    let remaining_groups: Vec<_> = get_result
+        .items
+        .into_iter()
+        .filter(|g| g.identity.name == group_name)
+        .collect();
+
+    if !remaining_groups.is_empty() {
+        let group = &remaining_groups[0];
+        assert_eq!(
+            group.state, "Deleting",
+            "Group should be in Deleting state after deletion request, found: {}",
+            group.state
+        );
+    }
+
+    // Wait for reconciler to attempt deletion with DPD down
+    wait_for_multicast_reconciler(&cptestctx.lockstep_client).await;
+
+    // Check final state - the group should remain in "Deleting" state since
+    // DPD is unavailable; the reconciler cannot complete deletion without DPD
+    // communication.
+    let final_result =
+        nexus_test_utils::resource_helpers::objects_list_page_authz::<
+            MulticastGroup,
+        >(client, "/v1/multicast-groups")
+        .await;
+
+    let final_groups: Vec<_> = final_result
+        .items
+        .into_iter()
+        .filter(|g| g.identity.name == group_name)
+        .collect();
+
+    if !final_groups.is_empty() {
+        let group = &final_groups[0];
+        assert_eq!(
+            group.state, "Deleting",
+            "Group should remain in Deleting state when DPD is unavailable during deletion, found: {}",
+            group.state
+        );
+
+        // Verify group properties are maintained during failed deletion
+        assert_eq!(group.identity.name, group_name);
+        assert_eq!(group.multicast_ip, multicast_ip);
+        assert_eq!(group.identity.id, created_group.identity.id);
+    }
+    // Note: If the group is gone, deletion succeeded despite DPD being down,
+    // which would indicate the reconciler has fallback cleanup logic.
+}
+
+#[nexus_test]
+async fn test_multicast_group_members_during_dpd_failure(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+    let project_name = "test-project";
+    let group_name = "member-dpd-fail-group";
+    let instance_name = "member-test-instance";
+
+    // Setup: project, pools, group with member - parallelize creation
+    let (_, _, mcast_pool) = ops::join3(
+        create_project(&client, project_name),
+        create_default_ip_pool(&client),
+        create_multicast_ip_pool(&client, "mcast-pool"),
+    )
+    .await;
+
+    // Create group
+    let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 213));
+    let group_url = "/v1/multicast-groups".to_string();
+    let params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: String::from(group_name).parse().unwrap(),
+            description:
"Group for member state during DPD failure test" + .to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + // Stop DPD to test member operations during failure + cptestctx.stop_dendrite(SwitchLocation::Switch0).await; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + assert_eq!(created_group.state, "Creating"); + + // Add member + let instance = create_instance(client, project_name, instance_name).await; + + let member_add_url = format!( + "/v1/multicast-groups/{group_name}/members?project={project_name}" + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Verify member is accessible before DPD failure + let members_url = format!("/v1/multicast-groups/{group_name}/members"); + let initial_members = + nexus_test_utils::resource_helpers::objects_list_page_authz::< + MulticastGroupMember, + >(client, &members_url) + .await + .items; + assert_eq!( + initial_members.len(), + 1, + "Should have exactly one member before DPD failure" + ); + // Note: Members store instance_id (UUID), not instance name + assert_eq!(initial_members[0].instance_id, instance.identity.id); + + // Wait for reconciler - group should remain in "Creating" state + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify members are still accessible despite DPD failure + let members_during_failure = + nexus_test_utils::resource_helpers::objects_list_page_authz::< + MulticastGroupMember, + >(client, &members_url) + .await + .items; + assert_eq!( + members_during_failure.len(), + 1, + "Member should still be accessible during DPD failure" + ); + assert_eq!(members_during_failure[0].instance_id, instance.identity.id); + assert_eq!( + members_during_failure[0].multicast_group_id, + created_group.identity.id + ); + + // Verify group is still in "Creating" state + let group_get_url = mcast_group_url(group_name); + let fetched_group: MulticastGroup = + object_get(client, &group_get_url).await; + + assert_eq!( + fetched_group.state, "Creating", + "Group should remain in Creating state during DPD failure, found: {}", + fetched_group.state + ); + + // Clean up + object_delete(client, &group_get_url).await; + + // Wait for reconciler to process the deletion + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; +} diff --git a/nexus/tests/integration_tests/multicast/groups.rs b/nexus/tests/integration_tests/multicast/groups.rs new file mode 100644 index 00000000000..8d795c6d26a --- /dev/null +++ b/nexus/tests/integration_tests/multicast/groups.rs @@ -0,0 +1,2910 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// Copyright 2025 Oxide Computer Company + +//! Integration tests for multicast group APIs and basic membership operations. 
+ +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; + +use dropshot::HttpErrorResponseBody; +use dropshot::ResultsPage; +use http::{Method, StatusCode}; + +use crate::integration_tests::instances::{ + instance_simulate, instance_wait_for_state, +}; +use dpd_client::Error as DpdError; +use dpd_client::types as dpd_types; +use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; +use nexus_test_utils::dpd_client; +use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_instance, create_project, link_ip_pool, + object_create, object_create_error, object_delete, object_get, + object_get_error, object_put, object_put_error, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params::{ + IpPoolCreate, MulticastGroupCreate, MulticastGroupMemberAdd, + MulticastGroupUpdate, +}; +use nexus_types::external_api::shared::{IpRange, Ipv4Range, Ipv6Range}; +use nexus_types::external_api::views::{ + IpPool, IpPoolRange, IpVersion, MulticastGroup, MulticastGroupMember, +}; +use nexus_types::identity::Resource; +use omicron_common::api::external::{ + IdentityMetadataCreateParams, IdentityMetadataUpdateParams, InstanceState, + NameOrId, Nullable, +}; +use omicron_common::vlan::VlanID; +use omicron_uuid_kinds::InstanceUuid; + +use super::*; + +/// Verify creation works when optional fields are omitted from the JSON body +/// (i.e., keys are missing, not present as `null`). This mirrors CLI behavior. +#[nexus_test] +async fn test_multicast_group_create_raw_omitted_optionals( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "raw-omit-proj"; + let pool_name = "raw-omit-pool"; + let group_name = "raw-omit-group"; + + // Ensure a project exists (not strictly required for fleet-scoped groups) + create_project(client, project_name).await; + + // Create a multicast pool with a unique, non-reserved ASM range and link it + create_multicast_ip_pool_with_range( + client, + pool_name, + (224, 9, 0, 10), + (224, 9, 0, 255), + ) + .await; + + let group_url = mcast_groups_url(); + + // Omit multicast_ip and source_ips keys entirely; specify pool by name + let body = format!( + r#"{{"name":"{group}","description":"Create with omitted optionals","pool":"{pool}"}}"#, + group = group_name, + pool = pool_name, + ); + + let created: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &group_url) + .header("content-type", "application/json") + .raw_body(Some(body)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Create with omitted optional fields should succeed") + .parsed_body() + .expect("Should parse created MulticastGroup"); + + assert_eq!(created.identity.name, group_name); + assert!(created.multicast_ip.is_multicast()); + assert!(created.source_ips.is_empty()); + + // Wait for reconciler to activate the group + wait_for_group_active(client, group_name).await; + + // Cleanup + object_delete(client, &mcast_group_url(group_name)).await; +} + +/// Verify ASM creation with explicit address works when `source_ips` is omitted +#[nexus_test] +async fn test_multicast_group_create_raw_asm_omitted_sources( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let pool_name = "raw-asm-pool"; + let group_name = "raw-asm-group"; + + // Pool for allocation (even with explicit IP, current create path validates pool) + 
create_multicast_ip_pool_with_range( + client, + pool_name, + (224, 10, 0, 10), + (224, 10, 0, 255), + ) + .await; + + let group_url = mcast_groups_url(); + let body = format!( + r#"{{"name":"{group}","description":"ASM no sources omitted","multicast_ip":"224.10.0.100","pool":"{pool}"}}"#, + group = group_name, + pool = pool_name, + ); + + let created: MulticastGroup = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &group_url) + .header("content-type", "application/json") + .raw_body(Some(body)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("ASM creation with omitted source_ips should succeed") + .parsed_body() + .expect("Should parse created MulticastGroup"); + + assert!(created.multicast_ip.is_multicast()); + assert!(created.source_ips.is_empty()); + wait_for_group_active(client, group_name).await; + + object_delete(client, &mcast_group_url(group_name)).await; +} + +/// Verify SSM creation fails when `source_ips` is omitted (missing sources) +#[nexus_test] +async fn test_multicast_group_create_raw_ssm_missing_sources( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let pool_name = "raw-ssm-pool"; + let group_name = "raw-ssm-group"; + + // Pool for validation + create_multicast_ip_pool_with_range( + client, + pool_name, + (224, 11, 0, 10), + (224, 11, 0, 255), + ) + .await; + + let group_url = mcast_groups_url(); + let body = format!( + r#"{{"name":"{group}","description":"SSM missing sources","multicast_ip":"232.1.2.3","pool":"{pool}"}}"#, + group = group_name, + pool = pool_name, + ); + + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &group_url) + .header("content-type", "application/json") + .raw_body(Some(body)) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("SSM creation without sources should fail") + .parsed_body() + .expect("Should parse error response body"); + + assert!( + error + .message + .contains("SSM multicast addresses require at least one source IP"), + "unexpected error message: {}", + error.message + ); +} + +#[nexus_test] +async fn test_multicast_group_basic_crud(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "test-group"; + let description = "A test multicast group"; + + // Create a project + create_project(&client, project_name).await; + + // Test with explicit multicast pool using unique range for this test + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 1, 0, 10), + (224, 1, 0, 255), + ) + .await; + + let group_url = mcast_groups_url(); + + // Verify empty list initially + let groups = list_multicast_groups(&client).await; + assert_eq!(groups.len(), 0, "Expected empty list of multicast groups"); + + // Test creating a multicast group with auto-allocated IP + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: String::from(description), + }, + multicast_ip: None, // Auto-allocate + source_ips: None, // Any-Source Multicast + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + + wait_for_group_active(client, group_name).await; + + assert_eq!(created_group.identity.name, 
group_name); + assert_eq!(created_group.identity.description, description); + assert!(created_group.multicast_ip.is_multicast()); + assert_eq!(created_group.source_ips.len(), 0); + + // Verify we can list and find it + let groups = list_multicast_groups(&client).await; + assert_eq!(groups.len(), 1, "Expected exactly 1 multicast group"); + assert_groups_eq(&created_group, &groups[0]); + + // Verify we can fetch it directly + let fetched_group_url = mcast_group_url(group_name); + let fetched_group: MulticastGroup = + object_get(client, &fetched_group_url).await; + assert_groups_eq(&created_group, &fetched_group); + + // Test conflict error for duplicate name + object_create_error(client, &group_url, ¶ms, StatusCode::BAD_REQUEST) + .await; + + // Test updating the group + let new_description = "Updated description"; + let update_params = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: Some(String::from(new_description)), + }, + source_ips: None, + mvlan: None, + }; + + let updated_group: MulticastGroup = + object_put(client, &fetched_group_url, &update_params).await; + assert_eq!(updated_group.identity.description, new_description); + assert_eq!(updated_group.identity.id, created_group.identity.id); + assert!( + updated_group.identity.time_modified + > created_group.identity.time_modified + ); + + // Test deleting the group + object_delete(client, &fetched_group_url).await; + + // Wait for group to be deleted (should return 404) + wait_for_group_deleted(client, group_name).await; + + let groups = list_multicast_groups(&client).await; + assert_eq!(groups.len(), 0, "Expected empty list after deletion"); +} + +#[nexus_test] +async fn test_multicast_group_with_default_pool( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "test-default-pool-group"; + + // Create a project for testing + create_project(&client, project_name).await; + + // Create multicast IP pool + let pool_params = IpPoolCreate::new_multicast( + omicron_common::api::external::IdentityMetadataCreateParams { + name: "default".parse().unwrap(), + description: "Default multicast IP pool for testing".to_string(), + }, + IpVersion::V4, + ); + + object_create::<_, IpPool>(&client, "/v1/system/ip-pools", &pool_params) + .await; + + // Add IPv4 multicast range - use unique range for this test + let ipv4_range = IpRange::V4( + Ipv4Range::new( + Ipv4Addr::new(224, 8, 0, 10), + Ipv4Addr::new(224, 8, 0, 255), + ) + .unwrap(), + ); + let range_url = "/v1/system/ip-pools/default/ranges/add"; + object_create::<_, IpPoolRange>(&client, range_url, &ipv4_range).await; + + // Link the pool to the silo as the default multicast pool + link_ip_pool(&client, "default", &DEFAULT_SILO.id(), true).await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Test creating with default pool (pool: None) + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group using default pool".to_string(), + }, + multicast_ip: None, // Auto-allocate + source_ips: None, // Any-Source Multicast + pool: None, // Use default multicast pool + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + assert_eq!(created_group.identity.name, group_name); + assert!(created_group.multicast_ip.is_multicast()); + + wait_for_group_active(client, group_name).await; + + // Clean up + let 
group_delete_url = mcast_group_url(group_name); + object_delete(client, &group_delete_url).await; + + // Wait for the multicast group reconciler to process the deletion + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // After reconciler processing, the group should be gone (404) + object_get_error(client, &group_delete_url, StatusCode::NOT_FOUND).await; +} + +#[nexus_test] +async fn test_multicast_group_with_specific_ip( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "test-group-specific-ip"; + + // Create a project and multicast IP pool + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 2, 0, 10), + (224, 2, 0, 255), + ) + .await; + let group_url = "/v1/multicast-groups".to_string(); + + // Auto-allocation (should work) + let auto_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group with auto-allocated IP".to_string(), + }, + multicast_ip: None, // Auto-allocate + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let auto_group: MulticastGroup = + object_create(client, &group_url, &auto_params).await; + + wait_for_group_active(client, group_name).await; + + assert!(auto_group.multicast_ip.is_multicast()); + assert_eq!(auto_group.identity.name, group_name); + assert_eq!(auto_group.identity.description, "Group with auto-allocated IP"); + + // Clean up auto-allocated group + let auto_delete_url = mcast_group_url(group_name); + object_delete(client, &auto_delete_url).await; + + // Wait for the multicast group reconciler to process the deletion + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // After reconciler processing, the group should be gone (404) + object_get_error(client, &auto_delete_url, StatusCode::NOT_FOUND).await; + + // Explicit IP allocation + let explicit_group_name = "test-group-explicit"; + let ipv4_addr = IpAddr::V4(Ipv4Addr::new(224, 2, 0, 20)); + let explicit_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: explicit_group_name.parse().unwrap(), + description: "Group with explicit IPv4".to_string(), + }, + multicast_ip: Some(ipv4_addr), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let explicit_group: MulticastGroup = + object_create(client, &group_url, &explicit_params).await; + assert_eq!(explicit_group.multicast_ip, ipv4_addr); + assert_eq!(explicit_group.identity.name, explicit_group_name); + assert_eq!(explicit_group.identity.description, "Group with explicit IPv4"); + + // Wait for explicit group to become active before deletion + wait_for_group_active(client, explicit_group_name).await; + + // Clean up explicit group + let explicit_delete_url = mcast_group_url(explicit_group_name); + object_delete(client, &explicit_delete_url).await; + + // Wait for the multicast group reconciler to process the deletion + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + object_get_error(client, &explicit_delete_url, StatusCode::NOT_FOUND).await; +} + +#[nexus_test] +async fn test_multicast_group_with_source_ips( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "test-ssm-group"; + + // Create a project and 
SSM multicast IP pool (232.0.0.0/8 range) + create_project(&client, project_name).await; + create_default_ip_pool(&client).await; // Required for any instance operations + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (232, 11, 0, 10), // SSM range: 232.11.0.10 - 232.11.0.255 + (232, 11, 0, 255), + ) + .await; + let group_url = "/v1/multicast-groups".to_string(); + + // Test creating with Source-Specific Multicast (SSM) source IPs + // SSM range is 232.0.0.0/8, so we use our unique SSM range + let ssm_ip = IpAddr::V4(Ipv4Addr::new(232, 11, 0, 50)); // From our SSM range + let source_ips = vec![ + IpAddr::V4(Ipv4Addr::new(8, 8, 8, 8)), // Public DNS server + IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)), // Cloudflare DNS + ]; + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "SSM group with source IPs".to_string(), + }, + multicast_ip: Some(ssm_ip), + source_ips: Some(source_ips.clone()), + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + + // Wait for group to become active + let active_group = wait_for_group_active(client, group_name).await; + + // Verify SSM group properties + assert_eq!(created_group.source_ips, source_ips); + assert_eq!(created_group.multicast_ip, ssm_ip); + assert_eq!(active_group.state, "Active"); + + // DPD Validation: Check that SSM group exists in dataplane + let dpd_client = dpd_client(cptestctx); + let dpd_group = dpd_client + .multicast_group_get(&ssm_ip) + .await + .expect("SSM group should exist in dataplane after creation"); + validate_dpd_group_response( + &dpd_group, + &ssm_ip, + Some(0), // No members initially + "SSM group creation", + ); + + // Clean up + let group_delete_url = mcast_group_url(group_name); + object_delete(client, &group_delete_url).await; + + // Wait for the multicast group reconciler to process the deletion + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify deletion + object_get_error(client, &group_delete_url, StatusCode::NOT_FOUND).await; +} + +#[nexus_test] +async fn test_multicast_group_validation_errors( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + + // Create a project and multicast IP pool + create_project(&client, project_name).await; + create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 3, 0, 10), + (224, 3, 0, 255), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Test with non-multicast IP address + let unicast_ip = IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "invalid-group".parse().unwrap(), + description: "Group with invalid IP".to_string(), + }, + multicast_ip: Some(unicast_ip), + source_ips: None, + pool: None, // Use default pool for validation test + mvlan: None, + }; + + object_create_error(client, &group_url, ¶ms, StatusCode::BAD_REQUEST) + .await; + + // Test with link-local multicast (should be rejected) + let link_local_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 0, 1)); + let params_link_local = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "link-local-group".parse().unwrap(), + description: "Group with link-local IP".to_string(), + }, + multicast_ip: Some(link_local_ip), + source_ips: None, + pool: None, 
// Use default pool for validation test
+        mvlan: None,
+    };
+
+    object_create_error(
+        client,
+        &group_url,
+        &params_link_local,
+        StatusCode::BAD_REQUEST,
+    )
+    .await;
+
+    // Test with IPv6 unicast (should be rejected)
+    let ipv6_unicast = IpAddr::V6(Ipv6Addr::new(
+        0x2001, 0xdb8, 0x1234, 0x5678, 0x9abc, 0xdef0, 0x1234, 0x5678,
+    ));
+    let params_ipv6_unicast = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: "ipv6-unicast-group".parse().unwrap(),
+            description: "Group with IPv6 unicast IP".to_string(),
+        },
+        multicast_ip: Some(ipv6_unicast),
+        source_ips: None,
+        pool: None,
+        mvlan: None,
+    };
+
+    object_create_error(
+        client,
+        &group_url,
+        &params_ipv6_unicast,
+        StatusCode::BAD_REQUEST,
+    )
+    .await;
+
+    // Test with IPv6 interface-local multicast ff01:: (should be rejected)
+    let ipv6_interface_local =
+        IpAddr::V6(Ipv6Addr::new(0xff01, 0, 0, 0, 0, 0, 0, 1));
+    let params_ipv6_interface_local = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: "ipv6-interface-local-group".parse().unwrap(),
+            description: "Group with IPv6 interface-local multicast IP"
+                .to_string(),
+        },
+        multicast_ip: Some(ipv6_interface_local),
+        source_ips: None,
+        pool: None,
+        mvlan: None,
+    };
+
+    object_create_error(
+        client,
+        &group_url,
+        &params_ipv6_interface_local,
+        StatusCode::BAD_REQUEST,
+    )
+    .await;
+
+    // Test with IPv6 link-local multicast ff02:: (should be rejected)
+    let ipv6_link_local_mcast =
+        IpAddr::V6(Ipv6Addr::new(0xff02, 0, 0, 0, 0, 0, 0, 1));
+    let params_ipv6_link_local = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: "ipv6-link-local-group".parse().unwrap(),
+            description: "Group with IPv6 link-local multicast IP".to_string(),
+        },
+        multicast_ip: Some(ipv6_link_local_mcast),
+        source_ips: None,
+        pool: None,
+        mvlan: None,
+    };
+
+    object_create_error(
+        client,
+        &group_url,
+        &params_ipv6_link_local,
+        StatusCode::BAD_REQUEST,
+    )
+    .await;
+}
+
+/// Test that multicast IP pools reject invalid ranges at the pool level
+#[nexus_test]
+async fn test_multicast_ip_pool_range_validation(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+
+    // Create IPv4 multicast pool
+    let pool_params = IpPoolCreate::new_multicast(
+        IdentityMetadataCreateParams {
+            name: "test-v4-pool".parse().unwrap(),
+            description: "IPv4 multicast pool for validation tests".to_string(),
+        },
+        IpVersion::V4,
+    );
+    object_create::<_, IpPool>(client, "/v1/system/ip-pools", &pool_params)
+        .await;
+
+    let range_url = "/v1/system/ip-pools/test-v4-pool/ranges/add";
+
+    // IPv4 non-multicast range should be rejected
+    let ipv4_unicast_range = IpRange::V4(
+        Ipv4Range::new(
+            Ipv4Addr::new(10, 0, 0, 1),
+            Ipv4Addr::new(10, 0, 0, 255),
+        )
+        .unwrap(),
+    );
+    object_create_error(
+        client,
+        range_url,
+        &ipv4_unicast_range,
+        StatusCode::BAD_REQUEST,
+    )
+    .await;
+
+    // IPv4 link-local multicast range should be rejected
+    let ipv4_link_local_range = IpRange::V4(
+        Ipv4Range::new(
+            Ipv4Addr::new(224, 0, 0, 1),
+            Ipv4Addr::new(224, 0, 0, 255),
+        )
+        .unwrap(),
+    );
+    object_create_error(
+        client,
+        range_url,
+        &ipv4_link_local_range,
+        StatusCode::BAD_REQUEST,
+    )
+    .await;
+
+    // Valid IPv4 multicast range should be accepted
+    let valid_ipv4_range = IpRange::V4(
+        Ipv4Range::new(
+            Ipv4Addr::new(239, 0, 0, 1),
+            Ipv4Addr::new(239, 0, 0, 255),
+        )
+        .unwrap(),
+    );
+    object_create::<_, IpPoolRange>(client, range_url, &valid_ipv4_range).await;
+
+    // TODO: Remove this test once IPv6 is enabled for multicast
pools. + // IPv6 ranges should currently be rejected (not yet supported) + let ipv6_range = IpRange::V6( + Ipv6Range::new( + Ipv6Addr::new(0xff05, 0, 0, 0, 0, 0, 0, 1), + Ipv6Addr::new(0xff05, 0, 0, 0, 0, 0, 0, 255), + ) + .unwrap(), + ); + let error = object_create_error( + client, + range_url, + &ipv6_range, + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!(error.message, "IPv6 ranges are not allowed yet"); +} + +#[nexus_test] +async fn test_multicast_group_member_operations( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "test-group"; + let instance_name = "test-instance"; + + // Create project and IP pools in parallel + let (_, _, mcast_pool) = ops::join3( + create_project(&client, project_name), + create_default_ip_pool(&client), // For instance networking + create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 4, 0, 10), + (224, 4, 0, 255), + ), + ) + .await; + + // Create multicast group and instance in parallel + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Test group for member operations".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let (_, instance) = ops::join2( + async { + object_create::<_, MulticastGroup>(client, &group_url, ¶ms) + .await; + wait_for_group_active(client, group_name).await; + }, + create_instance(client, project_name, instance_name), + ) + .await; + + // Test listing members (should be empty initially) + let members = list_multicast_group_members(&client, group_name).await; + assert_eq!(members.len(), 0, "Expected empty member list initially"); + + // Test adding instance to multicast group + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + let added_member: MulticastGroupMember = + object_create(client, &member_add_url, &member_params).await; + + assert_eq!( + added_member.instance_id.to_string(), + instance.identity.id.to_string() + ); + + // Wait for member to become joined + // Member starts in "Joining" state and transitions to "Joined" via reconciler + // Member only transitions to "Joined" AFTER successful DPD update + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Test listing members (should have 1 now in Joined state) + let members = list_multicast_group_members(&client, group_name).await; + assert_eq!(members.len(), 1, "Expected exactly 1 member"); + assert_eq!(members[0].instance_id, added_member.instance_id); + assert_eq!(members[0].multicast_group_id, added_member.multicast_group_id); + + // DPD Validation: Verify groups exist in dataplane after member addition + let dpd_client = dpd_client(cptestctx); + // Get the multicast IP from the group (since member doesn't have the IP field) + let group_get_url = mcast_group_url(group_name); + let group: MulticastGroup = object_get(client, &group_get_url).await; + let external_multicast_ip = group.multicast_ip; + + // List all groups in DPD to find both external and underlay groups + let dpd_groups = dpd_client + .multicast_groups_list(None, None) + .await + 
.expect("Should list DPD groups"); + + // Find the external IPv4 group (should exist but may not have members) + let expect_msg = + format!("External group {external_multicast_ip} should exist in DPD"); + dpd_groups + .items + .iter() + .find(|g| { + let ip = match g { + dpd_types::MulticastGroupResponse::External { + group_ip, + .. + } => *group_ip, + dpd_types::MulticastGroupResponse::Underlay { + group_ip, + .. + } => IpAddr::V6(group_ip.0), + }; + ip == external_multicast_ip + && matches!( + g, + dpd_types::MulticastGroupResponse::External { .. } + ) + }) + .expect(&expect_msg); + + // Directly get the underlay IPv6 group by finding the admin-scoped address + // First find the underlay group IP from the list to get the exact IPv6 address + let underlay_ip = dpd_groups + .items + .iter() + .find_map(|g| { + match g { + dpd_types::MulticastGroupResponse::Underlay { + group_ip, + .. + } => { + // Check if it starts with ff04 (admin-scoped multicast) + if group_ip.0.segments()[0] == 0xff04 { + Some(group_ip.clone()) + } else { + None + } + } + dpd_types::MulticastGroupResponse::External { .. } => None, + } + }) + .expect("Should find underlay group IP in DPD response"); + + // Get the underlay group directly + let underlay_group = dpd_client + .multicast_group_get_underlay(&underlay_ip) + .await + .expect("Should get underlay group from DPD"); + + assert_eq!( + underlay_group.members.len(), + 1, + "Underlay group should have exactly 1 member after member addition" + ); + + // Assert all underlay members use rear (backplane) ports with Underlay direction + for member in &underlay_group.members { + assert!( + matches!(member.port_id, dpd_client::types::PortId::Rear(_)), + "Underlay member should use rear (backplane) port, got: {:?}", + member.port_id + ); + assert_eq!( + member.direction, + dpd_client::types::Direction::Underlay, + "Underlay member should have Underlay direction" + ); + } + + // Test removing instance from multicast group using path-based DELETE + let member_remove_url = format!( + "{}/{instance_name}?project={project_name}", + mcast_group_members_url(group_name) + ); + + NexusRequest::new( + RequestBuilder::new(client, http::Method::DELETE, &member_remove_url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should remove member from multicast group"); + + // Wait for member count to reach 0 after removal + wait_for_member_count(&client, group_name, 0).await; + + // DPD Validation: Verify group has no members in dataplane after removal + let dpd_group = dpd_client.multicast_group_get(&external_multicast_ip).await + .expect("Multicast group should still exist in dataplane after member removal"); + validate_dpd_group_response( + &dpd_group, + &external_multicast_ip, + Some(0), // Should have 0 members after removal + "external group after member removal", + ); + + let group_delete_url = mcast_group_url(group_name); + object_delete(client, &group_delete_url).await; +} + +#[nexus_test] +async fn test_instance_multicast_endpoints( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group1_name = "mcast-group-1"; + let group2_name = "mcast-group-2"; + let instance_name = "test-instance"; + + // Create a project, default unicast pool, and multicast IP pool + create_project(&client, project_name).await; + create_default_ip_pool(&client).await; // For instance networking + let mcast_pool = create_multicast_ip_pool_with_range( + 
&client,
+        "mcast-pool",
+        (224, 5, 0, 10),
+        (224, 5, 0, 255),
+    )
+    .await;
+
+    // Create two multicast groups in parallel
+    let group_url = "/v1/multicast-groups".to_string();
+
+    let group1_params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: group1_name.parse().unwrap(),
+            description: "First test group".to_string(),
+        },
+        multicast_ip: None,
+        source_ips: None,
+        pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())),
+        mvlan: None,
+    };
+
+    let group2_params = MulticastGroupCreate {
+        identity: IdentityMetadataCreateParams {
+            name: group2_name.parse().unwrap(),
+            description: "Second test group".to_string(),
+        },
+        multicast_ip: None,
+        source_ips: None,
+        pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())),
+        mvlan: None,
+    };
+
+    // Create both groups in parallel then wait for both to be active
+    ops::join2(
+        object_create::<_, MulticastGroup>(client, &group_url, &group1_params),
+        object_create::<_, MulticastGroup>(client, &group_url, &group2_params),
+    )
+    .await;
+
+    ops::join2(
+        wait_for_group_active(client, group1_name),
+        wait_for_group_active(client, group2_name),
+    )
+    .await;
+
+    // Create an instance (starts automatically with create_instance helper)
+    let instance = create_instance(client, project_name, instance_name).await;
+    let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id);
+
+    // Simulate and wait for instance to be fully running with sled_id assigned
+    let nexus = &cptestctx.server.server_context().nexus;
+    instance_simulate(nexus, &instance_id).await;
+    instance_wait_for_state(client, instance_id, InstanceState::Running).await;
+    wait_for_instance_sled_assignment(cptestctx, &instance_id).await;
+
+    // Test: List instance multicast groups (should be empty initially)
+    let instance_groups_url = format!(
+        "/v1/instances/{instance_name}/multicast-groups?project={project_name}"
+    );
+    let instance_memberships: ResultsPage<MulticastGroupMember> =
+        object_get(client, &instance_groups_url).await;
+    assert_eq!(
+        instance_memberships.items.len(),
+        0,
+        "Instance should have no multicast memberships initially"
+    );
+
+    // Test: Join group1 using instance-centric endpoint
+    let instance_join_group1_url = format!(
+        "/v1/instances/{instance_name}/multicast-groups/{group1_name}?project={project_name}"
+    );
+    // Use PUT method but expect 201 Created (not 200 OK like object_put)
+    // This is correct HTTP semantics - PUT can return 201 when creating new resource
+    let member1: MulticastGroupMember = NexusRequest::new(
+        RequestBuilder::new(
+            client,
+            http::Method::PUT,
+            &instance_join_group1_url,
+        )
+        .body(Some(&()))
+        .expect_status(Some(StatusCode::CREATED)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .unwrap()
+    .parsed_body()
+    .unwrap();
+    assert_eq!(member1.instance_id, instance.identity.id);
+
+    // Wait for member to become joined
+    wait_for_member_state(
+        cptestctx,
+        group1_name,
+        instance.identity.id,
+        nexus_db_model::MulticastGroupMemberState::Joined,
+    )
+    .await;
+
+    // Test: Verify membership shows up in both endpoints
+    // Check group-centric view
+    let group1_members =
+        list_multicast_group_members(&client, group1_name).await;
+    assert_eq!(group1_members.len(), 1);
+    assert_eq!(group1_members[0].instance_id, instance.identity.id);
+
+    // Check instance-centric view (test the list endpoint thoroughly)
+    let instance_memberships: ResultsPage<MulticastGroupMember> =
+        object_get(client, &instance_groups_url).await;
+    assert_eq!(
+        instance_memberships.items.len(),
+        1,
+        "Instance should have exactly 1 membership"
+    );
+    assert_eq!(instance_memberships.items[0].instance_id, instance.identity.id);
+    assert_eq!(
+        instance_memberships.items[0].multicast_group_id,
+        member1.multicast_group_id
+    );
+    assert_eq!(instance_memberships.items[0].state, "Joined");
+
+    // Join group2 using group-centric endpoint (test both directions)
+    let member_add_url = format!(
+        "{}?project={project_name}",
+        mcast_group_members_url(group2_name)
+    );
+    let member_params = MulticastGroupMemberAdd {
+        instance: NameOrId::Name(instance_name.parse().unwrap()),
+    };
+    let member2: MulticastGroupMember =
+        object_create(client, &member_add_url, &member_params).await;
+    assert_eq!(member2.instance_id, instance.identity.id);
+
+    // Wait for member to become joined
+    wait_for_member_state(
+        cptestctx,
+        group2_name,
+        instance.identity.id,
+        nexus_db_model::MulticastGroupMemberState::Joined,
+    )
+    .await;
+
+    // Verify instance now belongs to both groups (comprehensive list test)
+    let instance_memberships: ResultsPage<MulticastGroupMember> =
+        object_get(client, &instance_groups_url).await;
+    assert_eq!(
+        instance_memberships.items.len(),
+        2,
+        "Instance should belong to both groups"
+    );
+
+    // Verify the list endpoint returns the correct membership details
+    let membership_group_ids: Vec<_> = instance_memberships
+        .items
+        .iter()
+        .map(|m| m.multicast_group_id)
+        .collect();
+    assert!(
+        membership_group_ids.contains(&member1.multicast_group_id),
+        "List should include group1 membership"
+    );
+    assert!(
+        membership_group_ids.contains(&member2.multicast_group_id),
+        "List should include group2 membership"
+    );
+
+    // Verify all memberships show correct instance_id and state
+    for membership in &instance_memberships.items {
+        assert_eq!(membership.instance_id, instance.identity.id);
+        assert_eq!(membership.state, "Joined");
+    }
+
+    // Verify each group shows the instance as a member
+    let group1_members =
+        list_multicast_group_members(&client, group1_name).await;
+    let group2_members =
+        list_multicast_group_members(&client, group2_name).await;
+    assert_eq!(group1_members.len(), 1);
+    assert_eq!(group2_members.len(), 1);
+    assert_eq!(group1_members[0].instance_id, instance.identity.id);
+    assert_eq!(group2_members[0].instance_id, instance.identity.id);
+
+    // Leave group1 using instance-centric endpoint
+    let instance_leave_group1_url = format!(
+        "/v1/instances/{instance_name}/multicast-groups/{group1_name}?project={project_name}"
+    );
+    object_delete(client, &instance_leave_group1_url).await;
+
+    // Wait for reconciler to process the removal and completely delete the member
+    wait_for_multicast_reconciler(&cptestctx.lockstep_client).await;
+
+    // Verify membership removed from both views
+    // Check instance-centric view - should only show active memberships (group2)
+    let instance_memberships: ResultsPage<MulticastGroupMember> =
+        object_get(client, &instance_groups_url).await;
+    assert_eq!(
+        instance_memberships.items.len(),
+        1,
+        "Instance should only show active membership (group2)"
+    );
+    assert_eq!(
+        instance_memberships.items[0].multicast_group_id,
+        member2.multicast_group_id,
+        "Remaining membership should be group2"
+    );
+    assert_eq!(
+        instance_memberships.items[0].state, "Joined",
+        "Group2 membership should be Joined"
+    );
+
+    // Check group-centric views
+    let group1_members =
+        list_multicast_group_members(&client, group1_name).await;
+    let group2_members =
+        list_multicast_group_members(&client, group2_name).await;
+    assert_eq!(group1_members.len(), 0, "Group1 should have no members");
+    assert_eq!(group2_members.len(), 1, "Group2
should still have 1 member"); + + // Leave group2 using group-centric endpoint + let member_remove_url = format!( + "{}/{instance_name}?project={project_name}", + mcast_group_members_url(group2_name) + ); + + NexusRequest::new( + RequestBuilder::new(client, http::Method::DELETE, &member_remove_url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should remove member from group2"); + + // Wait for reconciler to process the removal + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify all memberships are gone + let instance_memberships: ResultsPage = + object_get(client, &instance_groups_url).await; + assert_eq!( + instance_memberships.items.len(), + 0, + "Instance should have no memberships" + ); + + let group1_members = + list_multicast_group_members(&client, group1_name).await; + let group2_members = + list_multicast_group_members(&client, group2_name).await; + assert_eq!(group1_members.len(), 0); + assert_eq!(group2_members.len(), 0); + + // Clean up + let group1_delete_url = mcast_group_url(group1_name); + let group2_delete_url = mcast_group_url(group2_name); + + object_delete(client, &group1_delete_url).await; + object_delete(client, &group2_delete_url).await; +} + +#[nexus_test] +async fn test_multicast_group_member_errors( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "test-group"; + let nonexistent_instance = "nonexistent-instance"; + + // Create a project and multicast IP pool + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 6, 0, 10), + (224, 6, 0, 255), + ) + .await; + + // Create a multicast group + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Test group for error cases".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + object_create::<_, MulticastGroup>(client, &group_url, ¶ms).await; + + // Wait for group to become active before testing member operations + wait_for_group_active(&client, group_name).await; + + // Test adding nonexistent instance to group + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(nonexistent_instance.parse().unwrap()), + }; + object_create_error( + client, + &member_add_url, + &member_params, + StatusCode::NOT_FOUND, + ) + .await; + + // Test adding member to nonexistent group + let nonexistent_group = "nonexistent-group"; + let member_add_bad_group_url = format!( + "{}?project={project_name}", + mcast_group_members_url(nonexistent_group) + ); + object_create_error( + client, + &member_add_bad_group_url, + &member_params, + StatusCode::NOT_FOUND, + ) + .await; + + // Clean up - follow standard deletion pattern + let group_delete_url = mcast_group_url(group_name); + object_delete(client, &group_delete_url).await; +} + +#[nexus_test] +async fn test_lookup_multicast_group_by_ip( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "test-lookup-group"; + + // Create a project and multicast IP pool + 
create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 7, 0, 10), + (224, 7, 0, 255), + ) + .await; + + // Create a multicast group with specific IP - use safe IP range + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 7, 0, 100)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for IP lookup test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + + // Wait for group to become active - follow working pattern + wait_for_group_active(&client, group_name).await; + + // Test lookup by IP + let lookup_url = + format!("/v1/system/multicast-groups/by-ip/{multicast_ip}"); + let found_group: MulticastGroup = object_get(client, &lookup_url).await; + assert_groups_eq(&created_group, &found_group); + + // Test lookup with nonexistent IP + let nonexistent_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 200)); + let lookup_bad_url = + format!("/v1/system/multicast-groups/by-ip/{nonexistent_ip}"); + + object_get_error(client, &lookup_bad_url, StatusCode::NOT_FOUND).await; + + // Clean up - follow standard deletion pattern + let group_delete_url = mcast_group_url(group_name); + object_delete(client, &group_delete_url).await; +} + +#[nexus_test] +async fn test_instance_deletion_removes_multicast_memberships( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "springfield-squidport"; // Use the same project name as instance helpers + let group_name = "instance-deletion-group"; + let instance_name = "deletion-test-instance"; + + // Setup: project, pools, group with unique IP range + create_project(&client, project_name).await; + create_default_ip_pool(&client).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 9, 0, 10), + (224, 9, 0, 255), + ) + .await; + + // Create multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 9, 0, 50)); // Use IP from our range + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for instance deletion test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + + // Wait for group to become active + wait_for_group_active(&client, group_name).await; + + // Create instance and add as member + let instance = create_instance(client, project_name, instance_name).await; + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Wait for member to join + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Verify member was added + let members = 
list_multicast_group_members(&client, group_name).await; + assert_eq!(members.len(), 1, "Instance should be a member of the group"); + assert_eq!(members[0].instance_id, instance.identity.id); + + // Test: Instance deletion should clean up multicast memberships + // Use the helper function for proper instance deletion (handles Starting state) + cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; + + // Verify instance is gone + let instance_url = + format!("/v1/instances/{instance_name}?project={project_name}"); + + object_get_error(client, &instance_url, StatusCode::NOT_FOUND).await; + + // Critical test: Verify instance was automatically removed from multicast group + wait_for_member_count(&client, group_name, 0).await; + + // DPD Validation: Ensure dataplane members are cleaned up + let dpd_client = dpd_client(cptestctx); + let dpd_group = dpd_client.multicast_group_get(&multicast_ip).await + .expect("Multicast group should still exist in dataplane after instance deletion"); + validate_dpd_group_response( + &dpd_group, + &multicast_ip, + Some(0), // Should have 0 members after instance deletion + "external group after instance deletion", + ); + + // Verify group still exists (just no members) + let group_get_url = mcast_group_url(group_name); + let group_after_deletion: MulticastGroup = + object_get(client, &group_get_url).await; + assert_eq!(group_after_deletion.identity.id, created_group.identity.id); + + // Clean up + object_delete(client, &group_get_url).await; +} + +#[nexus_test] +async fn test_member_operations_via_rpw_reconciler( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "test-project"; + let group_name = "rpw-test-group"; + let instance_name = "rpw-test-instance"; + + // Setup: project, pools, group with unique IP range + create_project(&client, project_name).await; + create_default_ip_pool(&client).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool", + (224, 10, 0, 10), + (224, 10, 0, 255), + ) + .await; + + // Create multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 10, 0, 50)); // Use IP from our range + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for RPW member operations test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, ¶ms).await; + + // Wait for group to become active + wait_for_group_active(&client, group_name).await; + + assert_eq!(created_group.multicast_ip, multicast_ip); + assert_eq!(created_group.identity.name, group_name); + + // Create instance + let instance = create_instance(client, project_name, instance_name).await; + + // Test: Add member via API (should use RPW pattern via reconciler) + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + let added_member: MulticastGroupMember = + object_create(client, &member_add_url, &member_params).await; + + // Wait for member to become joined + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + 
) + .await; + + // Verify member was added and reached Joined state + let members = list_multicast_group_members(&client, group_name).await; + assert_eq!(members.len(), 1, "Member should be added to group"); + assert_eq!(members[0].instance_id, added_member.instance_id); + assert_eq!(members[0].state, "Joined", "Member should be in Joined state"); + + // DPD Validation: Check external group configuration + let dpd_client = dpd_client(cptestctx); + let dpd_group = dpd_client + .multicast_group_get(&multicast_ip) + .await + .expect("Multicast group should exist in dataplane after member join"); + validate_dpd_group_response( + &dpd_group, + &multicast_ip, + None, // Don't assert member count due to timing + "external group after member join", + ); + + // Test: Remove member via API (should use RPW pattern via reconciler) + let member_remove_url = format!( + "{}/{instance_name}?project={project_name}", + mcast_group_members_url(group_name) + ); + + NexusRequest::new( + RequestBuilder::new(client, http::Method::DELETE, &member_remove_url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should remove member from multicast group"); + + // Verify member was removed (wait for member count to reach 0) + wait_for_member_count(&client, group_name, 0).await; + + // DPD Validation: Check group has no members after removal + let dpd_group = dpd_client.multicast_group_get(&multicast_ip).await.expect( + "Multicast group should still exist in dataplane after member removal", + ); + validate_dpd_group_response( + &dpd_group, + &multicast_ip, + Some(0), // Should have 0 members after removal + "external group after member removal", + ); + + // Clean up - reconciler is automatically activated by deletion + let group_delete_url = mcast_group_url(group_name); + object_delete(client, &group_delete_url).await; +} + +/// Test comprehensive multicast group update operations including the update saga. +/// Tests both description-only updates (no saga) and name updates (requires saga). 
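+/// Also covers a combined name+description update and checks that the DPD
+/// group tag matches the final group name.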
+#[nexus_test] +async fn test_multicast_group_comprehensive_updates( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "update-test-project"; + let original_name = "original-group"; + let updated_name = "updated-group"; + let final_name = "final-group"; + let original_description = "Original description"; + let updated_description = "Updated description"; + let final_description = "Final description"; + + // Create project and IP pool + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "update-test-pool", + (224, 11, 0, 10), + (224, 11, 0, 255), + ) + .await; + + // Create multicast group + let group_url = "/v1/multicast-groups".to_string(); + let create_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(original_name).parse().unwrap(), + description: String::from(original_description), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, &create_params).await; + + wait_for_group_active(client, original_name).await; + + let original_group_url = mcast_group_url(original_name); + + // Description-only update (no saga required) + let description_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, // Keep same name + description: Some(String::from(updated_description)), + }, + source_ips: None, + mvlan: None, + }; + + let desc_updated_group: MulticastGroup = + object_put(client, &original_group_url, &description_update).await; + + // No wait needed for description-only updates + assert_eq!(desc_updated_group.identity.name, original_name); + assert_eq!(desc_updated_group.identity.description, updated_description); + assert_eq!(desc_updated_group.identity.id, created_group.identity.id); + assert!( + desc_updated_group.identity.time_modified + > created_group.identity.time_modified + ); + + // Name-only update (requires update saga) + let name_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: Some(String::from(updated_name).parse().unwrap()), + description: None, // Keep current description + }, + source_ips: None, + mvlan: None, + }; + + let name_updated_group: MulticastGroup = + object_put(client, &original_group_url, &name_update).await; + + // Wait for update saga to complete DPD configuration application + // Name updates don't change DPD state, just verify saga completed without errors + wait_for_group_dpd_update( + cptestctx, + &created_group.multicast_ip, + dpd_predicates::expect_external_group(), + "name update saga completed", + ) + .await; + + // Verify name update worked + assert_eq!(name_updated_group.identity.name, updated_name); + assert_eq!(name_updated_group.identity.description, updated_description); // Should keep previous description + assert_eq!(name_updated_group.identity.id, created_group.identity.id); + assert!( + name_updated_group.identity.time_modified + > desc_updated_group.identity.time_modified + ); + + // Verify we can access with new name + let updated_group_url = mcast_group_url(updated_name); + let fetched_group: MulticastGroup = + object_get(client, &updated_group_url).await; + assert_eq!(fetched_group.identity.name, updated_name); + + // Verify old name is no longer accessible + object_get_error(client, &original_group_url, StatusCode::NOT_FOUND).await; + + // Combined name and 
description update (requires saga) + let combined_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: Some(String::from(final_name).parse().unwrap()), + description: Some(String::from(final_description)), + }, + source_ips: None, + mvlan: None, + }; + + let final_updated_group: MulticastGroup = + object_put(client, &updated_group_url, &combined_update).await; + + // Wait for update saga to complete + // Combined name+description updates don't change DPD state + wait_for_group_dpd_update( + cptestctx, + &created_group.multicast_ip, + dpd_predicates::expect_external_group(), + "combined name+description update saga completed", + ) + .await; + + // Verify combined update worked + assert_eq!(final_updated_group.identity.name, final_name); + assert_eq!(final_updated_group.identity.description, final_description); + assert_eq!(final_updated_group.identity.id, created_group.identity.id); + assert!( + final_updated_group.identity.time_modified + > name_updated_group.identity.time_modified + ); + + // Verify group remains active through updates + let final_group_url = mcast_group_url(final_name); + wait_for_group_active(client, final_name).await; + + // DPD validation + let dpd_client = dpd_client(cptestctx); + match dpd_client + .multicast_group_get(&final_updated_group.multicast_ip) + .await + { + Ok(dpd_group) => { + let group_data = dpd_group.into_inner(); + let tag = match &group_data { + dpd_types::MulticastGroupResponse::External { tag, .. } => { + tag.as_deref() + } + dpd_types::MulticastGroupResponse::Underlay { tag, .. } => { + tag.as_deref() + } + }; + assert_eq!( + tag, + Some(final_name), + "DPD group tag should match final group name" + ); + } + Err(DpdError::ErrorResponse(resp)) + if resp.status() == reqwest::StatusCode::NOT_FOUND => {} + Err(_) => {} + } + + // Clean up + object_delete(client, &final_group_url).await; +} + +/// Validate DPD multicast group response with comprehensive checks +fn validate_dpd_group_response( + dpd_group: &dpd_types::MulticastGroupResponse, + expected_ip: &IpAddr, + expected_member_count: Option<usize>, + test_context: &str, +) { + // Basic validation using our utility function + let ip = match dpd_group { + dpd_types::MulticastGroupResponse::External { group_ip, .. } => { + *group_ip + } + dpd_types::MulticastGroupResponse::Underlay { group_ip, .. } => { + IpAddr::V6(group_ip.0) + } + }; + assert_eq!(ip, *expected_ip, "DPD group IP mismatch in {test_context}"); + + match dpd_group { + dpd_types::MulticastGroupResponse::External { + external_group_id, + .. + } => { + if let Some(_expected_count) = expected_member_count { + // External groups typically don't have direct members, + // but we can validate if they do + // Note: External groups may not expose member count directly + eprintln!( + "Note: External group member validation skipped in {test_context}" + ); + } + + // Validate external group specific fields + assert_ne!( + *external_group_id, 0, + "DPD external_group_id should be non-zero in {test_context}" + ); + } + dpd_types::MulticastGroupResponse::Underlay { + members, + external_group_id, + underlay_group_id, + .. 
+ } => { + if let Some(expected_count) = expected_member_count { + assert_eq!( + members.len(), + expected_count, + "DPD underlay group member count mismatch in {test_context}: expected {expected_count}, got {}", + members.len() + ); + } + + // Assert all underlay members use rear (backplane) ports with Underlay direction + for member in members { + assert!( + matches!( + member.port_id, + dpd_client::types::PortId::Rear(_) + ), + "Underlay member should use rear (backplane) port, got: {:?}", + member.port_id + ); + assert_eq!( + member.direction, + dpd_client::types::Direction::Underlay, + "Underlay member should have Underlay direction" + ); + } + + // Validate underlay group specific fields + assert_ne!( + *external_group_id, 0, + "DPD external_group_id should be non-zero in {test_context}" + ); + assert_ne!( + *underlay_group_id, 0, + "DPD underlay_group_id should be non-zero in {test_context}" + ); + } + } +} + +/// Test source_ips updates and multicast group validation. +/// Verifies proper ASM/SSM handling, validation of invalid transitions, and mixed pool allocation. +#[nexus_test] +async fn test_multicast_source_ips_update(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let project_name = "source-update-project"; + + // Create project and separate ASM and SSM pools + create_project(&client, project_name).await; + + // Create ASM pool for ASM testing + let asm_pool = create_multicast_ip_pool_with_range( + &client, + "asm-update-pool", + (224, 99, 0, 10), + (224, 99, 0, 50), + ) + .await; + + // Create SSM pool for SSM testing + let ssm_pool = create_multicast_ip_pool_with_range( + &client, + "ssm-update-pool", + (232, 99, 0, 10), + (232, 99, 0, 50), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Negative: creating in SSM pool without sources should be rejected + let ssm_no_sources = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "ssm-no-sources".parse().unwrap(), + description: "should fail: SSM pool requires sources".to_string(), + }, + multicast_ip: None, // implicit allocation + source_ips: None, // missing sources in SSM pool + pool: Some(NameOrId::Name(ssm_pool.identity.name.clone())), + mvlan: None, + }; + object_create_error( + client, + &group_url, + &ssm_no_sources, + StatusCode::BAD_REQUEST, + ) + .await; + + // Negative: creating in ASM pool with sources (implicit IP) should be rejected + let asm_with_sources = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "asm-with-sources".parse().unwrap(), + description: + "should fail: ASM pool cannot allocate SSM with sources" + .to_string(), + }, + multicast_ip: None, // implicit allocation + source_ips: Some(vec!["10.10.10.10".parse().unwrap()]), // sources present + pool: Some(NameOrId::Name(asm_pool.identity.name.clone())), + mvlan: None, + }; + let err2: HttpErrorResponseBody = object_create_error( + client, + &group_url, + &asm_with_sources, + StatusCode::BAD_REQUEST, + ) + .await; + assert!( + err2.message + .contains("Cannot allocate SSM multicast group from ASM pool"), + "Expected ASM pool + sources to be rejected, got: {}", + err2.message + ); + + // Create ASM group (no sources) + let asm_group_name = "asm-group"; + let asm_create_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(asm_group_name).parse().unwrap(), + description: "ASM group for testing".to_string(), + }, + multicast_ip: None, + source_ips: None, // No sources = ASM + pool: 
Some(NameOrId::Name(asm_pool.identity.name.clone())), + mvlan: None, + }; + + let asm_group = object_create::<_, MulticastGroup>( + client, + &group_url, + &asm_create_params, + ) + .await; + wait_for_group_active(client, asm_group_name).await; + + // Verify ASM group allocation (should get any available multicast address) + assert!( + asm_group.source_ips.is_empty(), + "ASM group should have no sources" + ); + + // ASM group updates (valid operations) + + // Description-only update (always valid) + let description_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: Some("Updated ASM description".to_string()), + }, + source_ips: None, + mvlan: None, + }; + let updated_asm: MulticastGroup = object_put( + client, + &mcast_group_url(asm_group_name), + &description_update, + ) + .await; + assert_eq!(updated_asm.identity.description, "Updated ASM description"); + assert!(updated_asm.source_ips.is_empty()); + + // Try invalid ASM→SSM transition (should be rejected) + let invalid_ssm_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: Some(vec!["10.1.1.1".parse().unwrap()]), // Try to add sources + mvlan: None, + }; + + object_put_error( + client, + &mcast_group_url(asm_group_name), + &invalid_ssm_update, + StatusCode::BAD_REQUEST, + ) + .await; + + // Create SSM group from scratch (with explicit SSM IP and sources) + let ssm_group_name = "ssm-group"; + let ssm_create_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(ssm_group_name).parse().unwrap(), + description: "SSM group with explicit SSM address".to_string(), + }, + multicast_ip: Some("232.99.0.20".parse().unwrap()), // Explicit SSM IP required + source_ips: Some(vec!["10.2.2.2".parse().unwrap()]), // SSM sources from start + pool: Some(NameOrId::Name(ssm_pool.identity.name.clone())), + mvlan: None, + }; + + let ssm_group = object_create::<_, MulticastGroup>( + client, + &group_url, + &ssm_create_params, + ) + .await; + wait_for_group_active(client, ssm_group_name).await; + + // Verify SSM group has correct explicit IP and sources + assert_eq!(ssm_group.multicast_ip.to_string(), "232.99.0.20"); + assert_eq!(ssm_group.source_ips.len(), 1); + assert_eq!(ssm_group.source_ips[0].to_string(), "10.2.2.2"); + + // Create SSM group with mvlan at creation time + let ssm_with_mvlan_name = "ssm-group-with-mvlan"; + let ssm_with_mvlan_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(ssm_with_mvlan_name).parse().unwrap(), + description: "SSM group created with mvlan".to_string(), + }, + multicast_ip: Some("232.99.0.30".parse().unwrap()), + source_ips: Some(vec!["10.7.7.7".parse().unwrap()]), + pool: Some(NameOrId::Name(ssm_pool.identity.name.clone())), + mvlan: Some(VlanID::new(2048).unwrap()), // Create with mvlan + }; + let ssm_with_mvlan_created = object_create::<_, MulticastGroup>( + client, + &group_url, + &ssm_with_mvlan_params, + ) + .await; + wait_for_group_active(client, ssm_with_mvlan_name).await; + + assert_eq!(ssm_with_mvlan_created.multicast_ip.to_string(), "232.99.0.30"); + assert_eq!(ssm_with_mvlan_created.source_ips.len(), 1); + assert_eq!( + ssm_with_mvlan_created.mvlan, + Some(VlanID::new(2048).unwrap()), + "SSM group should be created with mvlan" + ); + + // Valid SSM group updates + + // Update SSM sources (valid - SSM→SSM) + let ssm_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: 
None, + description: None, + }, + source_ips: Some(vec![ + "10.3.3.3".parse().unwrap(), + "10.3.3.4".parse().unwrap(), + ]), + mvlan: None, + }; + let updated_ssm: MulticastGroup = + object_put(client, &mcast_group_url(ssm_group_name), &ssm_update).await; + + // Wait for update saga to complete + wait_for_group_dpd_update( + cptestctx, + &updated_ssm.multicast_ip, + dpd_predicates::expect_external_group(), + "source_ips update saga completed", + ) + .await; + + assert_eq!(updated_ssm.source_ips.len(), 2); + let source_strings: std::collections::HashSet<String> = + updated_ssm.source_ips.iter().map(|ip| ip.to_string()).collect(); + assert!(source_strings.contains("10.3.3.3")); + assert!(source_strings.contains("10.3.3.4")); + + // Valid SSM source reduction (but must maintain at least one source) + let ssm_source_reduction = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: Some(vec!["10.3.3.3".parse().unwrap()]), // Reduce to one source + mvlan: None, + }; + let reduced_ssm: MulticastGroup = object_put( + client, + &mcast_group_url(ssm_group_name), + &ssm_source_reduction, + ) + .await; + + // Wait for source reduction saga to complete + wait_for_group_dpd_update( + cptestctx, + &reduced_ssm.multicast_ip, + dpd_predicates::expect_external_group(), + "source_ips reduction saga completed", + ) + .await; + + assert_eq!( + reduced_ssm.source_ips.len(), + 1, + "SSM group should have exactly one source after reduction" + ); + assert_eq!(reduced_ssm.source_ips[0].to_string(), "10.3.3.3"); + + // Test SSM group with mvlan (combined features) + let ssm_update_with_mvlan = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: Some(vec![ + "10.4.4.4".parse().unwrap(), + "10.4.4.5".parse().unwrap(), + ]), + mvlan: Some(Nullable(Some(VlanID::new(2500).unwrap()))), // Set mvlan on SSM group + }; + let ssm_with_mvlan: MulticastGroup = object_put( + client, + &mcast_group_url(ssm_group_name), + &ssm_update_with_mvlan, + ) + .await; + + // Wait for combined source_ips+mvlan update saga to complete + // Must verify vlan_id was applied to DPD + wait_for_group_dpd_update( + cptestctx, + &ssm_with_mvlan.multicast_ip, + dpd_predicates::expect_vlan_id(2500), + "source_ips+mvlan update saga completed, vlan_id=2500", + ) + .await; + + assert_eq!(ssm_with_mvlan.source_ips.len(), 2); + assert_eq!( + ssm_with_mvlan.mvlan, + Some(VlanID::new(2500).unwrap()), + "SSM group should support mvlan" + ); + + // Update mvlan while keeping sources + let update_mvlan_only = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: None, // Don't change sources + mvlan: Some(Nullable(Some(VlanID::new(3000).unwrap()))), + }; + let mvlan_updated: MulticastGroup = object_put( + client, + &mcast_group_url(ssm_group_name), + &update_mvlan_only, + ) + .await; + assert_eq!(mvlan_updated.mvlan, Some(VlanID::new(3000).unwrap())); + assert_eq!( + mvlan_updated.source_ips.len(), + 2, + "Sources should be unchanged" + ); + + // Clear mvlan while updating sources + let clear_mvlan_update_sources = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: Some(vec!["10.5.5.5".parse().unwrap()]), + mvlan: Some(Nullable(None)), // Clear mvlan + }; + let mvlan_cleared: MulticastGroup = object_put( + client, + &mcast_group_url(ssm_group_name), + &clear_mvlan_update_sources, + ) + .await; + 
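// Update semantics for mvlan are tri-state: omitting the field leaves the + // current value untouched, Some(Nullable(Some(vid))) sets a new VLAN, and + // Some(Nullable(None)) clears it, as the assertions below and the dedicated + // mvlan update tests exercise. +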
assert_eq!(mvlan_cleared.mvlan, None, "MVLAN should be cleared"); + assert_eq!(mvlan_cleared.source_ips.len(), 1); + assert_eq!(mvlan_cleared.source_ips[0].to_string(), "10.5.5.5"); + + // Create SSM group that requires proper address validation + let ssm_explicit_name = "ssm-explicit"; + let ssm_explicit_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(ssm_explicit_name).parse().unwrap(), + description: "SSM group with explicit 232.x.x.x IP".to_string(), + }, + multicast_ip: Some("232.99.0.42".parse().unwrap()), // Explicit SSM IP + source_ips: Some(vec!["10.5.5.5".parse().unwrap()]), + pool: Some(NameOrId::Name(ssm_pool.identity.name.clone())), + mvlan: None, + }; + + let ssm_explicit = object_create::<_, MulticastGroup>( + client, + &group_url, + &ssm_explicit_params, + ) + .await; + wait_for_group_active(client, ssm_explicit_name).await; + + assert_eq!(ssm_explicit.multicast_ip.to_string(), "232.99.0.42"); + assert_eq!(ssm_explicit.source_ips.len(), 1); + + // Try creating SSM group with invalid IP (should be rejected) + let invalid_ssm_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "invalid-ssm".parse().unwrap(), + description: "Should be rejected".to_string(), + }, + multicast_ip: Some("224.99.0.42".parse().unwrap()), // ASM IP with sources + source_ips: Some(vec!["10.6.6.6".parse().unwrap()]), // Sources with ASM IP + pool: Some(NameOrId::Name(ssm_pool.identity.name.clone())), + mvlan: None, + }; + + object_create_error( + client, + &group_url, + &invalid_ssm_params, + StatusCode::BAD_REQUEST, + ) + .await; + + // Clean up all groups + for group_name in [asm_group_name, ssm_group_name, ssm_explicit_name] { + let delete_url = mcast_group_url(group_name); + object_delete(client, &delete_url).await; + } +} + +#[nexus_test] +async fn test_multicast_group_with_mvlan(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let project_name = "mvlan-test-project"; + let group_name = "mvlan-test-group"; + + // Setup + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mvlan-pool", + (224, 50, 0, 10), + (224, 50, 0, 255), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Test creating group with mvlan + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group with MVLAN for external uplink forwarding" + .to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(100).unwrap()), // Set MVLAN to 100 + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, &params).await; + + wait_for_group_active(client, group_name).await; + + // Verify mvlan was set correctly + assert_eq!( + created_group.mvlan, + Some(VlanID::new(100).unwrap()), + "MVLAN should be set to 100" + ); + assert_eq!(created_group.identity.name, group_name); + + // Verify we can fetch it and mvlan persists + let fetched_group_url = mcast_group_url(group_name); + let fetched_group: MulticastGroup = + object_get(client, &fetched_group_url).await; + assert_eq!( + fetched_group.mvlan, + Some(VlanID::new(100).unwrap()), + "MVLAN should persist after fetch" + ); + + // DPD Validation: Verify mvlan is propagated to dataplane as vlan_id + let dpd_client = dpd_client(cptestctx); + let dpd_group = dpd_client + 
.multicast_group_get(&created_group.multicast_ip) + .await + .expect("Multicast group should exist in dataplane"); + + // Extract vlan_id from DPD response and verify it matches mvlan + match dpd_group.into_inner() { + dpd_types::MulticastGroupResponse::External { + external_forwarding, + .. + } => { + assert_eq!( + external_forwarding.vlan_id, + Some(100), + "DPD external_forwarding.vlan_id should match group mvlan" + ); + } + dpd_types::MulticastGroupResponse::Underlay { .. } => { + panic!("Expected external group, got underlay group"); + } + } + + // Clean up + object_delete(client, &fetched_group_url).await; + wait_for_group_deleted(client, group_name).await; +} + +#[nexus_test] +async fn test_multicast_group_mvlan_updates( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "mvlan-update-project"; + let group_name = "mvlan-update-group"; + + // Setup + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mvlan-update-pool", + (224, 51, 0, 10), + (224, 51, 0, 255), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Create group without mvlan + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for MVLAN update testing".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, // Start without MVLAN + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, &params).await; + + wait_for_group_active(client, group_name).await; + + assert_eq!(created_group.mvlan, None, "MVLAN should initially be None"); + + let group_update_url = mcast_group_url(group_name); + + // Set mvlan to a value + let set_mvlan_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: None, + mvlan: Some(Nullable(Some(VlanID::new(200).unwrap()))), // Set to 200 + }; + + let updated_group: MulticastGroup = + object_put(client, &group_update_url, &set_mvlan_update).await; + assert_eq!( + updated_group.mvlan, + Some(VlanID::new(200).unwrap()), + "MVLAN should be set to 200" + ); + + // Change mvlan to a different value + let change_mvlan_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: None, + mvlan: Some(Nullable(Some(VlanID::new(300).unwrap()))), // Change to 300 + }; + + let changed_group: MulticastGroup = + object_put(client, &group_update_url, &change_mvlan_update).await; + assert_eq!( + changed_group.mvlan, + Some(VlanID::new(300).unwrap()), + "MVLAN should be changed to 300" + ); + + // Clear mvlan back to None + let clear_mvlan_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: None, + mvlan: Some(Nullable(None)), // Clear to NULL + }; + + let cleared_group: MulticastGroup = + object_put(client, &group_update_url, &clear_mvlan_update).await; + assert_eq!(cleared_group.mvlan, None, "MVLAN should be cleared to None"); + + // Set mvlan again, then test omitting the field preserves existing value + let set_mvlan_200 = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: None, + mvlan: Some(Nullable(Some(VlanID::new(200).unwrap()))), + }; + + let group_with_200: MulticastGroup = + 
object_put(client, &group_update_url, &set_mvlan_200).await; + assert_eq!( + group_with_200.mvlan, + Some(VlanID::new(200).unwrap()), + "MVLAN should be set to 200" + ); + + // Omit mvlan field entirely - should preserve existing value (200) + let omit_mvlan_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: Some("Updated description".to_string()), + }, + source_ips: None, + mvlan: None, // Omit the field + }; + + let unchanged_group: MulticastGroup = + object_put(client, &group_update_url, &omit_mvlan_update).await; + assert_eq!( + unchanged_group.mvlan, + Some(VlanID::new(200).unwrap()), + "MVLAN should remain at 200 when field is omitted" + ); + assert_eq!( + unchanged_group.identity.description, "Updated description", + "Description should be updated" + ); + + // Test invalid mvlan during update (reserved value 1) + let invalid_mvlan_update = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: None, + mvlan: Some(Nullable(Some(VlanID::new(1).unwrap()))), // Reserved value + }; + + object_put_error( + client, + &group_update_url, + &invalid_mvlan_update, + StatusCode::BAD_REQUEST, + ) + .await; + + // Clean up + object_delete(client, &group_update_url).await; + wait_for_group_deleted(client, group_name).await; +} + +#[nexus_test] +async fn test_multicast_group_mvlan_validation( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "mvlan-validation-project"; + + // Setup + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mvlan-validation-pool", + (224, 52, 0, 10), + (224, 52, 0, 255), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Test valid MVLAN values (2-4094) + // Note: VLANs 0 and 1 are reserved and rejected by Dendrite (>= 2 required) + // VLAN 4095 is reserved per IEEE 802.1Q and rejected by VlanID type (max 4094) + + // Valid: mid-range value + let mid_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "mvlan-mid".parse().unwrap(), + description: "Group with mid-range MVLAN".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(2048).unwrap()), + }; + + let mid_group: MulticastGroup = + object_create(client, &group_url, &mid_params).await; + wait_for_group_active(client, "mvlan-mid").await; + assert_eq!( + mid_group.mvlan, + Some(VlanID::new(2048).unwrap()), + "MVLAN 2048 should be valid" + ); + object_delete(client, &mcast_group_url("mvlan-mid")).await; + wait_for_group_deleted(client, "mvlan-mid").await; + + // Valid: maximum value (4094) + let max_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "mvlan-max".parse().unwrap(), + description: "Group with maximum MVLAN".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(4094).unwrap()), + }; + + let max_group: MulticastGroup = + object_create(client, &group_url, &max_params).await; + wait_for_group_active(client, "mvlan-max").await; + assert_eq!( + max_group.mvlan, + Some(VlanID::new(4094).unwrap()), + "MVLAN 4094 should be valid" + ); + object_delete(client, &mcast_group_url("mvlan-max")).await; + wait_for_group_deleted(client, "mvlan-max").await; + + // Invalid: reserved value 0 (rejected by Dendrite) + 
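// VlanID::new(0) and VlanID::new(1) construct successfully, so these cases + // exercise the API-level rejection path (400) rather than failing client-side. +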
let invalid_params0 = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "mvlan-invalid-0".parse().unwrap(), + description: "Group with invalid MVLAN 0".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(0).unwrap()), + }; + + object_create_error( + client, + &group_url, + &invalid_params0, + StatusCode::BAD_REQUEST, + ) + .await; + + // Invalid: reserved value 1 (rejected by Dendrite) + let invalid_params1 = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "mvlan-invalid-1".parse().unwrap(), + description: "Group with invalid MVLAN 1".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(1).unwrap()), + }; + + object_create_error( + client, + &group_url, + &invalid_params1, + StatusCode::BAD_REQUEST, + ) + .await; + + // Test invalid MVLAN at API boundary using raw JSON. + // The deserializer rejects invalid values at the HTTP boundary before they + // reach the business logic layer. + + // Invalid: raw JSON with mvlan = 0 (should get 400 Bad Request) + let raw_json0 = serde_json::json!({ + "identity": { + "name": "mvlan-raw-0", + "description": "Test raw JSON with mvlan 0" + }, + "mvlan": 0, + "pool": mcast_pool.identity.name + }); + + NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&raw_json0)) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected 400 Bad Request for raw JSON mvlan=0"); + + // Invalid: raw JSON with mvlan = 1 (should get 400 Bad Request) + let raw_json1 = serde_json::json!({ + "identity": { + "name": "mvlan-raw-1", + "description": "Test raw JSON with mvlan 1" + }, + "mvlan": 1, + "pool": mcast_pool.identity.name + }); + + NexusRequest::new( + RequestBuilder::new(client, http::Method::POST, &group_url) + .body(Some(&raw_json1)) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected 400 Bad Request for raw JSON mvlan=1"); +} + +/// Database round-trip tests for MVLAN values +/// Verifies that VlanID <-> i16 conversion works correctly for all valid values +#[nexus_test] +async fn test_mvlan_database_round_trip(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let project_name = "mvlan-roundtrip-project"; + + // Setup + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mvlan-roundtrip-pool", + (224, 53, 0, 10), + (224, 53, 0, 255), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Test cases: (group_name, mvlan_value) + let test_cases = vec![ + ("mvlan-none", None), + ("mvlan-2", Some(VlanID::new(2).unwrap())), + ("mvlan-100", Some(VlanID::new(100).unwrap())), + ("mvlan-4094", Some(VlanID::new(4094).unwrap())), + ]; + + for (group_name, mvlan) in &test_cases { + // Create group with specified mvlan + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: format!("Testing mvlan={mvlan:?}"), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: *mvlan, + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, &params).await; + 
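// Each iteration exercises the VlanID <-> i16 conversion in both directions: + // the create response and the subsequent fetch must agree on the mvlan value. +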
wait_for_group_active(client, group_name).await; + + // Verify the created group has the correct mvlan + assert_eq!( + created_group.mvlan, *mvlan, + "Created group should have mvlan={:?}", + mvlan + ); + + // Fetch the group back from the database and verify it matches + let fetched_group = get_multicast_group(client, group_name).await; + assert_eq!( + fetched_group.mvlan, *mvlan, + "Fetched group should have mvlan={:?}", + mvlan + ); + assert_eq!( + fetched_group.identity.id, created_group.identity.id, + "Fetched group ID should match created group ID" + ); + + // Clean up + object_delete(client, &mcast_group_url(group_name)).await; + wait_for_group_deleted(client, group_name).await; + } +} + +#[nexus_test] +async fn test_multicast_group_mvlan_with_member_operations( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "mvlan-member-project"; + let group_name = "mvlan-member-group"; + let instance_name = "mvlan-test-instance"; + + // Setup + create_default_ip_pool(&client).await; + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mvlan-member-pool", + (224, 60, 0, 10), + (224, 60, 0, 50), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Create group with mvlan + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for testing mvlan with members".to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(2048).unwrap()), // Set MVLAN + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, &params).await; + wait_for_group_active(client, group_name).await; + + assert_eq!(created_group.mvlan, Some(VlanID::new(2048).unwrap())); + + // Create and start instance + let instance = instance_for_multicast_groups( + cptestctx, + project_name, + instance_name, + true, // start the instance + &[], // no groups at creation + ) + .await; + + // Attach instance to group with mvlan + multicast_group_attach(cptestctx, project_name, instance_name, group_name) + .await; + + // Wait for member to reach Joined state + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Verify DPD shows vlan_id=2048 + let dpd_client = dpd_client(cptestctx); + let dpd_group = dpd_client + .multicast_group_get(&created_group.multicast_ip) + .await + .expect("Multicast group should exist in DPD"); + + match dpd_group.into_inner() { + dpd_types::MulticastGroupResponse::External { + external_forwarding, + .. + } => { + assert_eq!( + external_forwarding.vlan_id, + Some(2048), + "DPD should show vlan_id matching group mvlan" + ); + } + dpd_types::MulticastGroupResponse::Underlay { .. 
} => { + panic!("Expected external group, got underlay"); + } + } + + // Clean up: stop instance before deleting + let instance_stop_url = + format!("/v1/instances/{instance_name}/stop?project={project_name}"); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &instance_stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should stop instance"); + + let nexus = &cptestctx.server.server_context().nexus; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; + + let instance_url = + format!("/v1/instances/{instance_name}?project={project_name}"); + object_delete(client, &instance_url).await; + object_delete(client, &mcast_group_url(group_name)).await; + wait_for_group_deleted(client, group_name).await; +} + +#[nexus_test] +async fn test_multicast_group_mvlan_reconciler_update( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "mvlan-reconciler-project"; + let group_name = "mvlan-reconciler-group"; + let instance_name = "mvlan-reconciler-instance"; + + // Setup + create_default_ip_pool(&client).await; + create_project(&client, project_name).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mvlan-reconciler-pool", + (224, 70, 0, 10), + (224, 70, 0, 50), + ) + .await; + + let group_url = "/v1/multicast-groups".to_string(); + + // Create group with initial mvlan=2000 + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: String::from(group_name).parse().unwrap(), + description: "Group for testing reconciler mvlan updates" + .to_string(), + }, + multicast_ip: None, + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(2000).unwrap()), + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, &params).await; + wait_for_group_active(client, group_name).await; + + // Create and start instance, attach to group + let instance = instance_for_multicast_groups( + cptestctx, + project_name, + instance_name, + true, // start the instance + &[], + ) + .await; + + multicast_group_attach(cptestctx, project_name, instance_name, group_name) + .await; + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Verify initial mvlan in DPD + let dpd_client = dpd_client(cptestctx); + let initial_dpd_group = dpd_client + .multicast_group_get(&created_group.multicast_ip) + .await + .expect("Group should exist in DPD"); + + match initial_dpd_group.into_inner() { + dpd_types::MulticastGroupResponse::External { + external_forwarding, + .. + } => { + assert_eq!( + external_forwarding.vlan_id, + Some(2000), + "DPD should show initial vlan_id=2000" + ); + } + dpd_types::MulticastGroupResponse::Underlay { .. 
} => { + panic!("Expected external group"); + } + } + + // Update mvlan to 3500 while member is active + let update_mvlan = MulticastGroupUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + source_ips: None, + mvlan: Some(Nullable(Some(VlanID::new(3500).unwrap()))), // Update to 3500 + }; + + let updated_group: MulticastGroup = + object_put(client, &mcast_group_url(group_name), &update_mvlan).await; + assert_eq!( + updated_group.mvlan, + Some(VlanID::new(3500).unwrap()), + "Group mvlan should be updated" + ); + + // Wait for reconciler to process the mvlan change and verify DPD state + wait_for_group_dpd_update( + cptestctx, + &created_group.multicast_ip, + dpd_predicates::expect_vlan_id(3500), + "vlan_id = Some(3500)", + ) + .await; + + // Member should still be Joined after mvlan update + let members = list_multicast_group_members(client, group_name).await; + assert_eq!(members.len(), 1); + assert_eq!( + members[0].state, "Joined", + "Member should remain Joined after mvlan update" + ); + + // Clean up: stop instance before deleting + let instance_stop_url = + format!("/v1/instances/{instance_name}/stop?project={project_name}"); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &instance_stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should stop instance"); + + let nexus = &cptestctx.server.server_context().nexus; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; + + let instance_url = + format!("/v1/instances/{instance_name}?project={project_name}"); + object_delete(client, &instance_url).await; + object_delete(client, &mcast_group_url(group_name)).await; + wait_for_group_deleted(client, group_name).await; +} + +/// Assert that two multicast groups are equal in all fields. +fn assert_groups_eq(left: &MulticastGroup, right: &MulticastGroup) { + assert_eq!(left.identity.id, right.identity.id); + assert_eq!(left.identity.name, right.identity.name); + assert_eq!(left.identity.description, right.identity.description); + assert_eq!(left.multicast_ip, right.multicast_ip); + assert_eq!(left.source_ips, right.source_ips); + assert_eq!(left.mvlan, right.mvlan); + assert_eq!(left.ip_pool_id, right.ip_pool_id); +} diff --git a/nexus/tests/integration_tests/multicast/instances.rs b/nexus/tests/integration_tests/multicast/instances.rs new file mode 100644 index 00000000000..335a269e049 --- /dev/null +++ b/nexus/tests/integration_tests/multicast/instances.rs @@ -0,0 +1,1632 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/ +// +// Copyright 2025 Oxide Computer Company + +//! Tests multicast group + instance integration. +//! +//! Tests that verify multicast group functionality when integrated with +//! instance creation, modification, and deletion. 
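+//! +//! Instances are driven through state transitions with the simulated sled agent +//! (instance_simulate), and membership is polled via wait_for_member_state: +//! stopped instances surface as "Left" members, running ones as "Joined".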
+ +use std::net::{IpAddr, Ipv4Addr}; + +use http::{Method, StatusCode}; +use nexus_db_queries::context::OpContext; +use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_instance, create_project, object_create, + object_delete, object_get, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params::{ + InstanceCreate, InstanceNetworkInterfaceAttachment, MulticastGroupCreate, + MulticastGroupMemberAdd, +}; +use nexus_types::external_api::views::{MulticastGroup, MulticastGroupMember}; +use nexus_types::internal_api::params::InstanceMigrateRequest; + +use omicron_common::api::external::{ + ByteCount, IdentityMetadataCreateParams, Instance, InstanceCpuCount, + InstanceState, NameOrId, +}; +use omicron_common::vlan::VlanID; +use omicron_nexus::TestInterfaces; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; +use sled_agent_client::TestInterfaces as _; + +use super::*; +use crate::integration_tests::instances::{ + instance_simulate, instance_wait_for_state, +}; + +const PROJECT_NAME: &str = "test-project"; + +/// Consolidated multicast lifecycle test that combines multiple scenarios. +#[nexus_test] +async fn test_multicast_lifecycle(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + // Setup - create IP pool and project (shared across all operations) + create_default_ip_pool(&client).await; + create_project(client, PROJECT_NAME).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool-comprehensive", + (224, 30, 0, 1), // Large range: 224.30.0.1 + (224, 30, 0, 255), // to 224.30.0.255 (255 IPs) + ) + .await; + + // Create multiple multicast groups in parallel + let group_specs = &[ + MulticastGroupForTest { + name: "group-lifecycle-1", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 30, 0, 101)), + description: Some("Group for lifecycle testing 1".to_string()), + }, + MulticastGroupForTest { + name: "group-lifecycle-2", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 30, 0, 102)), + description: Some("Group for lifecycle testing 2".to_string()), + }, + MulticastGroupForTest { + name: "group-lifecycle-3", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 30, 0, 103)), + description: Some("Group for lifecycle testing 3".to_string()), + }, + MulticastGroupForTest { + name: "group-lifecycle-4", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 30, 0, 104)), + description: Some("Group for lifecycle testing 4".to_string()), + }, + ]; + + let groups = + create_multicast_groups(client, &mcast_pool, group_specs).await; + + // Wait for all groups to become active in parallel + let group_names: Vec<&str> = group_specs.iter().map(|g| g.name).collect(); + wait_for_groups_active(client, &group_names).await; + + // Create multiple instances in parallel - test various attachment scenarios + let instances = vec![ + // Instance with group attached at creation + instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "instance-create-attach", + false, + &["group-lifecycle-1"], + ) + .await, + // Instances for live attach/detach testing + instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "instance-live-1", + false, + &[], + ) + .await, + instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "instance-live-2", + false, + &[], + ) + .await, + // Instance for multi-group testing + instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "instance-multi-groups", + false, + &[], + ) + .await, + ]; + + // 
Verify create-time attachment worked + wait_for_member_state( + cptestctx, + "group-lifecycle-1", + instances[0].identity.id, + // Instance is stopped, so should be "Left" + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + + // Live attach/detach operations + // Attach instance-live-1 to group-lifecycle-2 + multicast_group_attach( + cptestctx, + PROJECT_NAME, + "instance-live-1", + "group-lifecycle-2", + ) + .await; + + // Attach instance-live-2 to group-lifecycle-2 (test multiple instances per group) + multicast_group_attach( + cptestctx, + PROJECT_NAME, + "instance-live-2", + "group-lifecycle-2", + ) + .await; + + // Verify both instances are attached to group-lifecycle-2 + for i in 0..2 { + wait_for_member_state( + cptestctx, + "group-lifecycle-2", + instances[i + 1].identity.id, + nexus_db_model::MulticastGroupMemberState::Left, // Stopped instances + ) + .await; + } + + // Multi-group attachment (instance to multiple groups) + // Attach instance-multi-groups to multiple groups + multicast_group_attach( + cptestctx, + PROJECT_NAME, + "instance-multi-groups", + "group-lifecycle-3", + ) + .await; + + multicast_group_attach( + cptestctx, + PROJECT_NAME, + "instance-multi-groups", + "group-lifecycle-4", + ) + .await; + + // Verify multi-group membership + for group_name in ["group-lifecycle-3", "group-lifecycle-4"] { + wait_for_member_state( + cptestctx, + group_name, + instances[3].identity.id, + nexus_db_model::MulticastGroupMemberState::Left, // Stopped instance + ) + .await; + } + + // Detach operations and idempotency + // Detach instance-live-1 from group-lifecycle-2 + multicast_group_detach( + client, + PROJECT_NAME, + "instance-live-1", + "group-lifecycle-2", + ) + .await; + + // Test idempotency - detach again (should not error) + multicast_group_detach( + client, + PROJECT_NAME, + "instance-live-1", + "group-lifecycle-2", + ) + .await; + + // Verify instance-live-1 is no longer a member of group-lifecycle-2 + let members = + nexus_test_utils::http_testing::NexusRequest::iter_collection_authn::< + MulticastGroupMember, + >( + client, + &mcast_group_members_url("group-lifecycle-2"), + &format!("project={PROJECT_NAME}"), + None, + ) + .await + .expect("Should list multicast group members") + .all_items; + + // Should only have instance-live-2 as member now + assert_eq!( + members.len(), + 1, + "group-lifecycle-2 should have 1 member after detach" + ); + assert_eq!(members[0].instance_id, instances[2].identity.id); + + // Verify groups are still active and functional + for (i, group_name) in group_names.iter().enumerate() { + let group_url = mcast_group_url(group_name); + let current_group: MulticastGroup = + object_get(client, &group_url).await; + assert_eq!( + current_group.state, "Active", + "Group {group_name} should remain Active throughout lifecycle" + ); + assert_eq!(current_group.identity.id, groups[i].identity.id); + } + + // Cleanup - use our parallel cleanup functions + cleanup_instances( + cptestctx, + client, + PROJECT_NAME, + &[ + "instance-create-attach", + "instance-live-1", + "instance-live-2", + "instance-multi-groups", + ], + ) + .await; + + cleanup_multicast_groups(client, &group_names).await; +} + +#[nexus_test] +async fn test_multicast_group_attach_conflicts( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + create_project(client, PROJECT_NAME).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "mcast-pool-conflicts", + (224, 23, 0, 1), // 
Unique range: 224.23.0.1 + (224, 23, 0, 255), // to 224.23.0.255 + ) + .await; + + // Create a multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 23, 0, 103)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "mcast-group-1".parse().unwrap(), + description: "Group for conflict testing".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + object_create::<_, MulticastGroup>(client, &group_url, &params).await; + + // Wait for group to become Active before proceeding + wait_for_group_active(client, "mcast-group-1").await; + + // Create first instance with the multicast group + instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "mcast-instance-1", + false, + &["mcast-group-1"], + ) + .await; + + // Create second instance with the same multicast group + // This should succeed (multicast groups can have multiple members, unlike floating IPs) + instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "mcast-instance-2", + false, + &["mcast-group-1"], + ) + .await; + + // Wait for reconciler + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify both instances are members of the group + let members = + nexus_test_utils::http_testing::NexusRequest::iter_collection_authn::< + MulticastGroupMember, + >( + client, + &mcast_group_members_url("mcast-group-1"), + &format!("project={PROJECT_NAME}"), + None, + ) + .await + .expect("Should list multicast group members") + .all_items; + + assert_eq!( + members.len(), + 2, + "Multicast group should support multiple members (unlike floating IPs)" + ); + + // Clean up - use cleanup functions + cleanup_instances( + cptestctx, + client, + PROJECT_NAME, + &["mcast-instance-1", "mcast-instance-2"], + ) + .await; + cleanup_multicast_groups(client, &["mcast-group-1"]).await; +} + +#[nexus_test] +async fn test_multicast_group_attach_limits( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + create_project(client, PROJECT_NAME).await; + let mcast_pool = create_multicast_ip_pool(&client, "mcast-pool").await; + + // Create multiple multicast groups in parallel to test per-instance limits + let group_specs = &[ + MulticastGroupForTest { + name: "limit-test-group-0", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 104)), + description: Some("Group 0 for limit testing".to_string()), + }, + MulticastGroupForTest { + name: "limit-test-group-1", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 105)), + description: Some("Group 1 for limit testing".to_string()), + }, + MulticastGroupForTest { + name: "limit-test-group-2", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 106)), + description: Some("Group 2 for limit testing".to_string()), + }, + MulticastGroupForTest { + name: "limit-test-group-3", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 107)), + description: Some("Group 3 for limit testing".to_string()), + }, + MulticastGroupForTest { + name: "limit-test-group-4", + multicast_ip: IpAddr::V4(Ipv4Addr::new(224, 0, 1, 108)), + description: Some("Group 4 for limit testing".to_string()), + }, + ]; + + create_multicast_groups(client, &mcast_pool, group_specs).await; + let group_names: Vec<&str> = group_specs.iter().map(|g| g.name).collect(); + + // Wait for all groups to become Active in parallel + wait_for_groups_active(client, 
&group_names).await; + + // Try to create an instance with many multicast groups + // (Check if there's a reasonable limit per instance) + let multicast_group_names: Vec<&str> = group_names[0..3].to_vec(); + + let instance = instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "mcast-instance-1", + false, + &multicast_group_names, // Test with 3 groups (reasonable limit) + ) + .await; + + // Wait for members to reach "Left" state for each group + // (instance is stopped, so member starts in "Left" state with no `sled_id`) + for group_name in &multicast_group_names { + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + } + + // Verify instance is member of multiple groups + for group_name in &multicast_group_names { + let members_url = mcast_group_members_url(group_name); + let members = nexus_test_utils::http_testing::NexusRequest::iter_collection_authn::<MulticastGroupMember>( + client, + &members_url, + &format!("project={PROJECT_NAME}"), + None, + ) + .await + .expect("Should list multicast group members") + .all_items; + + assert_eq!( + members.len(), + 1, + "Instance should be member of group {group_name}" + ); + assert_eq!(members[0].instance_id, instance.identity.id); + } + + // Clean up - use cleanup functions + cleanup_instances(cptestctx, client, PROJECT_NAME, &["mcast-instance-1"]) + .await; + cleanup_multicast_groups(client, &group_names).await; +} + +#[nexus_test] +async fn test_multicast_group_instance_state_transitions( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + create_project(client, PROJECT_NAME).await; + let mcast_pool = create_multicast_ip_pool(&client, "mcast-pool").await; + + // Create a multicast group with explicit IP for easy DPD validation + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 200)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "state-test-group".parse().unwrap(), + description: "Group for testing instance state transitions" + .to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + object_create::<_, MulticastGroup>(client, &group_url, &params).await; + + // Wait for group to become Active before proceeding + wait_for_group_active(client, "state-test-group").await; + + // Create stopped instance and add to multicast group + let stopped_instance = instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "state-test-instance", + false, // Create stopped + &["state-test-group"], + ) + .await; + + // Verify instance is stopped and in multicast group + assert_eq!(stopped_instance.runtime.run_state, InstanceState::Stopped); + + // Wait for member to reach "Left" state (stopped instance members start in "Left" state) + wait_for_member_state( + cptestctx, + "state-test-group", + stopped_instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + + // Start the instance and verify multicast behavior + let instance_id = + InstanceUuid::from_untyped_uuid(stopped_instance.identity.id); + let nexus = &cptestctx.server.server_context().nexus; + + // Start the instance using a direct POST request (not PUT) + let start_url = format!( + "/v1/instances/state-test-instance/start?project={PROJECT_NAME}" + ); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, 
&start_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::<Instance>() + .unwrap(); + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(&client, instance_id, InstanceState::Running).await; + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Stop the instance and verify multicast behavior persists + let stop_url = format!( + "/v1/instances/state-test-instance/stop?project={PROJECT_NAME}" + ); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::<Instance>() + .unwrap(); + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(&client, instance_id, InstanceState::Stopped).await; + + // Verify control plane still shows membership regardless of instance state + let members_url = mcast_group_members_url("state-test-group"); + let final_members: Vec<MulticastGroupMember> = + nexus_test_utils::http_testing::NexusRequest::iter_collection_authn( + client, + &members_url, + "", + None, + ) + .await + .unwrap() + .all_items; + + assert_eq!( + final_members.len(), + 1, + "Control plane should maintain multicast membership across instance state changes" + ); + assert_eq!(final_members[0].instance_id, stopped_instance.identity.id); + + // Clean up + object_delete( + client, + &format!("/v1/instances/state-test-instance?project={PROJECT_NAME}"), + ) + .await; + object_delete(client, &mcast_group_url("state-test-group")).await; +} + +/// Test that multicast group membership persists through instance stop/start cycles +/// (parallel to external IP persistence behavior) +#[nexus_test] +async fn test_multicast_group_persistence_through_stop_start( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + create_project(client, PROJECT_NAME).await; + let mcast_pool = create_multicast_ip_pool(&client, "mcast-pool").await; + + // Create a multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 0, 1, 200)); + let group_url = "/v1/multicast-groups".to_string(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "persist-test-group".parse().unwrap(), + description: "Group for stop/start persistence testing".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + object_create::<_, MulticastGroup>(client, &group_url, &params).await; + + // Wait for group to become Active + wait_for_group_active(client, "persist-test-group").await; + + // Create instance with the multicast group and start it + let instance = instance_for_multicast_groups( + cptestctx, + PROJECT_NAME, + "persist-test-instance", + true, // start the instance + &["persist-test-group"], + ) + .await; + + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + + // Simulate the instance transitioning to Running state + let nexus = &cptestctx.server.server_context().nexus; + instance_simulate(nexus, &instance_id).await; + + // Wait for member to be joined (reconciler will process the sled_id set by instance start) + wait_for_member_state( + cptestctx, + "persist-test-group", + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + 
.await; + + // Verify instance is in the group + let members_url = mcast_group_members_url("persist-test-group"); + let members_before_stop = + nexus_test_utils::http_testing::NexusRequest::iter_collection_authn::< + MulticastGroupMember, + >(client, &members_url, "", None) + .await + .expect("Should list group members before stop") + .all_items; + + assert_eq!( + members_before_stop.len(), + 1, + "Group should have 1 member before stop" + ); + assert_eq!(members_before_stop[0].instance_id, instance.identity.id); + + // Stop the instance + let instance_stop_url = format!( + "/v1/instances/persist-test-instance/stop?project={PROJECT_NAME}" + ); + nexus_test_utils::http_testing::NexusRequest::new( + nexus_test_utils::http_testing::RequestBuilder::new( + client, + http::Method::POST, + &instance_stop_url, + ) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(http::StatusCode::ACCEPTED)), + ) + .authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should stop instance"); + + // Simulate the stop transition + let nexus = &cptestctx.server.server_context().nexus; + instance_simulate(nexus, &instance_id).await; + + // Wait for instance to be stopped + instance_wait_for_state( + client, + instance_id, + omicron_common::api::external::InstanceState::Stopped, + ) + .await; + + // Verify multicast group membership persists while stopped + let members_while_stopped = + nexus_test_utils::http_testing::NexusRequest::iter_collection_authn::< + MulticastGroupMember, + >(client, &members_url, "", None) + .await + .expect("Should list group members while stopped") + .all_items; + + assert_eq!( + members_while_stopped.len(), + 1, + "Group membership should persist while instance is stopped" + ); + assert_eq!(members_while_stopped[0].instance_id, instance.identity.id); + + // Start the instance again + let instance_start_url = format!( + "/v1/instances/persist-test-instance/start?project={PROJECT_NAME}" + ); + nexus_test_utils::http_testing::NexusRequest::new( + nexus_test_utils::http_testing::RequestBuilder::new( + client, + http::Method::POST, + &instance_start_url, + ) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(http::StatusCode::ACCEPTED)), + ) + .authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should start instance"); + + // Simulate the instance transitioning back to "Running" state + let nexus = &cptestctx.server.server_context().nexus; + instance_simulate(nexus, &instance_id).await; + + // Wait for instance to be running again + instance_wait_for_state( + client, + instance_id, + omicron_common::api::external::InstanceState::Running, + ) + .await; + + // Verify multicast group membership still exists after restart + let members_after_restart = + nexus_test_utils::http_testing::NexusRequest::iter_collection_authn::< + MulticastGroupMember, + >(client, &members_url, "", None) + .await + .expect("Should list group members after restart") + .all_items; + + assert_eq!( + members_after_restart.len(), + 1, + "Group membership should persist after instance restart" + ); + assert_eq!(members_after_restart[0].instance_id, instance.identity.id); + + // Wait for member to be joined again after restart + wait_for_member_state( + cptestctx, + "persist-test-group", + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Clean up - use cleanup helper which handles stop/delete + cleanup_instances( + cptestctx, + client, + PROJECT_NAME, + 
&["persist-test-instance"], + ) + .await; + cleanup_multicast_groups(client, &["persist-test-group"]).await; +} + +/// Verify concurrent multicast operations maintain correct member states. +/// +/// The system handles multiple instances joining simultaneously, rapid attach/detach +/// cycles, and concurrent operations during reconciler processing. These scenarios +/// expose race conditions in member state transitions, reconciler processing, and +/// DPD synchronization that sequential tests can't catch. +#[nexus_test] +async fn test_multicast_concurrent_operations( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + create_project(client, PROJECT_NAME).await; + let mcast_pool = create_multicast_ip_pool_with_range( + &client, + "concurrent-pool", + (224, 40, 0, 1), + (224, 40, 0, 255), + ) + .await; + + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 40, 0, 100)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: "concurrent-test-group".parse().unwrap(), + description: "Group for concurrent operations testing".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, "concurrent-test-group").await; + + // Create multiple instances for concurrent testing + let instance_names = [ + "concurrent-instance-1", + "concurrent-instance-2", + "concurrent-instance-3", + "concurrent-instance-4", + ]; + + // Create all instances in parallel (now that we fixed the cleanup double-delete bug) + let create_futures = instance_names + .iter() + .map(|name| create_instance(client, PROJECT_NAME, name)); + let instances = ops::join_all(create_futures).await; + + // Attach all instances to the multicast group in parallel (this is the optimization) + multicast_group_attach_bulk( + cptestctx, + PROJECT_NAME, + &instance_names, + "concurrent-test-group", + ) + .await; + + // Verify all members reached correct state despite concurrent operations + for instance in instances.iter() { + wait_for_member_state( + cptestctx, + "concurrent-test-group", + instance.identity.id, + // create_instance() starts instances, so they should be Joined + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + } + + // Verify final member count matches expected (all 4 instances) + let members = + list_multicast_group_members(client, "concurrent-test-group").await; + assert_eq!( + members.len(), + 4, + "All 4 instances should be members after concurrent addition" + ); + + // Detach first two instances concurrently + let instance_names_to_detach = + ["concurrent-instance-1", "concurrent-instance-2"]; + multicast_group_detach_bulk( + client, + PROJECT_NAME, + &instance_names_to_detach, + "concurrent-test-group", + ) + .await; + + // Wait for member count to reach 2 after detachments + wait_for_member_count(client, "concurrent-test-group", 2).await; + + // Re-attach one instance while detaching another (overlapping operations) + let reattach_future = multicast_group_attach( + cptestctx, + PROJECT_NAME, + "concurrent-instance-1", + "concurrent-test-group", + ); + let detach_future = multicast_group_detach( + client, + PROJECT_NAME, + "concurrent-instance-3", + "concurrent-test-group", + ); + + // Execute overlapping operations + 
ops::join2(reattach_future, detach_future).await; + + // Wait for final state to be consistent (should still have 2 members) + wait_for_member_count(client, "concurrent-test-group", 2).await; + + // Concurrent operations during reconciler processing + + // Start a member addition and immediately follow with another operation + // This tests handling of operations that arrive while reconciler is processing + let rapid_ops_future = async { + multicast_group_attach( + cptestctx, + PROJECT_NAME, + "concurrent-instance-3", + "concurrent-test-group", + ) + .await; + // Don't wait for reconciler - immediately do another operation + multicast_group_detach( + client, + PROJECT_NAME, + "concurrent-instance-4", + "concurrent-test-group", + ) + .await; + }; + + rapid_ops_future.await; + + // Wait for system to reach consistent final state (should have 2 members) + wait_for_member_count(client, "concurrent-test-group", 2).await; + + // Get the final members for state verification + let post_rapid_members = + list_multicast_group_members(client, "concurrent-test-group").await; + + // Wait for all remaining members to reach "Joined" state + for member in &post_rapid_members { + wait_for_member_state( + cptestctx, + "concurrent-test-group", + member.instance_id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + } + + // Cleanup + cleanup_instances(cptestctx, client, PROJECT_NAME, &instance_names).await; + cleanup_multicast_groups(client, &["concurrent-test-group"]).await; +} + +/// Verify that multicast members are properly cleaned up when an instance +/// is deleted without ever starting (orphaned member cleanup). +/// +/// When an instance is created and added to a multicast group but never started, +/// the member enters "Left" state with sled_id=NULL. If the instance is then +/// deleted before ever starting, the RPW reconciler must detect and clean up the +/// orphaned member. 
+#[nexus_test] +async fn test_multicast_member_cleanup_instance_never_started( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let project_name = "never-started-project"; + let group_name = "never-started-group"; + let instance_name = "never-started-instance"; + + // Setup: project, pools, group + create_project(client, project_name).await; + create_default_ip_pool(client).await; + let mcast_pool = create_multicast_ip_pool_with_range( + client, + "never-started-pool", + (224, 50, 0, 1), + (224, 50, 0, 255), + ) + .await; + + // Create multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 50, 0, 100)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for never-started instance test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + // Create instance but don't start it - use start: false + let instance_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: "Instance that will never be started".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + multicast_groups: vec![], + disks: vec![], + boot_disk: None, + cpu_platform: None, + start: false, // Critical: don't start the instance + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let instance_url = format!("/v1/instances?project={project_name}"); + let instance: Instance = + object_create(client, &instance_url, &instance_params).await; + + // Add instance as multicast member (will be in "Left" state since instance + // is stopped with no sled_id) + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Wait for member to reach "Left" state (stopped instance with no sled_id) + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Left, + ) + .await; + + // Verify member count + let members = list_multicast_group_members(client, group_name).await; + assert_eq!(members.len(), 1, "Should have one member"); + + // Delete the instance directly without starting it + // This simulates the case where an instance is created, added to multicast group, + // but then deleted before ever starting (never gets a sled assignment) + let instance_url = + format!("/v1/instances/{instance_name}?project={project_name}"); + object_delete(client, &instance_url).await; + + // Wait for reconciler to process the deletion + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Critical test: Verify the orphaned member was cleaned up + // The RPW reconciler should detect that the member's instance was deleted + // and remove the member 
from the group + let final_members = list_multicast_group_members(client, group_name).await; + assert_eq!( + final_members.len(), + 0, + "Orphaned member should be cleaned up when instance is deleted without starting" + ); + + // Verify that stale ports were removed from DPD + // Since the instance never started (never had a `sled_id`), there should be + // no rear/underlay ports in DPD for this group. This verifies the reconciler + // only removes ports when it has complete information about all "Joined" members. + + // Get the underlay group IP from the database + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + + // Fetch the external group to get its underlay_group_id + let external_group = datastore + .multicast_group_lookup_by_ip(&opctx, multicast_ip) + .await + .expect("Should lookup external multicast group by IP"); + + let underlay_group_id = external_group + .underlay_group_id + .expect("External group should have underlay_group_id"); + + // Fetch the underlay group to get its multicast IP + let underlay_group = datastore + .underlay_multicast_group_fetch(&opctx, underlay_group_id) + .await + .expect("Should fetch underlay multicast group"); + + let underlay_multicast_ip = underlay_group.multicast_ip.ip(); + + // Query DPD for the underlay group (where instance members are stored) + let dpd_client = nexus_test_utils::dpd_client(cptestctx); + let dpd_group_response = dpd_client + .multicast_group_get(&underlay_multicast_ip) + .await + .expect("Should be able to query DPD for underlay multicast group"); + + // Extract underlay members from the response + let underlay_members = match dpd_group_response.into_inner() { + dpd_client::types::MulticastGroupResponse::Underlay { + members, .. + } => members, + dpd_client::types::MulticastGroupResponse::External { .. } => { + panic!( + "Expected underlay group when querying underlay IP, got external" + ); + } + }; + + // Filter to only rear/underlay members (instance members on backplane) + let rear_underlay_members: Vec<_> = underlay_members + .iter() + .filter(|m| { + matches!(m.port_id, dpd_client::types::PortId::Rear(_)) + && m.direction == dpd_client::types::Direction::Underlay + }) + .collect(); + + assert_eq!( + rear_underlay_members.len(), + 0, + "DPD should have no rear/underlay ports after instance deletion and reconciler run" + ); + + // Cleanup + cleanup_multicast_groups(client, &[group_name]).await; +} + +/// Verify multicast group membership persists through instance migration. +/// +/// The RPW reconciler detects sled_id changes and updates DPD configuration on +/// both source and target switches to maintain uninterrupted multicast traffic. +/// Member state follows the expected lifecycle: Joined on source sled → sled_id +/// updated during migration → Joined again on target sled after reconciler +/// processes the change. 
+#[nexus_test(extra_sled_agents = 1)] +async fn test_multicast_group_membership_during_migration( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let lockstep_client = &cptestctx.lockstep_client; + let nexus = &cptestctx.server.server_context().nexus; + let project_name = "migration-test-project"; + let group_name = "migration-test-group"; + let instance_name = "migration-test-instance"; + + // Setup: project, pools, and multicast group + create_project(client, project_name).await; + create_default_ip_pool(client).await; + let mcast_pool = create_multicast_ip_pool_with_range( + client, + "migration-pool", + (224, 60, 0, 1), + (224, 60, 0, 255), + ) + .await; + + // Create multicast group with mvlan + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 60, 0, 100)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for migration testing with mvlan".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: Some(VlanID::new(3000).unwrap()), // Test mvlan persistence through migration + }; + + let created_group: MulticastGroup = + object_create(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + // Verify mvlan is set + assert_eq!( + created_group.mvlan, + Some(VlanID::new(3000).unwrap()), + "MVLAN should be set on group creation" + ); + + // Create and start instance with multicast group membership + let instance = instance_for_multicast_groups( + cptestctx, + project_name, + instance_name, + true, // start the instance + &[group_name], + ) + .await; + + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + + // Simulate instance startup and wait for Running state + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Running).await; + + // Wait for instance to reach "Joined" state (member creation is processed by reconciler) + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + let pre_migration_members = + list_multicast_group_members(client, group_name).await; + assert_eq!(pre_migration_members.len(), 1); + assert_eq!(pre_migration_members[0].instance_id, instance.identity.id); + assert_eq!(pre_migration_members[0].state, "Joined"); + + // Verify mvlan is in DPD before migration + let dpd_client = nexus_test_utils::dpd_client(cptestctx); + let pre_migration_dpd_group = dpd_client + .multicast_group_get(&multicast_ip) + .await + .expect("Multicast group should exist in DPD before migration"); + + match pre_migration_dpd_group.into_inner() { + dpd_client::types::MulticastGroupResponse::External { + external_forwarding, + .. + } => { + assert_eq!( + external_forwarding.vlan_id, + Some(3000), + "DPD should show vlan_id=3000 before migration" + ); + } + dpd_client::types::MulticastGroupResponse::Underlay { .. 
} => { + panic!("Expected external group, got underlay"); + } + } + + // Get source and target sleds for migration + let source_sled_id = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Running instance should be on a sled") + .sled_id; + + let target_sled_id = if source_sled_id == cptestctx.first_sled_id() { + cptestctx.second_sled_id() + } else { + cptestctx.first_sled_id() + }; + + // Initiate migration + let migrate_url = format!("/instances/{instance_id}/migrate"); + nexus_test_utils::http_testing::NexusRequest::new( + nexus_test_utils::http_testing::RequestBuilder::new( + lockstep_client, + Method::POST, + &migrate_url, + ) + .body(Some(&InstanceMigrateRequest { dst_sled_id: target_sled_id })) + .expect_status(Some(StatusCode::OK)), + ) + .authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should initiate instance migration"); + + // Get propolis IDs for source and target - follow the pattern from existing tests + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = + info.dst_propolis_id.expect("Instance should have a migration target"); + + // Helper function from instances.rs + async fn vmm_simulate_on_sled( + _cptestctx: &ControlPlaneTestContext, + nexus: &std::sync::Arc, + sled_id: omicron_uuid_kinds::SledUuid, + propolis_id: omicron_uuid_kinds::PropolisUuid, + ) { + let sa = nexus.sled_client(&sled_id).await.unwrap(); + sa.vmm_finish_transition(propolis_id).await; + } + + // Complete migration on source sled and wait for instance to enter "Migrating" + vmm_simulate_on_sled(cptestctx, nexus, source_sled_id, src_propolis_id) + .await; + + // Instance should transition to "Migrating"; membership should remain "Joined" + instance_wait_for_state(client, instance_id, InstanceState::Migrating) + .await; + let migrating_members = + list_multicast_group_members(client, group_name).await; + assert_eq!( + migrating_members.len(), + 1, + "Membership should remain during migration" + ); + assert_eq!(migrating_members[0].instance_id, instance.identity.id); + assert_eq!( + migrating_members[0].state, "Joined", + "Member should stay Joined while migrating" + ); + + // Complete migration on target sled + vmm_simulate_on_sled(cptestctx, nexus, target_sled_id, dst_propolis_id) + .await; + + // Wait for migration to complete + instance_wait_for_state(client, instance_id, InstanceState::Running).await; + + // Verify instance is now on the target sled + let post_migration_sled = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Migrated instance should still be on a sled") + .sled_id; + + assert_eq!( + post_migration_sled, target_sled_id, + "Instance should be on target sled after migration" + ); + + // Wait for multicast reconciler to process the sled_id change + // The RPW reconciler should detect the sled_id change and re-apply DPD configuration + wait_for_multicast_reconciler(lockstep_client).await; + + // Verify multicast membership persists after migration + let post_migration_members = + list_multicast_group_members(client, group_name).await; + + assert_eq!( + post_migration_members.len(), + 1, + "Multicast membership should persist through migration" + ); + assert_eq!(post_migration_members[0].instance_id, instance.identity.id); + + // Wait for member to reach "Joined" state on target sled + // The RPW reconciler should transition the member 
back to "Joined" after re-applying DPD configuration + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + let final_member_state = &post_migration_members[0]; + assert_eq!( + final_member_state.state, "Joined", + "Member should be in 'Joined' state after migration completes" + ); + + // Verify inventory-based port mapping updated correctly after migration + // This confirms the RPW reconciler correctly mapped the new sled to its rear port + verify_inventory_based_port_mapping(cptestctx, &instance_id) + .await + .expect("port mapping should be updated after migration"); + + // Verify mvlan persisted in DPD after migration + let post_migration_dpd_group = dpd_client + .multicast_group_get(&multicast_ip) + .await + .expect("Multicast group should exist in DPD after migration"); + + match post_migration_dpd_group.into_inner() { + dpd_client::types::MulticastGroupResponse::External { + external_forwarding, + .. + } => { + assert_eq!( + external_forwarding.vlan_id, + Some(3000), + "DPD should still show vlan_id=3000 after migration - mvlan must persist" + ); + } + dpd_client::types::MulticastGroupResponse::Underlay { .. } => { + panic!("Expected external group, got underlay"); + } + } + + // Cleanup: Stop and delete instance, then cleanup group + let stop_url = + format!("/v1/instances/{instance_name}/stop?project={project_name}"); + nexus_test_utils::http_testing::NexusRequest::new( + nexus_test_utils::http_testing::RequestBuilder::new( + client, + Method::POST, + &stop_url, + ) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should stop instance"); + + // Simulate stop and wait for stopped state + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; + + // Delete instance and cleanup + object_delete( + client, + &format!("/v1/instances/{instance_name}?project={project_name}"), + ) + .await; + + cleanup_multicast_groups(client, &[group_name]).await; +} + +/// Verify the RPW reconciler handles concurrent instance migrations within the same multicast group. +/// +/// Multiple instances in the same multicast group can migrate simultaneously without +/// interfering with each other's membership states. The reconciler correctly processes +/// concurrent sled_id changes for all members, ensuring each reaches Joined state on +/// their respective target sleds. 
+#[nexus_test(extra_sled_agents = 1)] +async fn test_multicast_group_concurrent_member_migrations( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let lockstep_client = &cptestctx.lockstep_client; + let nexus = &cptestctx.server.server_context().nexus; + let project_name = "concurrent-migration-project"; + let group_name = "concurrent-migration-group"; + + // Setup: project, pools, and multicast group + create_project(client, project_name).await; + create_default_ip_pool(client).await; + let mcast_pool = create_multicast_ip_pool_with_range( + client, + "concurrent-migration-pool", + (224, 62, 0, 1), + (224, 62, 0, 255), + ) + .await; + + // Create multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 62, 0, 100)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for concurrent migration testing".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + // Ensure inventory and DPD are ready before creating instances with multicast groups + ensure_multicast_test_ready(cptestctx).await; + + // Create multiple instances all in the same multicast group + let instance_specs = [ + ("concurrent-instance-1", &[group_name][..]), + ("concurrent-instance-2", &[group_name][..]), + ]; + + let instances = create_instances_with_multicast_groups( + client, + project_name, + &instance_specs, + true, // start instances + ) + .await; + + let instance_ids: Vec<_> = instances + .iter() + .map(|i| InstanceUuid::from_untyped_uuid(i.identity.id)) + .collect(); + + // Simulate all instances to Running state in parallel + let simulate_futures = instance_ids.iter().map(|&instance_id| async move { + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Running) + .await; + }); + ops::join_all(simulate_futures).await; + + // Wait for all members to reach "Joined" state + for instance in &instances { + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + } + + // Verify we have 2 members initially + let pre_migration_members = + list_multicast_group_members(client, group_name).await; + assert_eq!(pre_migration_members.len(), 2); + + // Get current sleds for all instances + let mut source_sleds = Vec::new(); + let mut target_sleds = Vec::new(); + + let available_sleds = + [cptestctx.first_sled_id(), cptestctx.second_sled_id()]; + + for &instance_id in &instance_ids { + let current_sled = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Running instance should be on a sled") + .sled_id; + source_sleds.push(current_sled); + + // Find a different sled for migration target + let target_sled = available_sleds + .iter() + .find(|&&sled| sled != current_sled) + .copied() + .expect("Should have available target sled"); + target_sleds.push(target_sled); + } + + // Initiate both migrations concurrently + let migration_futures = instance_ids.iter().zip(target_sleds.iter()).map( + |(&instance_id, &target_sled)| { + let migrate_url = format!("/instances/{instance_id}/migrate"); + 
nexus_test_utils::http_testing::NexusRequest::new( + nexus_test_utils::http_testing::RequestBuilder::new( + lockstep_client, + Method::POST, + &migrate_url, + ) + .body(Some(&InstanceMigrateRequest { + dst_sled_id: target_sled, + })) + .expect_status(Some(StatusCode::OK)), + ) + .authn_as(nexus_test_utils::http_testing::AuthnMode::PrivilegedUser) + .execute() + }, + ); + + // Execute both migrations concurrently + let migration_responses = ops::join_all(migration_futures).await; + + // Verify both migrations were initiated successfully + for response in migration_responses { + response.expect("Migration should initiate successfully"); + } + + // Complete both migrations by simulating on both source and target sleds + for (i, &instance_id) in instance_ids.iter().enumerate() { + // Get propolis IDs for this instance + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = info + .dst_propolis_id + .expect("Instance should have a migration target"); + + // Helper function from instances.rs + async fn vmm_simulate_on_sled( + _cptestctx: &ControlPlaneTestContext, + nexus: &std::sync::Arc, + sled_id: omicron_uuid_kinds::SledUuid, + propolis_id: omicron_uuid_kinds::PropolisUuid, + ) { + let sa = nexus.sled_client(&sled_id).await.unwrap(); + sa.vmm_finish_transition(propolis_id).await; + } + + // Complete migration on source and target + vmm_simulate_on_sled( + cptestctx, + nexus, + source_sleds[i], + src_propolis_id, + ) + .await; + vmm_simulate_on_sled( + cptestctx, + nexus, + target_sleds[i], + dst_propolis_id, + ) + .await; + + instance_wait_for_state(client, instance_id, InstanceState::Running) + .await; + } + + // Verify all instances are on their target sleds + for (i, &instance_id) in instance_ids.iter().enumerate() { + let current_sled = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("Migrated instance should be on target sled") + .sled_id; + + assert_eq!( + current_sled, + target_sleds[i], + "Instance {} should be on target sled after migration", + i + 1 + ); + } + + // Wait for multicast reconciler to process all sled_id changes + wait_for_multicast_reconciler(lockstep_client).await; + + // Verify all members are still in the group and reach "Joined" state + let post_migration_members = + list_multicast_group_members(client, group_name).await; + + assert_eq!( + post_migration_members.len(), + 2, + "Both instances should remain multicast group members after concurrent migration" + ); + + // Verify both members reach "Joined" state on their new sleds + for instance in &instances { + wait_for_member_state( + cptestctx, + group_name, + instance.identity.id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + } + + // Cleanup + let instance_names = ["concurrent-instance-1", "concurrent-instance-2"]; + cleanup_instances(cptestctx, client, project_name, &instance_names).await; + cleanup_multicast_groups(client, &[group_name]).await; +} diff --git a/nexus/tests/integration_tests/multicast/mod.rs b/nexus/tests/integration_tests/multicast/mod.rs new file mode 100644 index 00000000000..fe818dd60e6 --- /dev/null +++ b/nexus/tests/integration_tests/multicast/mod.rs @@ -0,0 +1,1512 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Multicast integration tests and helper methods. + +use std::future::Future; +use std::net::IpAddr; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use dropshot::test_util::ClientTestContext; +use http::{Method, StatusCode}; +use slog::{debug, info, warn}; +use uuid::Uuid; + +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; +use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; +use nexus_test_utils::resource_helpers::{ + link_ip_pool, object_create, object_delete, +}; +use nexus_types::deployment::SledFilter; +use nexus_types::external_api::params::{ + InstanceCreate, InstanceNetworkInterfaceAttachment, IpPoolCreate, + MulticastGroupCreate, +}; +use nexus_types::external_api::shared::{IpRange, Ipv4Range}; +use nexus_types::external_api::views::{ + IpPool, IpPoolRange, IpVersion, MulticastGroup, MulticastGroupMember, +}; +use nexus_types::identity::{Asset, Resource}; +use omicron_common::api::external::{ + ByteCount, Hostname, IdentityMetadataCreateParams, Instance, + InstanceAutoRestartPolicy, InstanceCpuCount, InstanceState, NameOrId, +}; +use omicron_nexus::TestInterfaces; +use omicron_test_utils::dev::poll::{self, CondCheckError, wait_for_condition}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid, MulticastGroupUuid}; + +use crate::integration_tests::instances as instance_helpers; + +// Shared type alias for all multicast integration tests +pub(crate) type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>; + +mod api; +mod authorization; +mod cache_invalidation; +mod enablement; +mod failures; +mod groups; +mod instances; +mod networking_integration; + +// Timeout constants for test operations +const POLL_INTERVAL: Duration = Duration::from_millis(80); +const MULTICAST_OPERATION_TIMEOUT: Duration = Duration::from_secs(120); + +/// Build URL for listing all multicast groups (fleet-scoped). +pub(crate) fn mcast_groups_url() -> String { + "/v1/multicast-groups".to_string() +} + +/// Build URL for a specific multicast group by name. +pub(crate) fn mcast_group_url(group_name: &str) -> String { + format!("/v1/multicast-groups/{group_name}") +} + +/// Build URL for listing members of a multicast group. +pub(crate) fn mcast_group_members_url(group_name: &str) -> String { + format!("/v1/multicast-groups/{group_name}/members") +} + +/// Build URL for adding a member to a multicast group. +/// +/// The `?project=` parameter is required when using instance names (for scoping) +/// but must NOT be provided when using instance UUIDs (causes 400 Bad Request). +pub(crate) fn mcast_group_member_add_url( + group_name: &str, + instance: &NameOrId, + project_name: &str, +) -> String { + let base_url = mcast_group_members_url(group_name); + match instance { + NameOrId::Name(_) => format!("{base_url}?project={project_name}"), + NameOrId::Id(_) => base_url, + } +} + +/// Test helper for creating multicast groups in batch operations. +#[derive(Clone)] +pub(crate) struct MulticastGroupForTest { + pub name: &'static str, + pub multicast_ip: IpAddr, + pub description: Option<String>, +} + +/// Create a multicast IP pool for ASM (Any-Source Multicast) testing. +pub(crate) async fn create_multicast_ip_pool( + client: &ClientTestContext, + pool_name: &str, +) -> IpPool { + create_multicast_ip_pool_with_range( + client, + pool_name, + (224, 0, 1, 10), // Default ASM range start + (224, 0, 1, 255), // Default ASM range end + ) + .await +} + +/// Create a multicast IP pool with custom ASM range.
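+/// +/// Example (illustrative only; the pool name and range below are placeholders): +/// ```rust,ignore +/// // Creates the pool, adds 224.10.0.1-224.10.0.50 as an ASM range, and +/// // links the pool to the default silo. +/// let pool = create_multicast_ip_pool_with_range( +/// client, +/// "my-mcast-pool", +/// (224, 10, 0, 1), +/// (224, 10, 0, 50), +/// ) +/// .await; +/// ```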
+pub(crate) async fn create_multicast_ip_pool_with_range( + client: &ClientTestContext, + pool_name: &str, + range_start: (u8, u8, u8, u8), + range_end: (u8, u8, u8, u8), +) -> IpPool { + let pool_params = IpPoolCreate::new_multicast( + IdentityMetadataCreateParams { + name: pool_name.parse().unwrap(), + description: "Multicast IP pool for testing".to_string(), + }, + IpVersion::V4, + ); + + let pool: IpPool = + object_create(client, "/v1/system/ip-pools", &pool_params).await; + + // Add IPv4 ASM range + let asm_range = IpRange::V4( + Ipv4Range::new( + std::net::Ipv4Addr::new( + range_start.0, + range_start.1, + range_start.2, + range_start.3, + ), + std::net::Ipv4Addr::new( + range_end.0, + range_end.1, + range_end.2, + range_end.3, + ), + ) + .unwrap(), + ); + let range_url = format!("/v1/system/ip-pools/{pool_name}/ranges/add"); + object_create::<_, IpPoolRange>(client, &range_url, &asm_range).await; + + // Link the pool to the silo so it can be found by multicast group creation + link_ip_pool(client, pool_name, &DEFAULT_SILO.id(), false).await; + + pool +} + +/// Waits for the multicast group reconciler to complete. +/// +/// This wraps `wait_background_task` with the correct task name. +pub(crate) async fn wait_for_multicast_reconciler( + lockstep_client: &ClientTestContext, +) -> nexus_lockstep_client::types::BackgroundTask { + nexus_test_utils::background::wait_background_task( + lockstep_client, + "multicast_reconciler", + ) + .await +} + +/// Wait for a condition to be true, activating the reconciler periodically. +/// +/// This is like `wait_for_condition` but activates the multicast reconciler +/// periodically (not on every poll) to drive state changes. We activate the +/// reconciler every 500ms. +/// +/// Useful for tests that need to wait for reconciler-driven state changes +/// (e.g., member state transitions). +pub(crate) async fn wait_for_condition_with_reconciler<T, E, F, Fut>( + lockstep_client: &ClientTestContext, + condition: F, + poll_interval: &Duration, + timeout: &Duration, +) -> Result<T, poll::Error<E>> +where + F: Fn() -> Fut, + Fut: Future<Output = Result<T, CondCheckError<E>>>, +{ + // Activate reconciler less frequently than we check the condition + // This reduces overhead while still driving state changes forward + const RECONCILER_ACTIVATION_INTERVAL: Duration = Duration::from_millis(500); + + let last_reconciler_activation = Arc::new(Mutex::new(Instant::now())); + + // Activate once at the start to kick things off + wait_for_multicast_reconciler(lockstep_client).await; + + wait_for_condition( + || async { + // Only activate reconciler if enough time has passed + let now = Instant::now(); + let should_activate = { + let last = last_reconciler_activation.lock().unwrap(); + now.duration_since(*last) >= RECONCILER_ACTIVATION_INTERVAL + }; + + if should_activate { + wait_for_multicast_reconciler(lockstep_client).await; + *last_reconciler_activation.lock().unwrap() = now; + } + + condition().await + }, + poll_interval, + timeout, + ) + .await +} + +/// Ensure inventory collection has completed with SP data for all sleds. +/// +/// This function verifies that inventory has SP data for EVERY in-service sled, +/// not just that inventory completed. +/// +/// This is required for multicast member operations which map `sled_id` → `sp_slot` +/// → switch ports via inventory.
+pub(crate) async fn ensure_inventory_ready( + cptestctx: &ControlPlaneTestContext, +) { + let log = &cptestctx.logctx.log; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + + info!(log, "waiting for inventory with SP data for all sleds"); + + // Wait for inventory to have SP data for ALL in-service sleds + match wait_for_condition( + || async { + let opctx = OpContext::for_tests(log.clone(), datastore.clone()); + + // Get all in-service sleds + let sleds = match datastore + .sled_list_all_batched(&opctx, SledFilter::InService) + .await + { + Ok(sleds) => sleds, + Err(e) => { + warn!(log, "failed to list sleds: {e}"); + return Err(CondCheckError::<String>::NotYet); + } + }; + + if sleds.is_empty() { + warn!(log, "no in-service sleds found yet"); + return Err(CondCheckError::<String>::NotYet); + } + + // Get latest inventory + let inventory = + match datastore.inventory_get_latest_collection(&opctx).await { + Ok(Some(inv)) => inv, + Ok(None) => { + debug!(log, "no inventory collection yet"); + return Err(CondCheckError::<String>::NotYet); + } + Err(e) => { + warn!(log, "failed to get inventory: {e}"); + return Err(CondCheckError::<String>::NotYet); + } + }; + + // Verify inventory has SP data for each sled + let mut missing_sleds = Vec::new(); + for sled in &sleds { + let has_sp = inventory.sps.iter().any(|(bb, _)| { + (bb.serial_number == sled.serial_number() + && bb.part_number == sled.part_number()) + || bb.serial_number == sled.serial_number() + }); + + if !has_sp { + missing_sleds.push(sled.serial_number().to_string()); + } + } + + if missing_sleds.is_empty() { + info!( + log, + "inventory has SP data for all {} sleds", + sleds.len() + ); + Ok(()) + } else { + debug!( + log, + "inventory missing SP data for {} sleds: {:?}", + missing_sleds.len(), + missing_sleds + ); + Err(CondCheckError::<String>::NotYet) + } + }, + &Duration::from_millis(500), // Check every 500ms + &Duration::from_secs(120), // Wait up to 120s + ) + .await + { + Ok(_) => { + info!(log, "inventory ready with SP data for all sleds"); + } + Err(poll::Error::TimedOut(elapsed)) => { + panic!( + "inventory did not get SP data for all sleds within {elapsed:?}" + ); + } + Err(poll::Error::PermanentError(err)) => { + panic!("failed waiting for inventory: {err}"); + } + } +} + +/// Ensure multicast test prerequisites are ready. +/// +/// This combines inventory collection (for sled → switch port mapping) and +/// DPD readiness (for switch operations) into a single call. Use this at the +/// beginning of multicast tests that will add instances to groups. +pub(crate) async fn ensure_multicast_test_ready( + cptestctx: &ControlPlaneTestContext, +) { + ensure_inventory_ready(cptestctx).await; + ensure_dpd_ready(cptestctx).await; +} + +/// Ensure DPD (switch infrastructure) is ready and responsive. +/// +/// This ensures that switch zones are up and DPD APIs are responding before +/// running tests that depend on dataplane operations. Helps prevent flaky tests +/// where the reconciler tries to contact DPD before switch zones are up. +/// +/// Uses a simple ping by listing groups - any successful response means DPD is ready.
+pub(crate) async fn ensure_dpd_ready(cptestctx: &ControlPlaneTestContext) { + let dpd_client = nexus_test_utils::dpd_client(cptestctx); + let log = &cptestctx.logctx.log; + + info!(log, "waiting for DPD/switch infrastructure to be ready"); + + match wait_for_condition( + || async { + // Try to list multicast groups - any successful response means DPD is ready + // limit=None, page_token=None - we don't care about the results, just that DPD responds + match dpd_client.multicast_groups_list(None, None).await { + Ok(_) => { + debug!(log, "DPD is responsive"); + Ok(()) + } + Err(e) => { + debug!( + log, + "DPD not ready yet"; + "error" => %e + ); + Err(CondCheckError::<String>::NotYet) + } + } + }, + &Duration::from_millis(200), // Check every 200ms + &Duration::from_secs(30), // Wait up to 30 seconds for switches + ) + .await + { + Ok(_) => { + info!(log, "DPD/switch infrastructure is ready"); + } + Err(poll::Error::TimedOut(elapsed)) => { + panic!( + "DPD/switch infrastructure did not become ready within {elapsed:?}" + ); + } + Err(poll::Error::PermanentError(err)) => { + panic!("Failed waiting for DPD to be ready: {err}"); + } + } +} + +/// Wait for DPD multicast group state to match a condition. +/// +/// Generic helper that polls DPD state and calls the provided predicate +/// to determine if the expected state has been reached. This is useful when +/// the reconciler runs sagas asynchronously and tests need to wait for DPD +/// to reflect the changes. +/// +/// # Usage Examples +/// +/// Check for a specific vlan_id: +/// ```rust,ignore +/// wait_for_dpd_state( +/// cptestctx, +/// &multicast_ip, +/// |response| match response { +/// MulticastGroupResponse::External { external_forwarding, .. } => { +/// if external_forwarding.vlan_id == Some(3500) { +/// Ok(()) +/// } else { +/// Err(CondCheckError::NotYet) +/// } +/// } +/// _ => Err(CondCheckError::Failed("Expected external group".to_string())) +/// }, +/// "vlan_id = Some(3500)", +/// ).await; +/// ``` +/// +/// Check for source IP changes: +/// ```rust,ignore +/// wait_for_dpd_state( +/// cptestctx, +/// &multicast_ip, +/// |response| match response { +/// MulticastGroupResponse::External { sources, .. } => { +/// if sources.contains(&expected_source) { +/// Ok(()) +/// } else { +/// Err(CondCheckError::NotYet) +/// } +/// } +/// _ => Err(CondCheckError::Failed("Expected external group".to_string())) +/// }, +/// "sources contains expected IP", +/// ).await; +/// ``` +pub(crate) async fn wait_for_dpd_state<F>( + cptestctx: &ControlPlaneTestContext, + multicast_ip: &IpAddr, + predicate: F, + description: &str, +) where + F: Fn( + &dpd_client::types::MulticastGroupResponse, + ) -> Result<(), CondCheckError<String>>, +{ + let dpd_client = nexus_test_utils::dpd_client(cptestctx); + + match wait_for_condition( + || async { + match dpd_client.multicast_group_get(multicast_ip).await { + Ok(response) => predicate(&response.into_inner()), + Err(e) => Err(CondCheckError::Failed(format!( + "DPD query failed: {e}" + ))), + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + { + Ok(_) => {} + Err(poll::Error::TimedOut(elapsed)) => panic!( + "DPD state for {multicast_ip} did not reach expected condition '{description}' within {elapsed:?}" + ), + Err(poll::Error::PermanentError(err)) => { + panic!("Failed waiting for DPD state '{description}': {err}") + } + } +} + +/// Wait for a multicast group DPD update to complete. +/// +/// This is a composite helper that combines activating the reconciler +/// and waiting for DPD state to match a condition.
Use this instead of +/// calling `wait_for_multicast_reconciler()` + `wait_for_dpd_state()` +/// separately. +/// +/// # Usage Examples +/// +/// After a metadata-only update (name/description): +/// ```rust,ignore +/// wait_for_group_dpd_update( +/// cptestctx, +/// &multicast_ip, +/// dpd_predicates::expect_external_group(), +/// "name update saga completed", +/// ).await; +/// ``` +/// +/// After an mvlan update: +/// ```rust,ignore +/// wait_for_group_dpd_update( +/// cptestctx, +/// &multicast_ip, +/// dpd_predicates::expect_vlan_id(3500), +/// "vlan_id updated to 3500", +/// ).await; +/// ``` +pub(crate) async fn wait_for_group_dpd_update<F>( + cptestctx: &ControlPlaneTestContext, + multicast_ip: &IpAddr, + predicate: F, + description: &str, +) where + F: Fn( + &dpd_client::types::MulticastGroupResponse, + ) -> Result<(), CondCheckError<String>>, +{ + // Activate reconciler to ensure saga is launched + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Wait for DPD to reflect the changes (saga completion) + wait_for_dpd_state(cptestctx, multicast_ip, predicate, description).await; +} + +/// Get a single multicast group by name. +pub(crate) async fn get_multicast_group( + client: &ClientTestContext, + group_name: &str, +) -> MulticastGroup { + let url = mcast_group_url(group_name); + NexusRequest::object_get(client, &url) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::<MulticastGroup>() + .await +} + +/// List all multicast groups. +pub(crate) async fn list_multicast_groups( + client: &ClientTestContext, +) -> Vec<MulticastGroup> { + let url = mcast_groups_url(); + nexus_test_utils::resource_helpers::objects_list_page_authz::< + MulticastGroup, + >(client, &url) + .await + .items +} + +/// List members of a multicast group. +pub(crate) async fn list_multicast_group_members( + client: &ClientTestContext, + group_name: &str, +) -> Vec<MulticastGroupMember> { + let url = mcast_group_members_url(group_name); + nexus_test_utils::resource_helpers::objects_list_page_authz::< + MulticastGroupMember, + >(client, &url) + .await + .items +} + +/// Wait for a multicast group to transition to the specified state. +pub(crate) async fn wait_for_group_state( + client: &ClientTestContext, + group_name: &str, + expected_state: nexus_db_model::MulticastGroupState, +) -> MulticastGroup { + let expected_state_as_str = expected_state.to_string(); + match wait_for_condition( + || async { + let group = get_multicast_group(client, group_name).await; + if group.state == expected_state_as_str { + Ok(group) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + { + Ok(group) => group, + Err(poll::Error::TimedOut(elapsed)) => { + panic!( + "group {group_name} did not reach state '{expected_state_as_str}' within {elapsed:?}", + ); + } + Err(poll::Error::PermanentError(err)) => { + panic!( + "failed waiting for group {group_name} to reach state '{expected_state_as_str}': {err:?}", + ); + } + } +} + +/// Convenience function to wait for a group to become "Active". +pub(crate) async fn wait_for_group_active( + client: &ClientTestContext, + group_name: &str, +) -> MulticastGroup { + wait_for_group_state( + client, + group_name, + nexus_db_model::MulticastGroupState::Active, + ) + .await +} + +/// Wait for a specific member to reach the expected state +/// (e.g., Joined, Joining, Left). +/// +/// For "Joined" state, this function uses `wait_for_condition_with_reconciler` +/// to ensure the reconciler processes member state transitions.
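+/// +/// Example (illustrative; assumes a started instance already attached to a group named "my-group"): +/// ```rust,ignore +/// let member = wait_for_member_state( +/// cptestctx, +/// "my-group", +/// instance.identity.id, +/// nexus_db_model::MulticastGroupMemberState::Joined, +/// ) +/// .await; +/// assert_eq!(member.state, "Joined"); +/// ```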
+pub(crate) async fn wait_for_member_state( + cptestctx: &ControlPlaneTestContext, + group_name: &str, + instance_id: Uuid, + expected_state: nexus_db_model::MulticastGroupMemberState, +) -> MulticastGroupMember { + let client = &cptestctx.external_client; + let lockstep_client = &cptestctx.lockstep_client; + let expected_state_as_str = expected_state.to_string(); + + // For "Joined" state, ensure instance has a sled_id assigned + // (no need to check inventory again since ensure_inventory_ready() already + // verified all sleds have SP data at test setup) + if expected_state == nexus_db_model::MulticastGroupMemberState::Joined { + let instance_uuid = InstanceUuid::from_untyped_uuid(instance_id); + wait_for_instance_sled_assignment(cptestctx, &instance_uuid).await; + } + + let check_member = || async { + let members = list_multicast_group_members(client, group_name).await; + + // If we're looking for "Joined" state, we need to ensure the member exists first + // and then wait for the reconciler to process it + if expected_state == nexus_db_model::MulticastGroupMemberState::Joined { + if let Some(member) = + members.iter().find(|m| m.instance_id == instance_id) + { + match member.state.as_str() { + "Joined" => Ok(member.clone()), + "Joining" => { + // Member exists and is in transition - wait a bit more + Err(CondCheckError::NotYet) + } + "Left" => { + // Member in Left state, reconciler needs to process instance start - wait more + Err(CondCheckError::NotYet) + } + other_state => Err(CondCheckError::Failed(format!( + "Member {instance_id} in group {group_name} has unexpected state '{other_state}', expected 'Left', 'Joining' or 'Joined'" + ))), + } + } else { + // Member doesn't exist yet - wait for it to be created + Err(CondCheckError::NotYet) + } + } else { + // For other states, just look for exact match + if let Some(member) = + members.iter().find(|m| m.instance_id == instance_id) + { + if member.state == expected_state_as_str { + Ok(member.clone()) + } else { + Err(CondCheckError::NotYet) + } + } else { + Err(CondCheckError::NotYet) + } + } + }; + + // Use reconciler-activating wait for "Joined" state + let result = if expected_state + == nexus_db_model::MulticastGroupMemberState::Joined + { + wait_for_condition_with_reconciler( + lockstep_client, + check_member, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + } else { + wait_for_condition( + check_member, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + }; + + match result { + Ok(member) => member, + Err(poll::Error::TimedOut(elapsed)) => { + panic!( + "member {instance_id} in group {group_name} did not reach state '{expected_state_as_str}' within {elapsed:?}", + ); + } + Err(poll::Error::PermanentError(err)) => { + panic!( + "failed waiting for member {instance_id} in group {group_name} to reach state '{expected_state_as_str}': {err:?}", + ); + } + } +} + +/// Wait for an instance to have a sled_id assigned. +/// +/// This is a stricter check than `instance_wait_for_vmm_registration` - it ensures +/// that not only does the VMM exist and is not in "Creating" state, but also that +/// the VMM has been assigned to a specific sled. This is critical for multicast +/// member join operations which need the sled_id to program switch ports. 
+pub(crate) async fn wait_for_instance_sled_assignment( + cptestctx: &ControlPlaneTestContext, + instance_id: &InstanceUuid, +) { + let datastore = cptestctx.server.server_context().nexus.datastore(); + let log = &cptestctx.logctx.log; + let opctx = OpContext::for_tests(log.clone(), datastore.clone()); + + info!( + log, + "waiting for instance to have sled_id assigned"; + "instance_id" => %instance_id, + ); + + match wait_for_condition( + || async { + // Use the same batch fetch method the reconciler uses + let instance_vmm_data = datastore + .instance_and_vmm_batch_fetch(&opctx, &[*instance_id]) + .await + .map_err(|e| { + CondCheckError::Failed(format!( + "Failed to fetch instance data: {e}" + )) + })?; + + let instance_uuid = instance_id.into_untyped_uuid(); + if let Some((instance, vmm_opt)) = + instance_vmm_data.get(&instance_uuid) + { + if let Some(vmm) = vmm_opt { + debug!( + log, + "instance VMM found, checking sled assignment"; + "instance_id" => %instance_id, + "vmm_id" => %vmm.id, + "vmm_state" => ?vmm.runtime.state, + "sled_id" => %vmm.sled_id + ); + + // VMM exists and has a sled_id - we're good + Ok(()) + } else { + debug!( + log, + "instance exists but has no VMM yet"; + "instance_id" => %instance_id, + "instance_state" => ?instance.runtime_state.nexus_state.state() + ); + Err(CondCheckError::<String>::NotYet) + } + } else { + warn!( + log, + "instance not found in batch fetch"; + "instance_id" => %instance_id + ); + Err(CondCheckError::<String>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + { + Ok(_) => { + info!( + log, + "instance has sled_id assigned"; + "instance_id" => %instance_id + ); + } + Err(poll::Error::TimedOut(elapsed)) => { + panic!( + "instance {instance_id} did not get sled_id assigned within {elapsed:?}" + ); + } + Err(poll::Error::PermanentError(err)) => { + panic!( + "failed waiting for instance {instance_id} sled assignment: {err}" + ); + } + } +} + +/// Verify that inventory-based sled-to-switch-port mapping is correct. +/// +/// This validates the entire flow: +/// instance → sled → inventory → sp_slot → rear{N} → DPD underlay member +pub(crate) async fn verify_inventory_based_port_mapping( + cptestctx: &ControlPlaneTestContext, + instance_uuid: &InstanceUuid, +) -> Result<(), String> { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.clone(), datastore.clone()); + + // Get sled_id for the running instance + let sled_id = nexus + .active_instance_info(instance_uuid, None) + .await + .map_err(|e| format!("active_instance_info failed: {e}"))? + .ok_or_else(|| "instance not on a sled".to_string())?
+ .sled_id; + + // Get the multicast member for this instance to find its external_group_id + let members = datastore + .multicast_group_members_list_by_instance(&opctx, *instance_uuid, false) + .await + .map_err(|e| format!("list members failed: {e}"))?; + + let member = members + .first() + .ok_or_else(|| "no multicast membership found".to_string())?; + + let external_group_id = member.external_group_id; + + // Fetch the external multicast group to get underlay_group_id + let external_group = datastore + .multicast_group_fetch( + &opctx, + MulticastGroupUuid::from_untyped_uuid(external_group_id), + ) + .await + .map_err(|e| format!("fetch external group failed: {e}"))?; + + let underlay_group_id = external_group + .underlay_group_id + .ok_or_else(|| "external group has no underlay_group_id".to_string())?; + + // Fetch the underlay group to get its multicast IP + let underlay_group = datastore + .underlay_multicast_group_fetch(&opctx, underlay_group_id) + .await + .map_err(|e| format!("fetch underlay group failed: {e}"))?; + + let underlay_multicast_ip = underlay_group.multicast_ip.ip(); + + // Fetch latest inventory collection + let inventory = datastore + .inventory_get_latest_collection(&opctx) + .await + .map_err(|e| format!("fetch inventory failed: {e}"))? + .ok_or_else(|| "no inventory collection".to_string())?; + + // Get the sled record to find its baseboard info + let sleds = datastore + .sled_list_all_batched(&opctx, SledFilter::InService) + .await + .map_err(|e| format!("list sleds failed: {e}"))?; + let sled = sleds + .into_iter() + .find(|s| s.id() == sled_id) + .ok_or_else(|| "sled not found".to_string())?; + + // Find SP for this sled using baseboard matching (serial + part number) + let sp = inventory + .sps + .iter() + .find(|(bb, _)| { + bb.serial_number == sled.serial_number() + && bb.part_number == sled.part_number() + }) + .or_else(|| { + // Fallback to serial-only match if exact match not found + inventory + .sps + .iter() + .find(|(bb, _)| bb.serial_number == sled.serial_number()) + }) + .map(|(_, sp)| sp) + .ok_or_else(|| "SP not found for sled".to_string())?; + + let expected_rear_port = sp.sp_slot; + + // Fetch DPD underlay group configuration using the underlay multicast IP + let dpd_client = nexus_test_utils::dpd_client(cptestctx); + let underlay_group_response = dpd_client + .multicast_group_get(&underlay_multicast_ip) + .await + .map_err(|e| format!("DPD query failed: {e}"))? + .into_inner(); + + // Extract underlay members from the response + let members = match underlay_group_response { + dpd_client::types::MulticastGroupResponse::Underlay { + members, .. + } => members, + dpd_client::types::MulticastGroupResponse::External { .. } => { + return Err("Expected Underlay group, got External".to_string()); + } + }; + + // Construct the expected `PortId` for comparison + let expected_port_id = dpd_client::types::PortId::Rear( + dpd_client::types::Rear::try_from(format!("rear{expected_rear_port}")) + .map_err(|e| format!("invalid rear port: {e}"))?, + ); + + // Check if DPD has an underlay member with the expected rear port + let has_expected_member = members.iter().any(|m| { + matches!(m.direction, dpd_client::types::Direction::Underlay) + && m.port_id == expected_port_id + }); + + if has_expected_member { + Ok(()) + } else { + Err(format!("DPD does not have member on rear{expected_rear_port}")) + } +} + +/// Wait for a multicast group to have a specific number of members. 
+pub(crate) async fn wait_for_member_count( + client: &ClientTestContext, + group_name: &str, + expected_count: usize, +) { + match wait_for_condition( + || async { + let members = + list_multicast_group_members(client, group_name).await; + if members.len() == expected_count { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + { + Ok(_) => {} + Err(poll::Error::TimedOut(elapsed)) => { + panic!( + "group {group_name} did not reach member count {expected_count} within {elapsed:?}", + ); + } + Err(poll::Error::PermanentError(err)) => { + panic!( + "failed waiting for group {group_name} to reach member count {expected_count}: {err:?}", + ); + } + } +} + +/// Wait for a multicast group to be deleted (returns 404). +pub(crate) async fn wait_for_group_deleted( + client: &ClientTestContext, + group_name: &str, +) { + match wait_for_condition( + || async { + let group_url = mcast_group_url(group_name); + match NexusRequest::object_get(client, &group_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + { + Ok(response) => { + if response.status == StatusCode::NOT_FOUND { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + } + Err(_) => Ok(()), // Assume 404 or similar error means deleted + } + }, + &POLL_INTERVAL, + &MULTICAST_OPERATION_TIMEOUT, + ) + .await + { + Ok(_) => {} + Err(poll::Error::TimedOut(elapsed)) => { + panic!("group {group_name} was not deleted within {elapsed:?}",); + } + Err(poll::Error::PermanentError(err)) => { + panic!( + "failed waiting for group {group_name} to be deleted: {err:?}", + ); + } + } +} + +/// Create an instance with multicast groups. +pub(crate) async fn instance_for_multicast_groups( + cptestctx: &ControlPlaneTestContext, + project_name: &str, + instance_name: &str, + start: bool, + multicast_group_names: &[&str], +) -> Instance { + // Ensure inventory and DPD are ready before creating instances with multicast groups + // Inventory is needed for sled → switch port mapping, DPD for switch operations + if !multicast_group_names.is_empty() { + ensure_inventory_ready(cptestctx).await; + ensure_dpd_ready(cptestctx).await; + } + + let client = &cptestctx.external_client; + let multicast_groups: Vec<NameOrId> = multicast_group_names + .iter() + .map(|name| NameOrId::Name(name.parse().unwrap())) + .collect(); + + let url = format!("/v1/instances?project={project_name}"); + + object_create( + client, + &url, + &InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: format!( + "Instance for multicast group testing: {instance_name}" + ), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse::<Hostname>().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + multicast_groups, + disks: vec![], + boot_disk: None, + cpu_platform: None, + start, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }, + ) + .await +} + +/// Create multiple instances with multicast groups attached at creation time.
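+/// +/// Example (illustrative; project and group names are placeholders): +/// ```rust,ignore +/// // Each spec is (instance_name, multicast groups to join at create time). +/// let instances = create_instances_with_multicast_groups( +/// client, +/// "my-project", +/// &[ +/// ("mcast-a", &["group-1"][..]), +/// ("mcast-b", &["group-1", "group-2"][..]), +/// ], +/// true, // start the instances +/// ) +/// .await; +/// ```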
+pub(crate) async fn create_instances_with_multicast_groups( + client: &ClientTestContext, + project_name: &str, + instance_specs: &[(&str, &[&str])], // (instance_name, group_names) + start: bool, +) -> Vec { + let create_futures = + instance_specs.iter().map(|(instance_name, group_names)| { + let url = format!("/v1/instances?project={project_name}"); + let multicast_groups: Vec = group_names + .iter() + .map(|name| NameOrId::Name(name.parse().unwrap())) + .collect(); + + async move { + object_create::<_, Instance>( + client, + &url, + &InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: format!( + "multicast test instance {instance_name}" + ), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + hostname: instance_name.parse().unwrap(), + user_data: b"#cloud-config".to_vec(), + ssh_public_keys: None, + network_interfaces: + InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + boot_disk: None, + cpu_platform: None, + start, + auto_restart_policy: Some( + InstanceAutoRestartPolicy::Never, + ), + anti_affinity_groups: Vec::new(), + multicast_groups, + }, + ) + .await + } + }); + + ops::join_all(create_futures).await +} + +/// Attach an instance to a multicast group. +pub(crate) async fn multicast_group_attach( + cptestctx: &ControlPlaneTestContext, + project_name: &str, + instance_name: &str, + group_name: &str, +) { + let client = &cptestctx.external_client; + let url = format!( + "/v1/instances/{instance_name}/multicast-groups/{group_name}?project={project_name}" + ); + + // Use PUT to attach instance to multicast group + NexusRequest::new( + RequestBuilder::new(client, Method::PUT, &url) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should attach instance to multicast group"); +} + +/// Create multiple multicast groups from the same pool. +pub(crate) async fn create_multicast_groups( + client: &ClientTestContext, + pool: &IpPool, + group_specs: &[MulticastGroupForTest], +) -> Vec { + let create_futures = group_specs.iter().map(|spec| { + let group_url = mcast_groups_url(); + let params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: spec.name.parse().unwrap(), + description: spec + .description + .clone() + .unwrap_or_else(|| format!("Test group {}", spec.name)), + }, + multicast_ip: Some(spec.multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(pool.identity.name.clone())), + mvlan: None, + }; + + async move { + object_create::<_, MulticastGroup>(client, &group_url, ¶ms) + .await + } + }); + + ops::join_all(create_futures).await +} + +/// Wait for multiple groups to become "Active". +pub(crate) async fn wait_for_groups_active( + client: &ClientTestContext, + group_names: &[&str], +) -> Vec { + let wait_futures = + group_names.iter().map(|name| wait_for_group_active(client, name)); + + ops::join_all(wait_futures).await +} + +/// Clean up multiple groups. +pub(crate) async fn cleanup_multicast_groups( + client: &ClientTestContext, + group_names: &[&str], +) { + let delete_futures = group_names.iter().map(|name| { + let url = mcast_group_url(name); + async move { object_delete(client, &url).await } + }); + + ops::join_all(delete_futures).await; +} + +/// Clean up multiple instances, handling various states properly. +/// +/// This function handles the complete instance lifecycle for cleanup: +/// 1. 
Starting instances: simulate -> wait for Running -> stop -> delete +/// 2. Running instances: stop -> delete +/// 3. Stopped instances: delete +/// 4. Other states: attempt delete as-is +/// +/// Required for concurrent tests where instances may be in Starting state +/// and need simulation to complete state transitions. +pub(crate) async fn cleanup_instances( + cptestctx: &ControlPlaneTestContext, + client: &ClientTestContext, + project_name: &str, + instance_names: &[&str], +) { + let mut instances_to_stop = Vec::new(); + let mut instances_to_wait_then_stop = Vec::new(); + + // Categorize instances by their current state + for name in instance_names { + let url = format!("/v1/instances/{name}?project={project_name}"); + let instance: Instance = NexusRequest::object_get(client, &url) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap() + .await; + + match instance.runtime.run_state { + InstanceState::Running => instances_to_stop.push(*name), + InstanceState::Starting => { + instances_to_wait_then_stop.push(*name); + eprintln!( + "Instance {name} in Starting state - will wait for Running then stop", + ); + } + InstanceState::Stopped => { + eprintln!("Instance {name} already stopped") + } + _ => eprintln!( + "Instance {name} in state {:?} - will attempt to delete as-is", + instance.runtime.run_state + ), + } + } + + // Handle Starting instances: simulate -> wait -> add to stop list + if !instances_to_wait_then_stop.is_empty() { + eprintln!( + "Waiting for {} instances to finish starting...", + instances_to_wait_then_stop.len() + ); + + for name in &instances_to_wait_then_stop { + let url = format!("/v1/instances/{name}?project={project_name}"); + let instance: Instance = NexusRequest::object_get(client, &url) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap() + .await; + let instance_id = + InstanceUuid::from_untyped_uuid(instance.identity.id); + + // Simulate and wait for Running state + instance_helpers::instance_simulate( + &cptestctx.server.server_context().nexus, + &instance_id, + ) + .await; + instance_helpers::instance_wait_for_state_as( + client, + AuthnMode::PrivilegedUser, + instance_id, + InstanceState::Running, + ) + .await; + + eprintln!("Instance {name} reached Running state"); + } + + instances_to_stop.extend(&instances_to_wait_then_stop); + } + + // Stop all running instances + if !instances_to_stop.is_empty() { + stop_instances(cptestctx, client, project_name, &instances_to_stop) + .await; + } + + // Delete all instances in parallel (now that we fixed the double-delete bug) + let delete_futures = instance_names.iter().map(|name| { + let url = format!("/v1/instances/{name}?project={project_name}"); + async move { object_delete(client, &url).await } + }); + ops::join_all(delete_futures).await; +} + +/// Stop multiple instances using the exact same pattern as groups.rs. 
+pub(crate) async fn stop_instances( + cptestctx: &ControlPlaneTestContext, + client: &ClientTestContext, + project_name: &str, + instance_names: &[&str], +) { + let nexus = &cptestctx.server.server_context().nexus; + + // First, fetch all instances in parallel + let fetch_futures = instance_names.iter().map(|name| { + let url = format!("/v1/instances/{name}?project={project_name}"); + async move { + let instance_result = NexusRequest::object_get(client, &url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await; + + match instance_result { + Ok(response) => match response.parsed_body::() { + Ok(instance) => { + let id = InstanceUuid::from_untyped_uuid( + instance.identity.id, + ); + Some((*name, instance, id)) + } + Err(e) => { + eprintln!( + "Warning: Failed to parse instance {name}: {e:?}" + ); + None + } + }, + Err(e) => { + eprintln!( + "Warning: Instance {name} not found or error: {e:?}" + ); + None + } + } + } + }); + + let instances: Vec<_> = + ops::join_all(fetch_futures).await.into_iter().flatten().collect(); + + // Stop all running instances in parallel + let stop_futures = + instances.iter().filter_map(|(name, instance, instance_id)| { + if instance.runtime.run_state == InstanceState::Running { + Some(async move { + let stop_url = format!( + "/v1/instances/{name}/stop?project={project_name}" + ); + let stop_result = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await; + + match stop_result { + Ok(_) => { + instance_helpers::instance_simulate( + nexus, + instance_id, + ) + .await; + instance_helpers::instance_wait_for_state( + client, + *instance_id, + InstanceState::Stopped, + ) + .await; + } + Err(e) => { + eprintln!( + "Warning: Failed to stop instance {name}: {e:?}" + ); + } + } + }) + } else { + eprintln!( + "Skipping instance {name} - current state: {:?}", + instance.runtime.run_state + ); + None + } + }); + + ops::join_all(stop_futures).await; +} + +/// Attach multiple instances to a multicast group in parallel. +/// +/// Ensures inventory and DPD are ready once before attaching all instances, avoiding redundant checks. +pub(crate) async fn multicast_group_attach_bulk( + cptestctx: &ControlPlaneTestContext, + project_name: &str, + instance_names: &[&str], + group_name: &str, +) { + // Check inventory and DPD readiness once for all attachments + ensure_inventory_ready(cptestctx).await; + ensure_dpd_ready(cptestctx).await; + + let attach_futures = instance_names.iter().map(|instance_name| { + multicast_group_attach( + cptestctx, + project_name, + instance_name, + group_name, + ) + }); + ops::join_all(attach_futures).await; +} + +/// Detach multiple instances from a multicast group in parallel. +pub(crate) async fn multicast_group_detach_bulk( + client: &ClientTestContext, + project_name: &str, + instance_names: &[&str], + group_name: &str, +) { + let detach_futures = instance_names.iter().map(|instance_name| { + multicast_group_detach(client, project_name, instance_name, group_name) + }); + ops::join_all(detach_futures).await; +} + +/// Detach an instance from a multicast group. 
+pub(crate) async fn multicast_group_detach( + client: &ClientTestContext, + project_name: &str, + instance_name: &str, + group_name: &str, +) { + let url = format!( + "/v1/instances/{instance_name}/multicast-groups/{group_name}?project={project_name}" + ); + + // Use DELETE to detach instance from multicast group + NexusRequest::new( + RequestBuilder::new(client, Method::DELETE, &url) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Should detach instance from multicast group"); +} + +/// Utility functions for running multiple async operations in parallel. +pub(crate) mod ops { + use std::future::Future; + + /// Execute a collection of independent async operations in parallel + pub(crate) async fn join_all( + ops: impl IntoIterator>, + ) -> Vec { + futures::future::join_all(ops).await + } + + /// Execute 2 independent async operations in parallel + pub(crate) async fn join2( + op1: impl Future, + op2: impl Future, + ) -> (T1, T2) { + tokio::join!(op1, op2) + } + + /// Execute 3 independent async operations in parallel + pub(crate) async fn join3( + op1: impl Future, + op2: impl Future, + op3: impl Future, + ) -> (T1, T2, T3) { + tokio::join!(op1, op2, op3) + } + + /// Execute 4 independent async operations in parallel + pub(crate) async fn join4( + op1: impl Future, + op2: impl Future, + op3: impl Future, + op4: impl Future, + ) -> (T1, T2, T3, T4) { + tokio::join!(op1, op2, op3, op4) + } +} + +/// Common DPD state predicates for use with `wait_for_dpd_state()`. +/// +/// These predicates provide pre-built conditions for common DPD state checks. +pub(crate) mod dpd_predicates { + use super::*; + + /// Predicate that checks if a group exists in DPD as an external group. + /// + /// Used for metadata-only updates (name, description) where DPD state + /// doesn't change but we need to verify the saga completed without errors. + pub fn expect_external_group() -> impl Fn( + &dpd_client::types::MulticastGroupResponse, + ) + -> Result<(), CondCheckError> { + |response| match response { + dpd_client::types::MulticastGroupResponse::External { .. } => { + Ok(()) + } + dpd_client::types::MulticastGroupResponse::Underlay { .. } => Err( + CondCheckError::Failed("Expected external group".to_string()), + ), + } + } + + /// Predicate that checks if a group has a specific vlan_id in DPD. + /// + /// Used for mvlan updates where we need to verify the vlan_id was + /// applied to the dataplane. + pub fn expect_vlan_id( + vlan: u16, + ) -> impl Fn( + &dpd_client::types::MulticastGroupResponse, + ) -> Result<(), CondCheckError> { + move |response| match response { + dpd_client::types::MulticastGroupResponse::External { + external_forwarding, + .. + } => { + if external_forwarding.vlan_id == Some(vlan) { + Ok(()) + } else { + Err(CondCheckError::NotYet) + } + } + dpd_client::types::MulticastGroupResponse::Underlay { .. } => Err( + CondCheckError::Failed("Expected external group".to_string()), + ), + } + } +} diff --git a/nexus/tests/integration_tests/multicast/networking_integration.rs b/nexus/tests/integration_tests/multicast/networking_integration.rs new file mode 100644 index 00000000000..1ed2b1138d7 --- /dev/null +++ b/nexus/tests/integration_tests/multicast/networking_integration.rs @@ -0,0 +1,773 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Integration tests for multicast groups with other networking features +//! +//! This module contains tests that verify multicast functionality works correctly +//! when combined with other networking features like external IPs, floating IPs, +//! and complex network configurations. + +use std::net::{IpAddr, Ipv4Addr}; + +use http::{Method, StatusCode}; +use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; +use nexus_test_utils::resource_helpers::create_floating_ip; +use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_project, object_create, object_delete, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params::{ + EphemeralIpCreate, ExternalIpCreate, FloatingIpAttach, InstanceCreate, + InstanceNetworkInterfaceAttachment, MulticastGroupCreate, + MulticastGroupMemberAdd, +}; +use nexus_types::external_api::views::{ + FloatingIp, MulticastGroup, MulticastGroupMember, +}; + +use omicron_common::api::external::{ + ByteCount, IdentityMetadataCreateParams, Instance, InstanceCpuCount, + InstanceState, NameOrId, +}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; + +use super::*; +use crate::integration_tests::instances::{ + fetch_instance_external_ips, instance_simulate, instance_wait_for_state, +}; + +/// Verify instances can have both external IPs and multicast group membership. +/// +/// External IP allocation works for multicast group members, multicast state persists +/// through external IP operations, and no conflicts occur between external IP and multicast +/// DPD configuration. +#[nexus_test] +async fn test_multicast_with_external_ip_basic( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, +) { + let client = &cptestctx.external_client; + let project_name = "external-ip-mcast-project"; + let group_name = "external-ip-mcast-group"; + let instance_name = "external-ip-mcast-instance"; + + // Setup: project and IP pools in parallel + let (_, _, mcast_pool) = ops::join3( + create_project(client, project_name), + create_default_ip_pool(client), // For external IPs + create_multicast_ip_pool_with_range( + client, + "external-ip-mcast-pool", + (224, 100, 0, 1), + (224, 100, 0, 255), + ), + ) + .await; + + // Create multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 100, 0, 50)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for external IP integration test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + // Create instance (will start by default) + let instance_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: "Instance with external IP and multicast".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], // Start without external IP + multicast_groups: vec![], + disks: vec![], + boot_disk: None, + cpu_platform: None, + start: true, // Start the instance + 
auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let instance_url = format!("/v1/instances?project={project_name}"); + let instance: Instance = + object_create(client, &instance_url, &instance_params).await; + let instance_id = instance.identity.id; + + // Transition instance to Running state + let nexus = &cptestctx.server.server_context().nexus; + let instance_uuid = InstanceUuid::from_untyped_uuid(instance_id); + instance_simulate(nexus, &instance_uuid).await; + instance_wait_for_state(client, instance_uuid, InstanceState::Running) + .await; + + // Ensure multicast test prerequisites (inventory + DPD) are ready + ensure_multicast_test_ready(cptestctx).await; + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Add instance to multicast group + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Wait for multicast member to reach "Joined" state + wait_for_member_state( + cptestctx, + group_name, + instance_id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Verify member count + let members = list_multicast_group_members(client, group_name).await; + assert_eq!(members.len(), 1, "Should have one multicast member"); + + // Allocate ephemeral external IP to the same instance + let ephemeral_ip_url = format!( + "/v1/instances/{instance_name}/external-ips/ephemeral?project={project_name}" + ); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &ephemeral_ip_url) + .body(Some(&EphemeralIpCreate { + pool: None, // Use default pool + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Verify both multicast and external IP work together + + // Check that multicast membership is preserved + let members_after_ip = + list_multicast_group_members(client, group_name).await; + assert_eq!( + members_after_ip.len(), + 1, + "Multicast member should still exist after external IP allocation" + ); + assert_eq!(members_after_ip[0].instance_id, instance_id); + assert_eq!( + members_after_ip[0].state, "Joined", + "Member state should remain Joined" + ); + + // Check that external IP is properly attached + let external_ips_after_attach = + fetch_instance_external_ips(client, instance_name, project_name).await; + assert!( + !external_ips_after_attach.is_empty(), + "Instance should have external IP" + ); + // Note: external_ip.ip() from the response may differ from what's actually attached, + // so we just verify that an external IP exists + + // Remove ephemeral external IP and verify multicast is unaffected + let external_ip_detach_url = format!( + "/v1/instances/{instance_name}/external-ips/ephemeral?project={project_name}" + ); + object_delete(client, &external_ip_detach_url).await; + + // Wait for operations to settle + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify multicast membership is still intact after external IP removal + let members_after_detach = + list_multicast_group_members(client, group_name).await; + assert_eq!( + members_after_detach.len(), + 1, + "Multicast member should persist after external IP removal" + ); + assert_eq!(members_after_detach[0].instance_id, instance_id); + assert_eq!( + 
members_after_detach[0].state, "Joined", + "Member should remain Joined" + ); + + // Verify ephemeral external IP is removed (SNAT IP may still be present) + let external_ips_after_detach = + fetch_instance_external_ips(client, instance_name, project_name).await; + // Instance should have at most 1 IP left (the SNAT IP), not the ephemeral IP we attached + assert!( + external_ips_after_detach.len() <= 1, + "Instance should have at most SNAT IP remaining" + ); + + // Cleanup + cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; + cleanup_multicast_groups(client, &[group_name]).await; +} + +/// Verify external IP allocation/deallocation lifecycle for multicast group members. +/// +/// Multiple external IP attach/detach cycles don't affect multicast state, concurrent +/// operations don't cause race conditions, and dataplane configuration remains consistent +/// throughout the lifecycle. +#[nexus_test] +async fn test_multicast_external_ip_lifecycle( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, +) { + let client = &cptestctx.external_client; + let project_name = "external-ip-lifecycle-project"; + let group_name = "external-ip-lifecycle-group"; + let instance_name = "external-ip-lifecycle-instance"; + + // Setup in parallel + let (_, _, mcast_pool) = ops::join3( + create_project(client, project_name), + create_default_ip_pool(client), + create_multicast_ip_pool_with_range( + client, + "external-ip-lifecycle-pool", + (224, 101, 0, 1), + (224, 101, 0, 255), + ), + ) + .await; + + // Create multicast group and instance (similar to previous test) + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 101, 0, 75)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for external IP lifecycle test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + let instance_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: "Instance for external IP lifecycle test".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + multicast_groups: vec![], + disks: vec![], + boot_disk: None, + cpu_platform: None, + start: true, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let instance_url = format!("/v1/instances?project={project_name}"); + let instance: Instance = + object_create(client, &instance_url, &instance_params).await; + let instance_id = instance.identity.id; + + // Start instance and add to multicast group + let nexus = &cptestctx.server.server_context().nexus; + let instance_uuid = InstanceUuid::from_untyped_uuid(instance_id); + instance_simulate(nexus, &instance_uuid).await; + instance_wait_for_state(client, instance_uuid, InstanceState::Running) + .await; + + // Ensure multicast test prerequisites (inventory + DPD) are ready + ensure_multicast_test_ready(cptestctx).await; + 
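    // A note on ordering (inferred from the helper comments in this module):
    // the readiness check above confirms that inventory (which supplies the
    // sled -> switch-port mapping) and DPD are available, while the
    // reconciler pass below is what pushes pending multicast group and member
    // state to the dataplane before the test starts asserting on it.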
wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify initial multicast state + let initial_members = + list_multicast_group_members(client, group_name).await; + assert_eq!(initial_members.len(), 1); + assert_eq!(initial_members[0].state, "Joined"); + + // Test multiple external IP allocation/deallocation cycles + for cycle in 1..=3 { + // Allocate ephemeral external IP + let ephemeral_ip_url = format!( + "/v1/instances/{instance_name}/external-ips/ephemeral?project={project_name}" + ); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &ephemeral_ip_url) + .body(Some(&EphemeralIpCreate { + pool: None, // Use default pool + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Wait for dataplane configuration to settle + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify multicast state is preserved + let members_with_ip = + list_multicast_group_members(client, group_name).await; + assert_eq!( + members_with_ip.len(), + 1, + "Cycle {cycle}: Multicast member should persist during external IP allocation" + ); + assert_eq!( + members_with_ip[0].state, "Joined", + "Cycle {cycle}: Member should remain Joined" + ); + + // Verify external IP is attached + let external_ips_with_ip = + fetch_instance_external_ips(client, instance_name, project_name) + .await; + assert!( + !external_ips_with_ip.is_empty(), + "Cycle {cycle}: Instance should have external IP" + ); + + // Deallocate ephemeral external IP + let external_ip_detach_url = format!( + "/v1/instances/{instance_name}/external-ips/ephemeral?project={project_name}" + ); + object_delete(client, &external_ip_detach_url).await; + + // Wait for operations to settle + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify multicast state is still preserved + let members_without_ip = + list_multicast_group_members(client, group_name).await; + assert_eq!( + members_without_ip.len(), + 1, + "Cycle {cycle}: Multicast member should persist after external IP removal" + ); + assert_eq!( + members_without_ip[0].state, "Joined", + "Cycle {cycle}: Member should remain Joined after IP removal" + ); + + // Verify ephemeral external IP is removed (SNAT IP may still be present) + let external_ips_without_ip = + fetch_instance_external_ips(client, instance_name, project_name) + .await; + assert!( + external_ips_without_ip.len() <= 1, + "Cycle {cycle}: Instance should have at most SNAT IP remaining" + ); + } + + // Cleanup + cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; + cleanup_multicast_groups(client, &[group_name]).await; +} + +/// Verify instances can be created with both external IP and multicast group simultaneously. +/// +/// Instance creation with both features works without conflicts during initial setup, +/// and both features are properly configured from creation. 
+#[nexus_test] +async fn test_multicast_with_external_ip_at_creation( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, +) { + let client = &cptestctx.external_client; + let project_name = "creation-mixed-project"; + let group_name = "creation-mixed-group"; + let instance_name = "creation-mixed-instance"; + + // Setup - parallelize project and pool creation + let (_, _, mcast_pool) = ops::join3( + create_project(client, project_name), + create_default_ip_pool(client), + create_multicast_ip_pool_with_range( + client, + "creation-mixed-pool", + (224, 102, 0, 1), + (224, 102, 0, 255), + ), + ) + .await; + + // Create multicast group first + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 102, 0, 100)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for creation test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + // Create instance with external IP specified at creation + let external_ip_param = ExternalIpCreate::Ephemeral { pool: None }; + let instance_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: "Instance created with external IP and multicast" + .to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![external_ip_param], // External IP at creation + multicast_groups: vec![], // Will add to multicast group after creation + disks: vec![], + boot_disk: None, + cpu_platform: None, + start: true, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let instance_url = format!("/v1/instances?project={project_name}"); + let instance: Instance = + object_create(client, &instance_url, &instance_params).await; + let instance_id = instance.identity.id; + + // Transition to running + let nexus = &cptestctx.server.server_context().nexus; + let instance_uuid = InstanceUuid::from_untyped_uuid(instance_id); + instance_simulate(nexus, &instance_uuid).await; + instance_wait_for_state(client, instance_uuid, InstanceState::Running) + .await; + + // Ensure multicast test prerequisites (inventory + DPD) are ready + ensure_multicast_test_ready(cptestctx).await; + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify external IP was allocated at creation + let external_ips_after_start = + fetch_instance_external_ips(client, instance_name, project_name).await; + assert!( + !external_ips_after_start.is_empty(), + "Instance should have external IP from creation" + ); + + // Add to multicast group + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Verify both features work together - wait for member to reach Joined state + wait_for_member_state( + 
cptestctx, + group_name, + instance_id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + let members = list_multicast_group_members(client, group_name).await; + assert_eq!(members.len(), 1, "Should have multicast member"); + + let external_ips_final = + fetch_instance_external_ips(client, instance_name, project_name).await; + assert!( + !external_ips_final.is_empty(), + "Instance should retain external IP" + ); + + // Cleanup + cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; + cleanup_multicast_groups(client, &[group_name]).await; +} + +/// Verify instances can have both floating IPs and multicast group membership. +/// +/// Floating IP attachment works for multicast group members, multicast state persists +/// through floating IP operations, and no conflicts occur between floating IP and +/// multicast DPD configuration. +#[nexus_test] +async fn test_multicast_with_floating_ip_basic( + cptestctx: &nexus_test_utils::ControlPlaneTestContext< + omicron_nexus::Server, + >, +) { + let client = &cptestctx.external_client; + let project_name = "floating-ip-mcast-project"; + let group_name = "floating-ip-mcast-group"; + let instance_name = "floating-ip-mcast-instance"; + let floating_ip_name = "floating-ip-mcast-ip"; + + // Setup: project and IP pools - parallelize creation + let (_, _, mcast_pool) = ops::join3( + create_project(client, project_name), + create_default_ip_pool(client), // For floating IPs + create_multicast_ip_pool_with_range( + client, + "floating-ip-mcast-pool", + (224, 200, 0, 1), + (224, 200, 0, 255), + ), + ) + .await; + + // Create floating IP + let floating_ip = + create_floating_ip(client, floating_ip_name, project_name, None, None) + .await; + + // Create multicast group + let multicast_ip = IpAddr::V4(Ipv4Addr::new(224, 200, 0, 50)); + let group_url = "/v1/multicast-groups".to_string(); + let group_params = MulticastGroupCreate { + identity: IdentityMetadataCreateParams { + name: group_name.parse().unwrap(), + description: "Group for floating IP integration test".to_string(), + }, + multicast_ip: Some(multicast_ip), + source_ips: None, + pool: Some(NameOrId::Name(mcast_pool.identity.name.clone())), + mvlan: None, + }; + + object_create::<_, MulticastGroup>(client, &group_url, &group_params).await; + wait_for_group_active(client, group_name).await; + + // Create instance (will start by default) + let instance_params = InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: "Instance with floating IP and multicast".to_string(), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(1), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], // Start without external IP + multicast_groups: vec![], + disks: vec![], + boot_disk: None, + cpu_platform: None, + start: true, // Start the instance + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + + let instance_url = format!("/v1/instances?project={project_name}"); + let instance: Instance = + object_create(client, &instance_url, &instance_params).await; + let instance_id = instance.identity.id; + + // Transition instance to Running state + let nexus = &cptestctx.server.server_context().nexus; + let instance_uuid = InstanceUuid::from_untyped_uuid(instance_id); + instance_simulate(nexus, &instance_uuid).await; + 
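    // The simulated sled agent does not advance instance state on its own;
    // `instance_simulate` above pokes it so the Starting -> Running
    // transition completes, and the wait below observes Running through the
    // external API before multicast membership is exercised.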
instance_wait_for_state(client, instance_uuid, InstanceState::Running) + .await; + + // Ensure multicast test prerequisites (inventory + DPD) are ready + ensure_multicast_test_ready(cptestctx).await; + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Add instance to multicast group + let member_add_url = format!( + "{}?project={project_name}", + mcast_group_members_url(group_name) + ); + let member_params = MulticastGroupMemberAdd { + instance: NameOrId::Name(instance_name.parse().unwrap()), + }; + + object_create::<_, MulticastGroupMember>( + client, + &member_add_url, + &member_params, + ) + .await; + + // Wait for multicast member to reach "Joined" state + wait_for_member_state( + cptestctx, + group_name, + instance_id, + nexus_db_model::MulticastGroupMemberState::Joined, + ) + .await; + + // Verify member count + let members = list_multicast_group_members(client, group_name).await; + assert_eq!(members.len(), 1, "Should have one multicast member"); + + // Verify that inventory-based mapping correctly mapped sled → switch port + verify_inventory_based_port_mapping(cptestctx, &instance_uuid) + .await + .expect("port mapping verification should succeed"); + + // Attach floating IP to the same instance + let attach_url = format!( + "/v1/floating-ips/{floating_ip_name}/attach?project={project_name}" + ); + let attach_params = FloatingIpAttach { + kind: nexus_types::external_api::params::FloatingIpParentKind::Instance, + parent: NameOrId::Name(instance_name.parse().unwrap()), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &attach_url) + .body(Some(&attach_params)) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + + // Verify both multicast and floating IP work together + + // Check that multicast membership is preserved + let members_after_ip = + list_multicast_group_members(client, group_name).await; + assert_eq!( + members_after_ip.len(), + 1, + "Multicast member should still exist after floating IP attachment" + ); + assert_eq!(members_after_ip[0].instance_id, instance_id); + assert_eq!( + members_after_ip[0].state, "Joined", + "Member state should remain Joined" + ); + + // Check that floating IP is properly attached + let external_ips_after_attach = + fetch_instance_external_ips(client, instance_name, project_name).await; + assert!( + !external_ips_after_attach.is_empty(), + "Instance should have external IP" + ); + // Find the floating IP among the external IPs (there may also be SNAT IP) + let has_floating_ip = + external_ips_after_attach.iter().any(|ip| ip.ip() == floating_ip.ip); + assert!(has_floating_ip, "Instance should have the floating IP attached"); + + // Detach floating IP and verify multicast is unaffected + let detach_url = format!( + "/v1/floating-ips/{floating_ip_name}/detach?project={project_name}" + ); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &detach_url) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + + // Wait for operations to settle + wait_for_multicast_reconciler(&cptestctx.lockstep_client).await; + + // Verify multicast membership is still intact after floating IP removal + let members_after_detach = + list_multicast_group_members(client, group_name).await; + assert_eq!( + members_after_detach.len(), + 1, + "Multicast member should persist after floating IP detachment" + ); + 
assert_eq!(members_after_detach[0].instance_id, instance_id); + assert_eq!( + members_after_detach[0].state, "Joined", + "Member should remain Joined" + ); + + // Verify floating IP is detached (SNAT IP may still be present) + let external_ips_after_detach = + fetch_instance_external_ips(client, instance_name, project_name).await; + let still_has_floating_ip = + external_ips_after_detach.iter().any(|ip| ip.ip() == floating_ip.ip); + assert!( + !still_has_floating_ip, + "Instance should not have the floating IP attached anymore" + ); + + // Cleanup floating IP + let fip_delete_url = + format!("/v1/floating-ips/{floating_ip_name}?project={project_name}"); + object_delete(client, &fip_delete_url).await; + + // Cleanup + cleanup_instances(cptestctx, client, project_name, &[instance_name]).await; + cleanup_multicast_groups(client, &[group_name]).await; +} diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index d2fab648877..f221f8f8ffb 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -179,6 +179,7 @@ async fn test_project_deletion_with_instance( start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await; diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index 53baee4ae34..ee718245961 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -114,6 +114,7 @@ impl ResourceAllocator { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .authn_as(self.auth.clone()) diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 484b94c48c5..11cc874287a 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -1400,6 +1400,7 @@ fn at_current_101_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, )) .execute_async(&*pool_and_conn.conn) diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index bbae7053abf..0c4a94870c0 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -151,6 +151,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await; @@ -358,6 +359,7 @@ async fn test_snapshot_stopped_instance(cptestctx: &ControlPlaneTestContext) { start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }, ) .await; diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 7f5d699ff98..a31f99a8206 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -68,6 +68,7 @@ async fn create_instance_expect_failure( start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; NexusRequest::new( @@ -160,6 +161,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) { true, Default::default(), None, + Vec::new(), ) .await; } diff --git 
a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index 6448f71610c..a3f57b409ac 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -359,6 +359,32 @@ static SETUP_REQUESTS: LazyLock> = LazyLock::new(|| { body: serde_json::to_value(&*DEMO_STOPPED_INSTANCE_CREATE).unwrap(), id_routes: vec!["/v1/instances/{id}"], }, + // Create a multicast IP pool + SetupReq::Post { + url: &DEMO_IP_POOLS_URL, + body: serde_json::to_value(&*DEMO_MULTICAST_IP_POOL_CREATE) + .unwrap(), + id_routes: vec!["/v1/ip-pools/{id}"], + }, + // Create a multicast IP pool range + SetupReq::Post { + url: &DEMO_MULTICAST_IP_POOL_RANGES_ADD_URL, + body: serde_json::to_value(&*DEMO_MULTICAST_IP_POOL_RANGE).unwrap(), + id_routes: vec![], + }, + // Link multicast pool to default silo + SetupReq::Post { + url: &DEMO_MULTICAST_IP_POOL_SILOS_URL, + body: serde_json::to_value(&*DEMO_MULTICAST_IP_POOL_SILOS_BODY) + .unwrap(), + id_routes: vec![], + }, + // Create a multicast group in the Project + SetupReq::Post { + url: &MULTICAST_GROUPS_URL, + body: serde_json::to_value(&*DEMO_MULTICAST_GROUP_CREATE).unwrap(), + id_routes: vec!["/v1/multicast-groups/{id}"], + }, // Create an affinity group in the Project SetupReq::Post { url: &DEMO_PROJECT_URL_AFFINITY_GROUPS, diff --git a/nexus/tests/integration_tests/utilization.rs b/nexus/tests/integration_tests/utilization.rs index f5e4958502d..4e583301c6e 100644 --- a/nexus/tests/integration_tests/utilization.rs +++ b/nexus/tests/integration_tests/utilization.rs @@ -235,6 +235,7 @@ async fn create_resources_in_test_suite_silo(client: &ClientTestContext) { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + multicast_groups: Vec::new(), }; NexusRequest::objects_post( diff --git a/nexus/tests/integration_tests/vpc_routers.rs b/nexus/tests/integration_tests/vpc_routers.rs index ce72e605c56..b1653150679 100644 --- a/nexus/tests/integration_tests/vpc_routers.rs +++ b/nexus/tests/integration_tests/vpc_routers.rs @@ -518,6 +518,7 @@ async fn test_vpc_routers_custom_delivered_to_instance( true, Default::default(), None, + Vec::new(), ) .await; instance_simulate( diff --git a/nexus/tests/integration_tests/vpcs.rs b/nexus/tests/integration_tests/vpcs.rs index 9416fe36e60..3d2dd7b83f2 100644 --- a/nexus/tests/integration_tests/vpcs.rs +++ b/nexus/tests/integration_tests/vpcs.rs @@ -466,6 +466,7 @@ async fn test_limited_collaborator_can_create_instance( network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], + multicast_groups: vec![], disks: vec![], boot_disk: None, cpu_platform: None, diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a285d863d05..91f2e867383 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -17,6 +17,7 @@ use omicron_common::api::external::{ Nullable, PaginationOrder, RouteDestination, RouteTarget, UserId, }; use omicron_common::disk::DiskVariant; +use omicron_common::vlan::VlanID; use omicron_uuid_kinds::*; use oxnet::{IpNet, Ipv4Net, Ipv6Net}; use parse_display::Display; @@ -29,7 +30,10 @@ use serde::{ use std::collections::BTreeMap; use std::collections::BTreeSet; use std::num::NonZeroU32; -use std::{net::IpAddr, str::FromStr}; +use std::{ + net::{IpAddr, Ipv4Addr, Ipv6Addr}, + str::FromStr, +}; use url::Url; use uuid::Uuid; @@ -79,6 +83,7 @@ pub struct UninitializedSledId { 
path_param!(AffinityGroupPath, affinity_group, "affinity group"); path_param!(AntiAffinityGroupPath, anti_affinity_group, "anti affinity group"); +path_param!(MulticastGroupPath, multicast_group, "multicast group"); path_param!(ProjectPath, project, "project"); path_param!(InstancePath, instance, "instance"); path_param!(NetworkInterfacePath, interface, "network interface"); @@ -232,6 +237,19 @@ pub struct FloatingIpSelector { pub floating_ip: NameOrId, } +#[derive(Deserialize, JsonSchema, Clone)] +pub struct MulticastGroupSelector { + /// Name or ID of the multicast group (fleet-scoped) + pub multicast_group: NameOrId, +} + +/// Path parameter for multicast group lookup by IP address. +#[derive(Deserialize, Serialize, JsonSchema)] +pub struct MulticastGroupIpLookupPath { + /// IP address of the multicast group + pub address: IpAddr, +} + #[derive(Deserialize, JsonSchema)] pub struct DiskSelector { /// Name or ID of the project, only required if `disk` is provided as a `Name` @@ -1226,7 +1244,7 @@ pub struct InstanceCreate { /// Must be a Base64-encoded string, as specified in RFC 4648 § 4 (+ and / /// characters with padding). Maximum 32 KiB unencoded data. // While serde happily accepts #[serde(with = "")] as a shorthand for - // specifing `serialize_with` and `deserialize_with`, schemars requires the + // specifying `serialize_with` and `deserialize_with`, schemars requires the // argument to `with` to be a type rather than merely a path prefix (i.e. a // mod or type). It's admittedly a bit tricky for schemars to address; // unlike `serialize` or `deserialize`, `JsonSchema` requires several @@ -1248,6 +1266,14 @@ pub struct InstanceCreate { #[serde(default)] pub external_ips: Vec, + /// The multicast groups this instance should join. + /// + /// The instance will be automatically added as a member of the specified + /// multicast groups during creation, enabling it to send and receive + /// multicast traffic for those groups. + #[serde(default)] + pub multicast_groups: Vec, + /// A list of disks to be attached to the instance. /// /// Disk attachments of type "create" will be created, while those of type @@ -1362,6 +1388,17 @@ pub struct InstanceUpdate { /// instance will have the most general CPU platform supported by the sled /// it is initially placed on. pub cpu_platform: Nullable, + + /// Multicast groups this instance should join. + /// + /// When specified, this replaces the instance's current multicast group + /// membership with the new set of groups. The instance will leave any + /// groups not listed here and join any new groups that are specified. + /// + /// If not provided (None), the instance's multicast group membership + /// will not be changed. + #[serde(default)] + pub multicast_groups: Option>, } #[inline] @@ -1784,7 +1821,7 @@ pub struct LoopbackAddressCreate { /// address from. pub address_lot: NameOrId, - /// The containing the switch this loopback address will be configured on. + /// The rack containing the switch this loopback address will be configured on. pub rack_id: Uuid, // TODO: #3604 Consider using `SwitchLocation` type instead of `Name` for `LoopbackAddressCreate.switch_location` @@ -2732,7 +2769,7 @@ pub struct AlertReceiverProbe { pub resend: bool, } -// Audit log has its own pagination scheme because it paginates by timestamp. +/// Audit log has its own pagination scheme because it paginates by timestamp. 
 #[derive(Deserialize, JsonSchema, Serialize, PartialEq, Debug, Clone)]
 pub struct AuditLog {
     /// Required, inclusive
@@ -2741,6 +2778,290 @@
     pub end_time: Option<DateTime<Utc>>,
 }
+/// Create-time parameters for a multicast group.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct MulticastGroupCreate {
+    #[serde(flatten)]
+    pub identity: IdentityMetadataCreateParams,
+    /// The multicast IP address to allocate. If None, one will be allocated
+    /// from the default pool.
+    #[serde(default, deserialize_with = "validate_multicast_ip_param")]
+    pub multicast_ip: Option<IpAddr>,
+    /// Source IP addresses for Source-Specific Multicast (SSM).
+    ///
+    /// None uses default behavior (Any-Source Multicast).
+    /// Empty list explicitly allows any source (Any-Source Multicast).
+    /// Non-empty list restricts to specific sources (SSM).
+    #[serde(default, deserialize_with = "validate_source_ips_param")]
+    pub source_ips: Option<Vec<IpAddr>>,
+    /// Name or ID of the IP pool to allocate from. If None, uses the default
+    /// multicast pool.
+    #[serde(default)]
+    pub pool: Option<NameOrId>,
+    /// Multicast VLAN (MVLAN) for egress multicast traffic to upstream networks.
+    /// Tags packets leaving the rack to traverse VLAN-segmented upstream networks.
+    ///
+    /// Valid range: 2-4094 (VLAN IDs 0-1 are reserved by IEEE 802.1Q standard).
+    #[serde(default, deserialize_with = "validate_mvlan_option")]
+    pub mvlan: Option<VlanID>,
+}
+
+/// Update-time parameters for a multicast group.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct MulticastGroupUpdate {
+    #[serde(flatten)]
+    pub identity: IdentityMetadataUpdateParams,
+    #[serde(
+        default,
+        deserialize_with = "validate_source_ips_param",
+        skip_serializing_if = "Option::is_none"
+    )]
+    pub source_ips: Option<Vec<IpAddr>>,
+    /// Multicast VLAN (MVLAN) for egress multicast traffic to upstream networks.
+    /// Set to null to clear the MVLAN. Valid range: 2-4094 when provided.
+    /// Omit the field to leave mvlan unchanged.
+    #[serde(
+        default,
+        deserialize_with = "validate_mvlan_option_nullable",
+        skip_serializing_if = "Option::is_none"
+    )]
+    pub mvlan: Option<Nullable<VlanID>>,
+}
+
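An illustration of how the create parameters above behave on the wire, consistent with the validators in this module and the unit tests at the end of this file: omitting `source_ips` (or sending null) selects Any-Source Multicast, an empty list is explicit ASM, and a non-empty list selects SSM; `mvlan`, when present, must be 2-4094. Values below are examples only:

// Any-Source Multicast: no sources given; the address comes from the pool.
let asm: MulticastGroupCreate = serde_json::from_str(
    r#"{ "name": "asm-group", "description": "ASM example" }"#,
)
.unwrap();
assert_eq!(asm.source_ips, None);

// Source-Specific Multicast: explicit group address plus allowed sources.
let ssm: MulticastGroupCreate = serde_json::from_str(
    r#"{
        "name": "ssm-group",
        "description": "SSM example",
        "multicast_ip": "232.1.2.3",
        "source_ips": ["10.0.0.1"],
        "mvlan": 100
    }"#,
)
.unwrap();
assert_eq!(ssm.source_ips.map(|v| v.len()), Some(1));

// Rejected: 224.0.0.0/24 is reserved for link-local control protocols.
assert!(
    serde_json::from_str::<MulticastGroupCreate>(
        r#"{ "name": "bad", "description": "x", "multicast_ip": "224.0.0.5" }"#
    )
    .is_err()
);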
+/// Parameters for adding an instance to a multicast group.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct MulticastGroupMemberAdd {
+    /// Name or ID of the instance to add to the multicast group
+    pub instance: NameOrId,
+}
+
+// MVLAN validators
+
+/// Dendrite requires VLAN IDs >= 2 (it rejects 0 and 1).
+///
+/// Valid range is 2-4094.
+fn validate_mvlan(vlan_id: VlanID) -> Result<VlanID, String> {
+    let value: u16 = vlan_id.into();
+    if value >= 2 {
+        Ok(vlan_id)
+    } else {
+        Err(format!(
+            "invalid mvlan: {value} (must be >= 2, VLAN IDs 0-1 are reserved)"
+        ))
+    }
+}
+
+fn validate_mvlan_option<'de, D>(
+    deserializer: D,
+) -> Result<Option<VlanID>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    let opt = Option::<VlanID>::deserialize(deserializer)?;
+    match opt {
+        Some(v) => {
+            validate_mvlan(v).map(Some).map_err(serde::de::Error::custom)
+        }
+        None => Ok(None),
+    }
+}
+
+fn validate_mvlan_option_nullable<'de, D>(
+    deserializer: D,
+) -> Result<Option<Nullable<VlanID>>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    // Deserialize as Nullable directly, which handles null properly. When the
+    // field is explicitly null, the Nullable deserializer returns
+    // Nullable(None). We always wrap the result in Some because reaching this
+    // deserializer means the field was present in the input.
+    let nullable = Nullable::<VlanID>::deserialize(deserializer)?;
+    match nullable.0 {
+        Some(v) => validate_mvlan(v)
+            .map(|vv| Some(Nullable(Some(vv))))
+            .map_err(serde::de::Error::custom),
+        None => Ok(Some(Nullable(None))), // Explicit null to clear
+    }
+}
+
+/// Parameters for removing an instance from a multicast group.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct MulticastGroupMemberRemove {
+    /// Name or ID of the instance to remove from the multicast group
+    pub instance: NameOrId,
+}
+
+/// Path parameters for multicast group member operations.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct MulticastGroupMemberPath {
+    /// Name or ID of the multicast group
+    pub multicast_group: NameOrId,
+    /// Name or ID of the instance
+    pub instance: NameOrId,
+}
+
+/// Path parameters for instance multicast group operations.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+pub struct InstanceMulticastGroupPath {
+    /// Name or ID of the instance
+    pub instance: NameOrId,
+    /// Name or ID of the multicast group
+    pub multicast_group: NameOrId,
+}
+
+/// Validate that an IP address is suitable for use as an SSM source.
+///
+/// For specifics, see RFC 4607:
+///
+pub fn validate_source_ip(ip: IpAddr) -> Result<(), String> {
+    match ip {
+        IpAddr::V4(ipv4) => validate_ipv4_source(ipv4),
+        IpAddr::V6(ipv6) => validate_ipv6_source(ipv6),
+    }
+}
+
+/// Validate that an IPv4 address is suitable for use as a multicast source.
+fn validate_ipv4_source(addr: Ipv4Addr) -> Result<(), String> {
+    // Must be a unicast address
+    if !is_unicast_v4(&addr) {
+        return Err(format!("{} is not a unicast address", addr));
+    }
+
+    // Exclude problematic addresses (mostly align with Dendrite, but block link-local)
+    if addr.is_loopback()
+        || addr.is_broadcast()
+        || addr.is_unspecified()
+        || addr.is_link_local()
+    {
+        return Err(format!("{} is a special-use address", addr));
+    }
+
+    Ok(())
+}
+
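The `mvlan` field on `MulticastGroupUpdate` is deliberately three-state, and `validate_mvlan_option_nullable` above encodes it as `Option<Nullable<VlanID>>`. A sketch of the three wire forms (example values; it relies only on the public tuple field of `Nullable` that the validator itself uses):

// Field omitted -> None: leave the existing MVLAN unchanged.
let unchanged: MulticastGroupUpdate =
    serde_json::from_str(r#"{ "name": "g" }"#).unwrap();
assert!(unchanged.mvlan.is_none());

// Explicit null -> Some(Nullable(None)): clear the MVLAN.
let cleared: MulticastGroupUpdate =
    serde_json::from_str(r#"{ "name": "g", "mvlan": null }"#).unwrap();
assert!(matches!(cleared.mvlan, Some(Nullable(None))));

// In-range value -> Some(Nullable(Some(v))): set the MVLAN.
let set: MulticastGroupUpdate =
    serde_json::from_str(r#"{ "name": "g", "mvlan": 42 }"#).unwrap();
assert!(matches!(set.mvlan, Some(Nullable(Some(v))) if u16::from(v) == 42));

// 0 and 1 are rejected by `validate_mvlan`.
assert!(
    serde_json::from_str::<MulticastGroupUpdate>(
        r#"{ "name": "g", "mvlan": 1 }"#
    )
    .is_err()
);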
+/// Validate that an IPv6 address is suitable for use as a multicast source.
+fn validate_ipv6_source(addr: Ipv6Addr) -> Result<(), String> {
+    // Must be a unicast address
+    if !is_unicast_v6(&addr) {
+        return Err(format!("{} is not a unicast address", addr));
+    }
+
+    // Exclude problematic addresses (align with Dendrite validation, but block link-local)
+    if addr.is_loopback()
+        || addr.is_unspecified()
+        // fe80::/10 link-local
+        || ((addr.segments()[0] & 0xffc0) == 0xfe80)
+    {
+        return Err(format!("{} is a special-use address", addr));
+    }
+
+    Ok(())
+}
+
+/// Validate that an IP address is a proper multicast address for API validation.
+pub fn validate_multicast_ip(ip: IpAddr) -> Result<(), String> {
+    match ip {
+        IpAddr::V4(ipv4) => validate_ipv4_multicast(ipv4),
+        IpAddr::V6(ipv6) => validate_ipv6_multicast(ipv6),
+    }
+}
+
+// IPv4 link-local multicast range reserved for local network control.
+const RESERVED_IPV4_MULTICAST_LINK_LOCAL: Ipv4Addr =
+    Ipv4Addr::new(224, 0, 0, 0);
+const RESERVED_IPV4_MULTICAST_LINK_LOCAL_PREFIX: u8 = 24;
+
+/// Validates IPv4 multicast addresses.
+fn validate_ipv4_multicast(addr: Ipv4Addr) -> Result<(), String> {
+    // Verify this is actually a multicast address
+    if !addr.is_multicast() {
+        return Err(format!("{} is not a multicast address", addr));
+    }
+
+    // Block link-local multicast (224.0.0.0/24) as it's reserved for local network control
+    let link_local = Ipv4Net::new(
+        RESERVED_IPV4_MULTICAST_LINK_LOCAL,
+        RESERVED_IPV4_MULTICAST_LINK_LOCAL_PREFIX,
+    )
+    .unwrap();
+    if link_local.contains(addr) {
+        return Err(format!(
+            "{addr} is in the link-local multicast range (224.0.0.0/24)"
+        ));
+    }
+
+    Ok(())
+}
+
+/// Validates IPv6 multicast addresses.
+fn validate_ipv6_multicast(addr: Ipv6Addr) -> Result<(), String> {
+    if !addr.is_multicast() {
+        return Err(format!("{addr} is not a multicast address"));
+    }
+
+    // Define reserved IPv6 multicast subnets using oxnet
+    let reserved_subnets = [
+        // Interface-local scope (ff01::/16)
+        Ipv6Net::new(Ipv6Addr::new(0xff01, 0, 0, 0, 0, 0, 0, 0), 16).unwrap(),
+        // Link-local scope (ff02::/16)
+        Ipv6Net::new(Ipv6Addr::new(0xff02, 0, 0, 0, 0, 0, 0, 0), 16).unwrap(),
+    ];
+
+    // Check reserved subnets
+    for subnet in &reserved_subnets {
+        if subnet.contains(addr) {
+            return Err(format!(
+                "{} is in the reserved multicast subnet {}",
+                addr, subnet
+            ));
+        }
+    }
+
+    // Note: Admin-local scope (ff04::/16) is allowed for on-premises deployments.
+    // Collision avoidance with underlay addresses is handled by the mapping
+    // function which sets a collision-avoidance bit in the underlay space.
+
+    Ok(())
+}
+
+/// Deserializer for validating multicast IP addresses.
+fn validate_multicast_ip_param<'de, D>(
+    deserializer: D,
+) -> Result<Option<IpAddr>, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let ip_opt = Option::<IpAddr>::deserialize(deserializer)?;
+    if let Some(ip) = ip_opt {
+        validate_multicast_ip(ip).map_err(|e| de::Error::custom(e))?;
+    }
+    Ok(ip_opt)
+}
+
+/// Deserializer for validating source IP addresses.
+fn validate_source_ips_param<'de, D>( + deserializer: D, +) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + let ips_opt = Option::>::deserialize(deserializer)?; + if let Some(ref ips) = ips_opt { + for ip in ips { + validate_source_ip(*ip).map_err(|e| de::Error::custom(e))?; + } + } + Ok(ips_opt) +} + +const fn is_unicast_v4(ip: &Ipv4Addr) -> bool { + !ip.is_multicast() +} + +const fn is_unicast_v6(ip: &Ipv6Addr) -> bool { + !ip.is_multicast() +} + // SCIM #[derive(Deserialize, JsonSchema)] @@ -2757,3 +3078,478 @@ pub struct ScimV2UserPathParam { pub struct ScimV2GroupPathParam { pub group_id: String, } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_multicast_ip_v4() { + // Valid IPv4 multicast addresses + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(224, 1, 0, 1))) + .is_ok() + ); + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(225, 2, 3, 4))) + .is_ok() + ); + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(231, 5, 6, 7))) + .is_ok() + ); + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(233, 1, 1, 1))) + .is_ok() + ); // GLOP addressing - allowed + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(239, 1, 1, 1))) + .is_ok() + ); // Admin-scoped - allowed + + // Invalid IPv4 multicast addresses - reserved ranges + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(224, 0, 0, 1))) + .is_err() + ); // Link-local control + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(224, 0, 0, 255))) + .is_err() + ); // Link-local control + + // Non-multicast addresses + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1))) + .is_err() + ); + assert!( + validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))) + .is_err() + ); + } + + #[test] + fn test_validate_multicast_ip_v6() { + // Valid IPv6 multicast addresses + assert!( + validate_multicast_ip(IpAddr::V6(Ipv6Addr::new( + 0xff0e, 0, 0, 0, 0, 0, 0, 1 + ))) + .is_ok() + ); // Global scope + assert!( + validate_multicast_ip(IpAddr::V6(Ipv6Addr::new( + 0xff0d, 0, 0, 0, 0, 0, 0, 1 + ))) + .is_ok() + ); // Site-local scope + assert!( + validate_multicast_ip(IpAddr::V6(Ipv6Addr::new( + 0xff05, 0, 0, 0, 0, 0, 0, 1 + ))) + .is_ok() + ); // Site-local admin scope - allowed + assert!( + validate_multicast_ip(IpAddr::V6(Ipv6Addr::new( + 0xff08, 0, 0, 0, 0, 0, 0, 1 + ))) + .is_ok() + ); // Org-local admin scope - allowed + + // Invalid IPv6 multicast addresses - reserved ranges + assert!( + validate_multicast_ip(IpAddr::V6(Ipv6Addr::new( + 0xff01, 0, 0, 0, 0, 0, 0, 1 + ))) + .is_err() + ); // Interface-local + assert!( + validate_multicast_ip(IpAddr::V6(Ipv6Addr::new( + 0xff02, 0, 0, 0, 0, 0, 0, 1 + ))) + .is_err() + ); // Link-local + + // Admin-local (ff04::/16) is allowed for on-premises deployments. + // Collision avoidance is handled by the mapping function which sets + // a collision-avoidance bit to separate external and underlay spaces. 
 // SCIM
 
 #[derive(Deserialize, JsonSchema)]
@@ -2757,3 +3078,478 @@ pub struct ScimV2UserPathParam {
 pub struct ScimV2GroupPathParam {
     pub group_id: String,
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_validate_multicast_ip_v4() {
+        // Valid IPv4 multicast addresses
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(224, 1, 0, 1)))
+                .is_ok()
+        );
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(225, 2, 3, 4)))
+                .is_ok()
+        );
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(231, 5, 6, 7)))
+                .is_ok()
+        );
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(233, 1, 1, 1)))
+                .is_ok()
+        ); // GLOP addressing - allowed
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(239, 1, 1, 1)))
+                .is_ok()
+        ); // Admin-scoped - allowed
+
+        // Invalid IPv4 multicast addresses - reserved ranges
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(224, 0, 0, 1)))
+                .is_err()
+        ); // Link-local control
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(224, 0, 0, 255)))
+                .is_err()
+        ); // Link-local control
+
+        // Non-multicast addresses
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1)))
+                .is_err()
+        );
+        assert!(
+            validate_multicast_ip(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)))
+                .is_err()
+        );
+    }
+
+    #[test]
+    fn test_validate_multicast_ip_v6() {
+        // Valid IPv6 multicast addresses
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff0e, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_ok()
+        ); // Global scope
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff0d, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_ok()
+        ); // Site-local scope
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff05, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_ok()
+        ); // Site-local admin scope - allowed
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff08, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_ok()
+        ); // Org-local admin scope - allowed
+
+        // Invalid IPv6 multicast addresses - reserved ranges
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff01, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_err()
+        ); // Interface-local
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff02, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_err()
+        ); // Link-local
+
+        // Admin-local (ff04::/16) is allowed for on-premises deployments.
+        // Collision avoidance is handled by the mapping function which sets
+        // a collision-avoidance bit to separate external and underlay spaces.
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff04, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_ok()
+        );
+
+        // Non-multicast addresses
+        assert!(
+            validate_multicast_ip(IpAddr::V6(Ipv6Addr::new(
+                0x2001, 0xdb8, 0, 0, 0, 0, 0, 1
+            )))
+            .is_err()
+        );
+    }
+
+    #[test]
+    fn test_validate_source_ip_v4() {
+        // Valid IPv4 source addresses
+        assert!(
+            validate_source_ip(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1)))
+                .is_ok()
+        );
+        assert!(
+            validate_source_ip(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))).is_ok()
+        );
+        assert!(
+            validate_source_ip(IpAddr::V4(Ipv4Addr::new(203, 0, 113, 1)))
+                .is_ok()
+        ); // TEST-NET-3
+
+        // Invalid IPv4 source addresses
+        assert!(
+            validate_source_ip(IpAddr::V4(Ipv4Addr::new(224, 1, 1, 1)))
+                .is_err()
+        ); // Multicast
+        assert!(
+            validate_source_ip(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0))).is_err()
+        ); // Unspecified
+        assert!(
+            validate_source_ip(IpAddr::V4(Ipv4Addr::new(255, 255, 255, 255)))
+                .is_err()
+        ); // Broadcast
+        assert!(
+            validate_source_ip(IpAddr::V4(Ipv4Addr::new(169, 254, 1, 1)))
+                .is_err()
+        ); // Link-local
+    }
+
+    #[test]
+    fn test_validate_source_ip_v6() {
+        // Valid IPv6 source addresses
+        assert!(
+            validate_source_ip(IpAddr::V6(Ipv6Addr::new(
+                0x2001, 0xdb8, 0, 0, 0, 0, 0, 1
+            )))
+            .is_ok()
+        );
+        assert!(
+            validate_source_ip(IpAddr::V6(Ipv6Addr::new(
+                0x2001, 0x4860, 0x4860, 0, 0, 0, 0, 0x8888
+            )))
+            .is_ok()
+        );
+
+        // Invalid IPv6 source addresses
+        assert!(
+            validate_source_ip(IpAddr::V6(Ipv6Addr::new(
+                0xff0e, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_err()
+        ); // Multicast
+        assert!(
+            validate_source_ip(IpAddr::V6(Ipv6Addr::new(
+                0, 0, 0, 0, 0, 0, 0, 0
+            )))
+            .is_err()
+        ); // Unspecified
+        assert!(
+            validate_source_ip(IpAddr::V6(Ipv6Addr::new(
+                0, 0, 0, 0, 0, 0, 0, 1
+            )))
+            .is_err()
+        ); // Loopback
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_with_all_fields() {
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "multicast_ip": "224.1.2.3",
+            "source_ips": ["10.0.0.1", "10.0.0.2"],
+            "pool": "default",
+            "mvlan": 10
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(params.identity.name.as_str(), "test-group");
+        assert_eq!(
+            params.multicast_ip,
+            Some(IpAddr::V4(Ipv4Addr::new(224, 1, 2, 3)))
+        );
+        assert_eq!(
+            params.source_ips,
+            Some(vec![
+                IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)),
+                IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2))
+            ])
+        );
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_without_optional_fields() {
+        // This is the critical test - multicast_ip, source_ips, pool, and mvlan are all optional
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group"
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(
+            result.is_ok(),
+            "Failed to deserialize without optional fields: {:?}",
+            result.err()
+        );
+        let params = result.unwrap();
+        assert_eq!(params.identity.name.as_str(), "test-group");
+        assert_eq!(params.multicast_ip, None);
+        assert_eq!(params.source_ips, None);
+        assert_eq!(params.pool, None);
+        assert_eq!(params.mvlan, None);
+    }
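The create parameters deliberately distinguish an omitted `source_ips` from an explicit empty list, as the `MulticastGroupCreate` schema later in this diff spells out: omitted means the default Any-Source Multicast behavior, `[]` explicitly allows any source, and a non-empty list restricts delivery to those sources (SSM). A small sketch of how a consumer might interpret that tri-state field; the helper name is illustrative only:

```rust
use std::net::IpAddr;

// Illustrative helper: true only when the group is Source-Specific Multicast.
fn is_source_specific(source_ips: Option<&[IpAddr]>) -> bool {
    match source_ips {
        None => false,     // omitted: Any-Source Multicast (default)
        Some([]) => false, // explicit empty list: still any source
        Some(_) => true,   // non-empty: SSM, only these senders
    }
}
```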
+    #[test]
+    fn test_multicast_group_create_deserialization_with_empty_source_ips() {
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "multicast_ip": "224.1.2.3",
+            "source_ips": []
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(params.source_ips, Some(vec![]));
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_invalid_multicast_ip() {
+        // Non-multicast IP should be rejected
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "multicast_ip": "192.168.1.1"
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_invalid_source_ip() {
+        // Multicast address in source_ips should be rejected
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "multicast_ip": "224.1.2.3",
+            "source_ips": ["224.0.0.1"]
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_only_multicast_ip() {
+        // Test with only multicast_ip, no source_ips
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "multicast_ip": "224.1.2.3"
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(
+            params.multicast_ip,
+            Some(IpAddr::V4(Ipv4Addr::new(224, 1, 2, 3)))
+        );
+        assert_eq!(params.source_ips, None);
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_only_source_ips() {
+        // Test with only source_ips, no multicast_ip (will be auto-allocated)
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "source_ips": ["10.0.0.1"]
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(params.multicast_ip, None);
+        assert_eq!(
+            params.source_ips,
+            Some(vec![IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))])
+        );
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_explicit_null_fields() {
+        // Test with explicit null values for optional fields
+        // This is what the CLI sends when fields are not provided
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "multicast_ip": null,
+            "source_ips": null,
+            "pool": null,
+            "mvlan": null
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(
+            result.is_ok(),
+            "Failed to deserialize with explicit null fields: {:?}",
+            result.err()
+        );
+        let params = result.unwrap();
+        assert_eq!(params.multicast_ip, None);
+        assert_eq!(params.source_ips, None);
+        assert_eq!(params.pool, None);
+        assert_eq!(params.mvlan, None);
+    }
+
+    #[test]
+    fn test_multicast_group_create_deserialization_mixed_null_and_values() {
+        // Test with some nulls and some values
+        let json = r#"{
+            "name": "test-group",
+            "description": "Test multicast group",
+            "multicast_ip": "224.1.2.3",
+            "source_ips": [],
+            "pool": null,
+            "mvlan": 30
+        }"#;
+
+        let result: Result<MulticastGroupCreate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(
+            params.multicast_ip,
+            Some(IpAddr::V4(Ipv4Addr::new(224, 1, 2, 3)))
+        );
+        assert_eq!(params.source_ips, Some(vec![]));
+        assert_eq!(params.pool, None);
+        assert_eq!(params.mvlan, Some(VlanID::new(30).unwrap()));
+    }
+    #[test]
+    fn test_multicast_group_update_deserialization_omit_all_fields() {
+        // When fields are omitted, they should be None (no change)
+        let json = r#"{
+            "name": "test-group"
+        }"#;
+
+        let result: Result<MulticastGroupUpdate, _> =
+            serde_json::from_str(json);
+        assert!(
+            result.is_ok(),
+            "Failed to deserialize update with omitted fields: {:?}",
+            result.err()
+        );
+        let params = result.unwrap();
+        assert_eq!(params.source_ips, None);
+        assert_eq!(params.mvlan, None);
+    }
+
+    #[test]
+    fn test_multicast_group_update_deserialization_explicit_null_mvlan() {
+        // When mvlan is explicitly null, it should be Some(Nullable(None)) (clearing the field)
+        let json = r#"{
+            "name": "test-group",
+            "mvlan": null
+        }"#;
+
+        let result: Result<MulticastGroupUpdate, _> =
+            serde_json::from_str(json);
+        assert!(
+            result.is_ok(),
+            "Failed to deserialize update with null mvlan: {:?}",
+            result.err()
+        );
+        let params = result.unwrap();
+        assert_eq!(params.mvlan, Some(Nullable(None)));
+    }
+
+    #[test]
+    fn test_multicast_group_update_deserialization_set_mvlan() {
+        // When mvlan has a value, it should be Some(Nullable(Some(value)))
+        let json = r#"{
+            "name": "test-group",
+            "mvlan": 100
+        }"#;
+
+        let result: Result<MulticastGroupUpdate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(
+            params.mvlan,
+            Some(Nullable(Some(VlanID::new(100).unwrap())))
+        );
+    }
+
+    #[test]
+    fn test_multicast_group_update_deserialization_update_source_ips() {
+        // Test updating source_ips
+        let json = r#"{
+            "name": "test-group",
+            "source_ips": ["10.0.0.5", "10.0.0.6"]
+        }"#;
+
+        let result: Result<MulticastGroupUpdate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(
+            params.source_ips,
+            Some(vec![
+                IpAddr::V4(Ipv4Addr::new(10, 0, 0, 5)),
+                IpAddr::V4(Ipv4Addr::new(10, 0, 0, 6))
+            ])
+        );
+    }
+
+    #[test]
+    fn test_multicast_group_update_deserialization_clear_source_ips() {
+        // Empty array should clear source_ips (Any-Source Multicast)
+        let json = r#"{
+            "name": "test-group",
+            "source_ips": []
+        }"#;
+
+        let result: Result<MulticastGroupUpdate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_ok());
+        let params = result.unwrap();
+        assert_eq!(params.source_ips, Some(vec![]));
+    }
+
+    #[test]
+    fn test_multicast_group_update_deserialization_invalid_mvlan() {
+        // VLAN ID 1 should be rejected (reserved)
+        let json = r#"{
+            "name": "test-group",
+            "mvlan": 1
+        }"#;
+
+        let result: Result<MulticastGroupUpdate, _> =
+            serde_json::from_str(json);
+        assert!(result.is_err(), "Should reject reserved VLAN ID 1");
+    }
+}
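The update tests above rely on the `Nullable` wrapper to distinguish "field omitted" from "field explicitly set to null". A short sketch of how a caller might apply the resulting tri-state `mvlan` value; the function is illustrative only, while `Nullable` and `VlanID` are the types the tests already use:

```rust
// Sketch: applying the three possible mvlan states from MulticastGroupUpdate.
fn apply_mvlan_update(
    current: Option<VlanID>,
    update: Option<Nullable<VlanID>>,
) -> Option<VlanID> {
    match update {
        None => current,                          // field omitted: no change
        Some(Nullable(None)) => None,             // explicit null: clear MVLAN
        Some(Nullable(Some(vlan))) => Some(vlan), // value: set or replace
    }
}
```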
diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index fb1c5ac1402..7f91e98d286 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -19,6 +19,7 @@ use omicron_common::api::external::{
     Digest, Error, FailureDomain, IdentityMetadata, InstanceState, Name,
     Nullable, ObjectIdentity, SimpleIdentity, SimpleIdentityOrName,
 };
+use omicron_common::vlan::VlanID;
 use omicron_uuid_kinds::*;
 use oxnet::{Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
@@ -535,6 +536,44 @@ impl TryFrom for FloatingIp {
     }
 }
+
+// MULTICAST GROUPS
+
+/// View of a Multicast Group
+#[derive(
+    ObjectIdentity, Debug, PartialEq, Clone, Deserialize, Serialize, JsonSchema,
+)]
+pub struct MulticastGroup {
+    #[serde(flatten)]
+    pub identity: IdentityMetadata,
+    /// The multicast IP address held by this resource.
+    pub multicast_ip: IpAddr,
+    /// Source IP addresses for Source-Specific Multicast (SSM).
+    /// An empty array means any source is allowed.
+    pub source_ips: Vec<IpAddr>,
+    /// Multicast VLAN (MVLAN) for egress multicast traffic to upstream networks.
+    /// None means no VLAN tagging on egress.
+    pub mvlan: Option<VlanID>,
+    /// The ID of the IP pool this resource belongs to.
+    pub ip_pool_id: Uuid,
+    /// Current state of the multicast group.
+    pub state: String,
+}
+
+/// View of a Multicast Group Member (instance belonging to a multicast group)
+#[derive(
+    ObjectIdentity, Debug, PartialEq, Clone, Deserialize, Serialize, JsonSchema,
+)]
+pub struct MulticastGroupMember {
+    #[serde(flatten)]
+    pub identity: IdentityMetadata,
+    /// The ID of the multicast group this member belongs to.
+    pub multicast_group_id: Uuid,
+    /// The ID of the instance that is a member of this group.
+    pub instance_id: Uuid,
+    /// Current state of the multicast group membership.
+    pub state: String,
+}
+
 // RACKS
 
 /// View of an Rack
diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index f9bc3e0aacc..bbf97b60de8 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -144,6 +144,39 @@ impl InstanceUpdaterStatus {
     }
 }
+
+/// The status of a `multicast_reconciler` background task activation.
+#[derive(Default, Serialize, Deserialize, Debug)]
+pub struct MulticastGroupReconcilerStatus {
+    /// Whether the multicast reconciler is disabled due to the feature not
+    /// being enabled.
+    ///
+    /// We use "disabled" here to match other background task status structs.
+    pub disabled: bool,
+    /// Number of multicast groups transitioned from "Creating" to "Active" state.
+    pub groups_created: usize,
+    /// Number of multicast groups cleaned up (fully removed after "Deleting").
+    pub groups_deleted: usize,
+    /// Number of active multicast groups verified on dataplane switches.
+    pub groups_verified: usize,
+    /// Number of members processed ("Joining"→"Joined", "Left" with
+    /// time_deleted→hard-deleted cleanup).
+    pub members_processed: usize,
+    /// Number of members deleted (Left + time_deleted).
+    pub members_deleted: usize,
+    /// Errors that occurred during reconciliation operations.
+    pub errors: Vec<String>,
+}
+
+impl MulticastGroupReconcilerStatus {
+    pub fn total_groups_processed(&self) -> usize {
+        self.groups_created + self.groups_deleted + self.groups_verified
+    }
+
+    pub fn has_errors(&self) -> bool {
+        !self.errors.is_empty()
+    }
+}
+
 /// The status of an `instance_reincarnation` background task activation.
 #[derive(Default, Serialize, Deserialize, Debug)]
 pub struct InstanceReincarnationStatus {
@@ -159,7 +192,7 @@ pub struct InstanceReincarnationStatus {
     /// UUIDs of instances which changed state before they could be
     /// reincarnated.
     pub changed_state: Vec,
-    /// Any errors that occured while finding instances in need of reincarnation.
+    /// Any errors that occurred while finding instances in need of reincarnation.
     pub errors: Vec,
     /// Errors that occurred while restarting individual instances.
pub restart_errors: Vec<(ReincarnatableInstance, String)>, diff --git a/openapi/nexus.json b/openapi/nexus.json index 5800b1c9274..621cb8ccaf5 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -4278,6 +4278,155 @@ } } }, + "/v1/instances/{instance}/multicast-groups": { + "get": { + "tags": [ + "experimental" + ], + "summary": "List multicast groups for instance", + "operationId": "instance_multicast_group_list", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupMemberResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/instances/{instance}/multicast-groups/{multicast_group}": { + "put": { + "tags": [ + "experimental" + ], + "summary": "Join multicast group.", + "description": "This is functionally equivalent to adding the instance via the group's member management endpoint or updating the instance's `multicast_groups` field. All approaches modify the same membership and trigger reconciliation.", + "operationId": "instance_multicast_group_join", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupMember" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "experimental" + ], + "summary": "Leave multicast group.", + "description": "This is functionally equivalent to removing the instance via the group's member management endpoint or updating the instance's `multicast_groups` field. 
All approaches modify the same membership and trigger reconciliation.", + "operationId": "instance_multicast_group_leave", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/instances/{instance}/reboot": { "post": { "tags": [ @@ -5879,6 +6028,386 @@ } } }, + "/v1/multicast-groups": { + "get": { + "tags": [ + "experimental" + ], + "summary": "List all multicast groups.", + "operationId": "multicast_group_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + }, + "post": { + "tags": [ + "experimental" + ], + "summary": "Create a multicast group.", + "description": "Multicast groups are fleet-scoped resources that can be joined by instances across projects and silos. 
A single multicast IP serves all group members regardless of project or silo boundaries.", + "operationId": "multicast_group_create", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupCreate" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroup" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/multicast-groups/{multicast_group}": { + "get": { + "tags": [ + "experimental" + ], + "summary": "Fetch a multicast group.", + "operationId": "multicast_group_view", + "parameters": [ + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroup" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "tags": [ + "experimental" + ], + "summary": "Update a multicast group.", + "operationId": "multicast_group_update", + "parameters": [ + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupUpdate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroup" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "experimental" + ], + "summary": "Delete a multicast group.", + "operationId": "multicast_group_delete", + "parameters": [ + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/multicast-groups/{multicast_group}/members": { + "get": { + "tags": [ + "experimental" + ], + "summary": "List members of a multicast group.", + "operationId": "multicast_group_member_list", + "parameters": [ + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": 
"#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupMemberResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + }, + "post": { + "tags": [ + "experimental" + ], + "summary": "Add instance to a multicast group.", + "description": "Functionally equivalent to updating the instance's `multicast_groups` field. Both approaches modify the same underlying membership and trigger the same reconciliation logic.\n\nSpecify instance by name (requires `?project=`) or UUID.", + "operationId": "multicast_group_member_add", + "parameters": [ + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupMemberAdd" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroupMember" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/multicast-groups/{multicast_group}/members/{instance}": { + "delete": { + "tags": [ + "experimental" + ], + "summary": "Remove instance from a multicast group.", + "description": "Functionally equivalent to removing the group from the instance's `multicast_groups` field. 
Both approaches modify the same underlying membership and trigger reconciliation.\n\nSpecify instance by name (requires `?project=`) or UUID.", + "operationId": "multicast_group_member_remove", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "multicast_group", + "description": "Name or ID of the multicast group", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/network-interfaces": { "get": { "tags": [ @@ -9084,12 +9613,51 @@ "5XX": { "$ref": "#/components/responses/Error" } - }, - "x-dropshot-pagination": { - "required": [ - "end_time", - "start_time" - ] + }, + "x-dropshot-pagination": { + "required": [ + "end_time", + "start_time" + ] + } + } + }, + "/v1/system/multicast-groups/by-ip/{address}": { + "get": { + "tags": [ + "experimental" + ], + "summary": "Look up multicast group by IP address.", + "operationId": "lookup_multicast_group_by_ip", + "parameters": [ + { + "in": "path", + "name": "address", + "description": "IP address of the multicast group", + "required": true, + "schema": { + "type": "string", + "format": "ip" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MulticastGroup" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } } } }, @@ -20552,6 +21120,14 @@ } ] }, + "multicast_groups": { + "description": "The multicast groups this instance should join.\n\nThe instance will be automatically added as a member of the specified multicast groups during creation, enabling it to send and receive multicast traffic for those groups.", + "default": [], + "type": "array", + "items": { + "$ref": "#/components/schemas/NameOrId" + } + }, "name": { "$ref": "#/components/schemas/Name" }, @@ -21075,6 +21651,15 @@ } ] }, + "multicast_groups": { + "nullable": true, + "description": "Multicast groups this instance should join.\n\nWhen specified, this replaces the instance's current multicast group membership with the new set of groups. 
The instance will leave any groups not listed here and join any new groups that are specified.\n\nIf not provided (None), the instance's multicast group membership will not be changed.", + "default": null, + "type": "array", + "items": { + "$ref": "#/components/schemas/NameOrId" + } + }, "ncpus": { "description": "The number of vCPUs to be allocated to the instance", "allOf": [ @@ -22219,7 +22804,7 @@ "minimum": 0 }, "rack_id": { - "description": "The containing the switch this loopback address will be configured on.", + "description": "The rack containing the switch this loopback address will be configured on.", "type": "string", "format": "uuid" }, @@ -22375,6 +22960,279 @@ "datum_type" ] }, + "MulticastGroup": { + "description": "View of a Multicast Group", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "ip_pool_id": { + "description": "The ID of the IP pool this resource belongs to.", + "type": "string", + "format": "uuid" + }, + "multicast_ip": { + "description": "The multicast IP address held by this resource.", + "type": "string", + "format": "ip" + }, + "mvlan": { + "nullable": true, + "description": "Multicast VLAN (MVLAN) for egress multicast traffic to upstream networks. None means no VLAN tagging on egress.", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "source_ips": { + "description": "Source IP addresses for Source-Specific Multicast (SSM). Empty array means any source is allowed.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "state": { + "description": "Current state of the multicast group.", + "type": "string" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "ip_pool_id", + "multicast_ip", + "name", + "source_ips", + "state", + "time_created", + "time_modified" + ] + }, + "MulticastGroupCreate": { + "description": "Create-time parameters for a multicast group.", + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "multicast_ip": { + "nullable": true, + "description": "The multicast IP address to allocate. If None, one will be allocated from the default pool.", + "default": null, + "type": "string", + "format": "ip" + }, + "mvlan": { + "nullable": true, + "description": "Multicast VLAN (MVLAN) for egress multicast traffic to upstream networks. Tags packets leaving the rack to traverse VLAN-segmented upstream networks.\n\nValid range: 2-4094 (VLAN IDs 0-1 are reserved by IEEE 802.1Q standard).", + "default": null, + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "pool": { + "nullable": true, + "description": "Name or ID of the IP pool to allocate from. 
If None, uses the default multicast pool.", + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + }, + "source_ips": { + "nullable": true, + "description": "Source IP addresses for Source-Specific Multicast (SSM).\n\nNone uses default behavior (Any-Source Multicast). Empty list explicitly allows any source (Any-Source Multicast). Non-empty list restricts to specific sources (SSM).", + "default": null, + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + } + }, + "required": [ + "description", + "name" + ] + }, + "MulticastGroupMember": { + "description": "View of a Multicast Group Member (instance belonging to a multicast group)", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "instance_id": { + "description": "The ID of the instance that is a member of this group.", + "type": "string", + "format": "uuid" + }, + "multicast_group_id": { + "description": "The ID of the multicast group this member belongs to.", + "type": "string", + "format": "uuid" + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "state": { + "description": "Current state of the multicast group membership.", + "type": "string" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "instance_id", + "multicast_group_id", + "name", + "state", + "time_created", + "time_modified" + ] + }, + "MulticastGroupMemberAdd": { + "description": "Parameters for adding an instance to a multicast group.", + "type": "object", + "properties": { + "instance": { + "description": "Name or ID of the instance to add to the multicast group", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + }, + "required": [ + "instance" + ] + }, + "MulticastGroupMemberResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/MulticastGroupMember" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "MulticastGroupResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/MulticastGroup" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "MulticastGroupUpdate": { + "description": "Update-time parameters for a multicast group.", + "type": "object", + "properties": { + "description": { + "nullable": true, + "type": "string" + }, + "mvlan": { + "nullable": true, + "description": "Multicast VLAN (MVLAN) for egress multicast traffic to upstream networks. 
Set to null to clear the MVLAN. Valid range: 2-4094 when provided. Omit the field to leave mvlan unchanged.", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "name": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "source_ips": { + "nullable": true, + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + } + } + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", @@ -28209,6 +29067,13 @@ "url": "http://docs.oxide.computer/api/metrics" } }, + { + "name": "multicast-groups", + "description": "Multicast groups provide efficient one-to-many network communication.", + "externalDocs": { + "url": "http://docs.oxide.computer/api/multicast-groups" + } + }, { "name": "policy", "description": "System-wide IAM policy", diff --git a/openapi/sled-agent/sled-agent-7.0.0-62acb3.json b/openapi/sled-agent/sled-agent-7.0.0-62acb3.json new file mode 100644 index 00000000000..ddb7f61f616 --- /dev/null +++ b/openapi/sled-agent/sled-agent-7.0.0-62acb3.json @@ -0,0 +1,8734 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Oxide Sled Agent API", + "description": "API for interacting with individual sleds", + "contact": { + "url": "https://oxide.computer", + "email": "api@oxide.computer" + }, + "version": "7.0.0" + }, + "paths": { + "/artifacts": { + "get": { + "operationId": "artifact_list", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArtifactListResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/artifacts/{sha256}": { + "put": { + "operationId": "artifact_put", + "parameters": [ + { + "in": "path", + "name": "sha256", + "required": true, + "schema": { + "type": "string", + "format": "hex string (32 bytes)" + } + }, + { + "in": "query", + "name": "generation", + "required": true, + "schema": { + "$ref": "#/components/schemas/Generation" + } + } + ], + "requestBody": { + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArtifactPutResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/artifacts/{sha256}/copy-from-depot": { + "post": { + "operationId": "artifact_copy_from_depot", + "parameters": [ + { + "in": "path", + "name": "sha256", + "required": true, + "schema": { + "type": "string", + "format": "hex string (32 bytes)" + } + }, + { + "in": "query", + "name": "generation", + "required": true, + "schema": { + "$ref": "#/components/schemas/Generation" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArtifactCopyFromDepotBody" + } + } + }, + "required": true + }, + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/ArtifactCopyFromDepotResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/artifacts-config": { + "get": { + "operationId": "artifact_config_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArtifactConfig" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "operationId": "artifact_config_put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ArtifactConfig" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/bootstore/status": { + "get": { + "summary": "Get the internal state of the local bootstore node", + "operationId": "bootstore_status", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BootstoreStatus" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/debug/switch-zone-policy": { + "get": { + "summary": "A debugging endpoint only used by `omdb` that allows us to test", + "description": "restarting the switch zone without restarting sled-agent. See for context.", + "operationId": "debug_operator_switch_zone_policy_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OperatorSwitchZonePolicy" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "A debugging endpoint only used by `omdb` that allows us to test", + "description": "restarting the switch zone without restarting sled-agent. See for context.\n\nSetting the switch zone policy is asynchronous and inherently racy with the standard process of starting the switch zone. 
If the switch zone is in the process of being started or stopped when this policy is changed, the new policy may not take effect until that transition completes.", + "operationId": "debug_operator_switch_zone_policy_put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OperatorSwitchZonePolicy" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/disks/{disk_id}": { + "put": { + "operationId": "disk_put", + "parameters": [ + { + "in": "path", + "name": "disk_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DiskEnsureBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DiskRuntimeState" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/eip-gateways": { + "put": { + "summary": "Update per-NIC IP address <-> internet gateway mappings.", + "operationId": "set_eip_gateways", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ExternalIpGatewayMap" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/inventory": { + "get": { + "summary": "Fetch basic information about this sled", + "operationId": "inventory", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Inventory" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/network-bootstore-config": { + "get": { + "summary": "This API endpoint is only reading the local sled agent's view of the", + "description": "bootstore. The boostore is a distributed data store that is eventually consistent. 
Reads from individual nodes may not represent the latest state.", + "operationId": "read_network_bootstore_config_cache", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EarlyNetworkConfig" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "operationId": "write_network_bootstore_config", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EarlyNetworkConfig" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/omicron-config": { + "put": { + "operationId": "omicron_config_put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OmicronSledConfig" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/probes": { + "put": { + "summary": "Update the entire set of probe zones on this sled.", + "description": "Probe zones are used to debug networking configuration. They look similar to instances, in that they have an OPTE port on a VPC subnet and external addresses, but no actual VM.", + "operationId": "probes_put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProbeSet" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/sled-identifiers": { + "get": { + "summary": "Fetch sled identifiers", + "operationId": "sled_identifiers", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledIdentifiers" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/sled-role": { + "get": { + "operationId": "sled_role_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledRole" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/sleds": { + "put": { + "summary": "Add a sled to a rack that was already initialized via RSS", + "operationId": "sled_add", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AddSledRequest" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/dladm-info": { + "get": { + "operationId": "support_dladm_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_SledDiagnosticsQueryOutput", + "type": "array", + "items": { + "$ref": 
"#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/health-check": { + "get": { + "operationId": "support_health_check", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_SledDiagnosticsQueryOutput", + "type": "array", + "items": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/ipadm-info": { + "get": { + "operationId": "support_ipadm_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_SledDiagnosticsQueryOutput", + "type": "array", + "items": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/logs/download/{zone}": { + "get": { + "summary": "This endpoint returns a zip file of a zone's logs organized by service.", + "operationId": "support_logs_download", + "parameters": [ + { + "in": "path", + "name": "zone", + "description": "The zone for which one would like to collect logs for", + "required": true, + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "max_rotated", + "description": "The max number of rotated logs to include in the final support bundle", + "required": true, + "schema": { + "type": "integer", + "format": "uint", + "minimum": 0 + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + } + }, + "/support/logs/zones": { + "get": { + "summary": "This endpoint returns a list of known zones on a sled that have service", + "description": "logs that can be collected into a support bundle.", + "operationId": "support_logs", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_String", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/nvmeadm-info": { + "get": { + "operationId": "support_nvmeadm_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/pargs-info": { + "get": { + "operationId": "support_pargs_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_SledDiagnosticsQueryOutput", + "type": "array", + "items": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/pfiles-info": { + "get": { + "operationId": "support_pfiles_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + 
"application/json": { + "schema": { + "title": "Array_of_SledDiagnosticsQueryOutput", + "type": "array", + "items": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/pstack-info": { + "get": { + "operationId": "support_pstack_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_SledDiagnosticsQueryOutput", + "type": "array", + "items": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/zfs-info": { + "get": { + "operationId": "support_zfs_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/zoneadm-info": { + "get": { + "operationId": "support_zoneadm_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/zpool-info": { + "get": { + "operationId": "support_zpool_info", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledDiagnosticsQueryOutput" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}": { + "get": { + "summary": "List all support bundles within a particular dataset", + "operationId": "support_bundle_list", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_SupportBundleMetadata", + "type": "array", + "items": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}": { + "post": { + "summary": "Starts creation of a support bundle within a particular dataset", + "description": "Callers should transfer chunks of the bundle with \"support_bundle_transfer\", and then call \"support_bundle_finalize\" once the bundle has finished transferring.\n\nIf a support bundle was previously created without being finalized successfully, this endpoint will reset the state.\n\nIf a support bundle was previously created and finalized 
successfully, this endpoint will return metadata indicating that it already exists.", + "operationId": "support_bundle_start_creation", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Delete a support bundle from a particular dataset", + "operationId": "support_bundle_delete", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download": { + "get": { + "summary": "Fetch a support bundle from a particular dataset", + "operationId": "support_bundle_download", + "parameters": [ + { + "in": "header", + "name": "range", + "description": "A request to access a portion of the resource, such as `bytes=0-499`\n\nSee: ", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + }, + "head": { + "summary": "Fetch metadata about a support bundle from a particular dataset", + "operationId": "support_bundle_head", + "parameters": [ + { + "in": "header", + "name": "range", + "description": "A request to access a portion of the resource, such as `bytes=0-499`\n\nSee: ", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "dataset_id", + 
"description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/download/{file}": { + "get": { + "summary": "Fetch a file within a support bundle from a particular dataset", + "operationId": "support_bundle_download_file", + "parameters": [ + { + "in": "header", + "name": "range", + "description": "A request to access a portion of the resource, such as `bytes=0-499`\n\nSee: ", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "file", + "description": "The path of the file within the support bundle to query", + "required": true, + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + }, + "head": { + "summary": "Fetch metadata about a file within a support bundle from a particular dataset", + "operationId": "support_bundle_head_file", + "parameters": [ + { + "in": "header", + "name": "range", + "description": "A request to access a portion of the resource, such as `bytes=0-499`\n\nSee: ", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "file", + "description": "The path of the file within the support bundle to query", + "required": true, + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/finalize": { + "post": { + "summary": "Finalizes the creation of a support bundle", + "description": "If the requested hash matched the bundle, the bundle is created. 
Otherwise, an error is returned.", + "operationId": "support_bundle_finalize", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + }, + { + "in": "query", + "name": "hash", + "required": true, + "schema": { + "type": "string", + "format": "hex string (32 bytes)" + } + } + ], + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/index": { + "get": { + "summary": "Fetch the index (list of files within a support bundle)", + "operationId": "support_bundle_index", + "parameters": [ + { + "in": "header", + "name": "range", + "description": "A request to access a portion of the resource, such as `bytes=0-499`\n\nSee: ", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + }, + "head": { + "summary": "Fetch metadata about the list of files within a support bundle", + "operationId": "support_bundle_head_index", + "parameters": [ + { + "in": "header", + "name": "range", + "description": "A request to access a portion of the resource, such as `bytes=0-499`\n\nSee: ", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + } + }, + "/support-bundles/{zpool_id}/{dataset_id}/{support_bundle_id}/transfer": { + "put": { + "summary": "Transfers a chunk of a support bundle within a particular dataset", + 
"operationId": "support_bundle_transfer", + "parameters": [ + { + "in": "path", + "name": "dataset_id", + "description": "The dataset on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/DatasetUuid" + } + }, + { + "in": "path", + "name": "support_bundle_id", + "description": "The ID of the support bundle itself", + "required": true, + "schema": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + { + "in": "path", + "name": "zpool_id", + "description": "The zpool on which this support bundle was provisioned", + "required": true, + "schema": { + "$ref": "#/components/schemas/ZpoolUuid" + } + }, + { + "in": "query", + "name": "offset", + "required": true, + "schema": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + } + ], + "requestBody": { + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupportBundleMetadata" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/switch-ports": { + "post": { + "operationId": "uplink_ensure", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SwitchPorts" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v2p": { + "get": { + "summary": "List v2p mappings present on sled", + "operationId": "list_v2p", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_VirtualNetworkInterfaceHost", + "type": "array", + "items": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Create a mapping from a virtual NIC to a physical host", + "operationId": "set_v2p", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Delete a mapping from a virtual NIC to a physical host", + "operationId": "del_v2p", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/vmms/{propolis_id}": { + "put": { + "operationId": "vmm_register", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceEnsureBody" + } + } + }, + "required": true 
+ }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledVmmState" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "operationId": "vmm_unregister", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmUnregisterResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/vmms/{propolis_id}/disks/{disk_id}/snapshot": { + "post": { + "summary": "Take a snapshot of a disk that is attached to an instance", + "operationId": "vmm_issue_disk_snapshot_request", + "parameters": [ + { + "in": "path", + "name": "disk_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + }, + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmIssueDiskSnapshotRequestBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmIssueDiskSnapshotRequestResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/vmms/{propolis_id}/external-ip": { + "put": { + "operationId": "vmm_put_external_ip", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "operationId": "vmm_delete_external_ip", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/vmms/{propolis_id}/multicast-group": { + "put": { + "operationId": "vmm_join_multicast_group", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceMulticastBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": 
"#/components/responses/Error" + } + } + }, + "delete": { + "operationId": "vmm_leave_multicast_group", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceMulticastBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/vmms/{propolis_id}/state": { + "get": { + "operationId": "vmm_get_state", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledVmmState" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "operationId": "vmm_put_state", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/PropolisUuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmPutStateBody" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmPutStateResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/vpc/{vpc_id}/firewall/rules": { + "put": { + "operationId": "vpc_firewall_rules_put", + "parameters": [ + { + "in": "path", + "name": "vpc_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcFirewallRulesEnsureBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/vpc-routes": { + "get": { + "summary": "Get the current versions of VPC routing rules.", + "operationId": "list_vpc_routes", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_ResolvedVpcRouteState", + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRouteState" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Update VPC routing rules.", + "operationId": "set_vpc_routes", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Array_of_ResolvedVpcRouteSet", + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRouteSet" + } + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones": { + "get": { + "summary": "List the zones that are 
currently managed by the sled agent.", + "operationId": "zones_list", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_String", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundle-cleanup": { + "post": { + "summary": "Trigger a zone bundle cleanup.", + "operationId": "zone_bundle_cleanup", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Map_of_CleanupCount", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/CleanupCount" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundle-cleanup/context": { + "get": { + "summary": "Return context used by the zone-bundle cleanup task.", + "operationId": "zone_bundle_cleanup_context", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CleanupContext" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Update context used by the zone-bundle cleanup task.", + "operationId": "zone_bundle_cleanup_context_update", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CleanupContextUpdate" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundle-cleanup/utilization": { + "get": { + "summary": "Return utilization information about all zone bundles.", + "operationId": "zone_bundle_utilization", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Map_of_BundleUtilization", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/BundleUtilization" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundles": { + "get": { + "summary": "List all zone bundles that exist, even for now-deleted zones.", + "operationId": "zone_bundle_list_all", + "parameters": [ + { + "in": "query", + "name": "filter", + "description": "An optional substring used to filter zone bundles.", + "schema": { + "nullable": true, + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_ZoneBundleMetadata", + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneBundleMetadata" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundles/{zone_name}": { + "get": { + "summary": "List the zone bundles that are available for a running zone.", + "operationId": "zone_bundle_list", + "parameters": [ + { + "in": "path", + "name": "zone_name", + "description": "The name of the zone.", + "required": true, + "schema": { + "type": "string" + } + } + 
], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_ZoneBundleMetadata", + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneBundleMetadata" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/zones/bundles/{zone_name}/{bundle_id}": { + "get": { + "summary": "Fetch the binary content of a single zone bundle.", + "operationId": "zone_bundle_get", + "parameters": [ + { + "in": "path", + "name": "bundle_id", + "description": "The ID for this bundle itself.", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + }, + { + "in": "path", + "name": "zone_name", + "description": "The name of the zone this bundle is derived from.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Delete a zone bundle.", + "operationId": "zone_bundle_delete", + "parameters": [ + { + "in": "path", + "name": "bundle_id", + "description": "The ID for this bundle itself.", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + }, + { + "in": "path", + "name": "zone_name", + "description": "The name of the zone this bundle is derived from.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "schemas": { + "AddSledRequest": { + "description": "A request to Add a given sled after rack initialization has occurred", + "type": "object", + "properties": { + "sled_id": { + "$ref": "#/components/schemas/BaseboardId" + }, + "start_request": { + "$ref": "#/components/schemas/StartSledAgentRequest" + } + }, + "required": [ + "sled_id", + "start_request" + ] + }, + "ArtifactConfig": { + "type": "object", + "properties": { + "artifacts": { + "type": "array", + "items": { + "type": "string", + "format": "hex string (32 bytes)" + }, + "uniqueItems": true + }, + "generation": { + "$ref": "#/components/schemas/Generation" + } + }, + "required": [ + "artifacts", + "generation" + ] + }, + "ArtifactCopyFromDepotBody": { + "type": "object", + "properties": { + "depot_base_url": { + "type": "string" + } + }, + "required": [ + "depot_base_url" + ] + }, + "ArtifactCopyFromDepotResponse": { + "type": "object" + }, + "ArtifactListResponse": { + "type": "object", + "properties": { + "generation": { + "$ref": "#/components/schemas/Generation" + }, + "list": { + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "uint", + "minimum": 0 + } + } + }, + "required": [ + "generation", + "list" + ] + }, + "ArtifactPutResponse": { + "type": "object", + "properties": { + "datasets": { + "description": "The number of valid M.2 artifact datasets we found on the sled. There is typically one of these datasets for each functional M.2.", + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "successful_writes": { + "description": "The number of valid writes to the M.2 artifact datasets. 
This should be less than or equal to the number of artifact datasets.", + "type": "integer", + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "datasets", + "successful_writes" + ] + }, + "Baseboard": { + "description": "Describes properties that should uniquely identify a Gimlet.", + "oneOf": [ + { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "model": { + "type": "string" + }, + "revision": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "gimlet" + ] + } + }, + "required": [ + "identifier", + "model", + "revision", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "unknown" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "model": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "pc" + ] + } + }, + "required": [ + "identifier", + "model", + "type" + ] + } + ] + }, + "BaseboardId": { + "description": "A representation of a Baseboard ID as used in the inventory subsystem This type is essentially the same as a `Baseboard` except it doesn't have a revision or HW type (Gimlet, PC, Unknown).", + "type": "object", + "properties": { + "part_number": { + "description": "Oxide Part Number", + "type": "string" + }, + "serial_number": { + "description": "Serial number (unique for a given part number)", + "type": "string" + } + }, + "required": [ + "part_number", + "serial_number" + ] + }, + "BfdMode": { + "description": "BFD connection mode.", + "type": "string", + "enum": [ + "single_hop", + "multi_hop" + ] + }, + "BfdPeerConfig": { + "type": "object", + "properties": { + "detection_threshold": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "local": { + "nullable": true, + "type": "string", + "format": "ip" + }, + "mode": { + "$ref": "#/components/schemas/BfdMode" + }, + "remote": { + "type": "string", + "format": "ip" + }, + "required_rx": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "switch": { + "$ref": "#/components/schemas/SwitchLocation" + } + }, + "required": [ + "detection_threshold", + "mode", + "remote", + "required_rx", + "switch" + ] + }, + "BgpConfig": { + "type": "object", + "properties": { + "asn": { + "description": "The autonomous system number for the BGP configuration.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "checker": { + "nullable": true, + "description": "Checker to apply to incoming messages.", + "default": null, + "type": "string" + }, + "originate": { + "description": "The set of prefixes for the BGP router to originate.", + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4Net" + } + }, + "shaper": { + "nullable": true, + "description": "Shaper to apply to outgoing messages.", + "default": null, + "type": "string" + } + }, + "required": [ + "asn", + "originate" + ] + }, + "BgpPeerConfig": { + "type": "object", + "properties": { + "addr": { + "description": "Address of the peer.", + "type": "string", + "format": "ipv4" + }, + "allowed_export": { + "description": "Define export policy for a peer.", + "default": { + "type": "no_filtering" + }, + "allOf": [ + { + "$ref": "#/components/schemas/ImportExportPolicy" + } + ] + }, + "allowed_import": { + "description": "Define import policy for a peer.", + "default": { + "type": "no_filtering" + }, + "allOf": [ + { + "$ref": "#/components/schemas/ImportExportPolicy" + } + ] + }, + 
"asn": { + "description": "The autonomous system number of the router the peer belongs to.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "communities": { + "description": "Include the provided communities in updates sent to the peer.", + "default": [], + "type": "array", + "items": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "enforce_first_as": { + "description": "Enforce that the first AS in paths received from this peer is the peer's AS.", + "default": false, + "type": "boolean" + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "local_pref": { + "nullable": true, + "description": "Apply a local preference to routes received from this peer.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "md5_auth_key": { + "nullable": true, + "description": "Use the given key for TCP-MD5 authentication with the peer.", + "default": null, + "type": "string" + }, + "min_ttl": { + "nullable": true, + "description": "Require messages from a peer have a minimum IP time to live field.", + "default": null, + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "multi_exit_discriminator": { + "nullable": true, + "description": "Apply the provided multi-exit discriminator (MED) updates sent to the peer.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "port": { + "description": "Switch port the peer is reachable on.", + "type": "string" + }, + "remote_asn": { + "nullable": true, + "description": "Require that a peer has a specified ASN.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "vlan_id": { + "nullable": true, + "description": "Associate a VLAN ID with a BGP peer session.", + "default": null, + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "addr", + "asn", + "port" + ] + }, + "BlobStorageBackend": { + "description": "A storage backend for a disk whose initial contents are given explicitly by the specification.", + "type": "object", + "properties": { + "base64": { + "description": "The disk's initial contents, encoded as a base64 string.", + "type": "string" + }, + "readonly": { + "description": "Indicates whether the storage is read-only.", + "type": "boolean" + } + }, + "required": [ + "base64", + "readonly" + ], + "additionalProperties": false + }, + "Board": { + "description": "A VM's mainboard.", + "type": "object", + "properties": { + "chipset": { + "description": "The chipset to expose to guest software.", + "allOf": [ + { + "$ref": "#/components/schemas/Chipset" + } + ] + }, + "cpuid": { + "nullable": true, + "description": 
"The CPUID values to expose to the guest. If `None`, bhyve will derive default values from the host's CPUID values.", + "allOf": [ + { + "$ref": "#/components/schemas/Cpuid" + } + ] + }, + "cpus": { + "description": "The number of virtual logical processors attached to this VM.", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "guest_hv_interface": { + "description": "The hypervisor platform to expose to the guest. The default is a bhyve-compatible interface with no additional features.\n\nFor compatibility with older versions of Propolis, this field is only serialized if it specifies a non-default interface.", + "allOf": [ + { + "$ref": "#/components/schemas/GuestHypervisorInterface" + } + ] + }, + "memory_mb": { + "description": "The amount of guest RAM attached to this VM.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "chipset", + "cpus", + "memory_mb" + ], + "additionalProperties": false + }, + "BootImageHeader": { + "type": "object", + "properties": { + "data_size": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "flags": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "image_name": { + "type": "string" + }, + "image_size": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "sha256": { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "minItems": 32, + "maxItems": 32 + }, + "target_size": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "data_size", + "flags", + "image_name", + "image_size", + "sha256", + "target_size" + ] + }, + "BootOrderEntry": { + "description": "An entry in the boot order stored in a [`BootSettings`] component.", + "type": "object", + "properties": { + "id": { + "description": "The ID of another component in the spec that Propolis should try to boot from.\n\nCurrently, only disk device components are supported.", + "allOf": [ + { + "$ref": "#/components/schemas/SpecKey" + } + ] + } + }, + "required": [ + "id" + ] + }, + "BootPartitionContents": { + "type": "object", + "properties": { + "boot_disk": { + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "$ref": "#/components/schemas/M2Slot" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "$ref": "#/components/schemas/M2Slot" + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + }, + "slot_a": { + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "$ref": "#/components/schemas/BootPartitionDetails" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "$ref": "#/components/schemas/BootPartitionDetails" + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + }, + "slot_b": { + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "$ref": "#/components/schemas/BootPartitionDetails" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "$ref": "#/components/schemas/BootPartitionDetails" + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { 
+ "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + } + }, + "required": [ + "boot_disk", + "slot_a", + "slot_b" + ] + }, + "BootPartitionDetails": { + "type": "object", + "properties": { + "artifact_hash": { + "type": "string", + "format": "hex string (32 bytes)" + }, + "artifact_size": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "header": { + "$ref": "#/components/schemas/BootImageHeader" + } + }, + "required": [ + "artifact_hash", + "artifact_size", + "header" + ] + }, + "BootSettings": { + "description": "Settings supplied to the guest's firmware image that specify the order in which it should consider its options when selecting a device to try to boot from.", + "type": "object", + "properties": { + "order": { + "description": "An ordered list of components to attempt to boot from.", + "type": "array", + "items": { + "$ref": "#/components/schemas/BootOrderEntry" + } + } + }, + "required": [ + "order" + ], + "additionalProperties": false + }, + "BootstoreStatus": { + "type": "object", + "properties": { + "accepted_connections": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "established_connections": { + "type": "array", + "items": { + "$ref": "#/components/schemas/EstablishedConnection" + } + }, + "fsm_ledger_generation": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "fsm_state": { + "type": "string" + }, + "negotiating_connections": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "network_config_ledger_generation": { + "nullable": true, + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "peers": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + } + }, + "required": [ + "accepted_connections", + "established_connections", + "fsm_ledger_generation", + "fsm_state", + "negotiating_connections", + "peers" + ] + }, + "BundleUtilization": { + "description": "The portion of a debug dataset used for zone bundles.", + "type": "object", + "properties": { + "bytes_available": { + "description": "The total number of bytes available for zone bundles.\n\nThis is `dataset_quota` multiplied by the context's storage limit.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "bytes_used": { + "description": "Total bundle usage, in bytes.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "dataset_quota": { + "description": "The total dataset quota, in bytes.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "bytes_available", + "bytes_used", + "dataset_quota" + ] + }, + "ByteCount": { + "description": "Byte count to express memory or storage capacity.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "Chipset": { + "description": "A kind of virtual chipset.", + "oneOf": [ + { + "description": "An Intel 440FX-compatible chipset.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i440_fx" + ] + }, + "value": { + "$ref": "#/components/schemas/I440Fx" + } + }, + "required": [ + "type", + "value" + ], + "additionalProperties": false + } + ] + }, + "CleanupContext": { + "description": "Context provided for the zone bundle cleanup task.", + "type": "object", + "properties": { + "period": { + "description": "The period on which automatic checks and cleanup is performed.", + "allOf": [ + { + "$ref": "#/components/schemas/CleanupPeriod" + } + ] + }, + "priority": { + "description": "The priority ordering for keeping 
old bundles.", + "allOf": [ + { + "$ref": "#/components/schemas/PriorityOrder" + } + ] + }, + "storage_limit": { + "description": "The limit on the dataset quota available for zone bundles.", + "allOf": [ + { + "$ref": "#/components/schemas/StorageLimit" + } + ] + } + }, + "required": [ + "period", + "priority", + "storage_limit" + ] + }, + "CleanupContextUpdate": { + "description": "Parameters used to update the zone bundle cleanup context.", + "type": "object", + "properties": { + "period": { + "nullable": true, + "description": "The new period on which automatic cleanups are run.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + }, + "priority": { + "nullable": true, + "description": "The priority ordering for preserving old zone bundles.", + "allOf": [ + { + "$ref": "#/components/schemas/PriorityOrder" + } + ] + }, + "storage_limit": { + "nullable": true, + "description": "The new limit on the underlying dataset quota allowed for bundles.", + "type": "integer", + "format": "uint8", + "minimum": 0 + } + } + }, + "CleanupCount": { + "description": "The count of bundles / bytes removed during a cleanup operation.", + "type": "object", + "properties": { + "bundles": { + "description": "The number of bundles removed.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "bytes": { + "description": "The number of bytes removed.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "bundles", + "bytes" + ] + }, + "CleanupPeriod": { + "description": "A period on which bundles are automatically cleaned up.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + }, + "ComponentV0": { + "oneOf": [ + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/VirtioDisk" + }, + "type": { + "type": "string", + "enum": [ + "virtio_disk" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/NvmeDisk" + }, + "type": { + "type": "string", + "enum": [ + "nvme_disk" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/VirtioNic" + }, + "type": { + "type": "string", + "enum": [ + "virtio_nic" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/SerialPort" + }, + "type": { + "type": "string", + "enum": [ + "serial_port" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/PciPciBridge" + }, + "type": { + "type": "string", + "enum": [ + "pci_pci_bridge" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/QemuPvpanic" + }, + "type": { + "type": "string", + "enum": [ + "qemu_pvpanic" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/BootSettings" + }, + "type": { + "type": "string", + "enum": [ + "boot_settings" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + 
"type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/SoftNpuPciPort" + }, + "type": { + "type": "string", + "enum": [ + "soft_npu_pci_port" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/SoftNpuPort" + }, + "type": { + "type": "string", + "enum": [ + "soft_npu_port" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/SoftNpuP9" + }, + "type": { + "type": "string", + "enum": [ + "soft_npu_p9" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/P9fs" + }, + "type": { + "type": "string", + "enum": [ + "p9fs" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/MigrationFailureInjector" + }, + "type": { + "type": "string", + "enum": [ + "migration_failure_injector" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/CrucibleStorageBackend" + }, + "type": { + "type": "string", + "enum": [ + "crucible_storage_backend" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/FileStorageBackend" + }, + "type": { + "type": "string", + "enum": [ + "file_storage_backend" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/BlobStorageBackend" + }, + "type": { + "type": "string", + "enum": [ + "blob_storage_backend" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/VirtioNetworkBackend" + }, + "type": { + "type": "string", + "enum": [ + "virtio_network_backend" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "component": { + "$ref": "#/components/schemas/DlpiNetworkBackend" + }, + "type": { + "type": "string", + "enum": [ + "dlpi_network_backend" + ] + } + }, + "required": [ + "component", + "type" + ], + "additionalProperties": false + } + ] + }, + "CompressionAlgorithm": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "on" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "off" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "gzip" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "level": { + "$ref": "#/components/schemas/GzipLevel" + }, + "type": { + "type": "string", + "enum": [ + "gzip_n" + ] + } + }, + "required": [ + "level", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lz4" + ] + } + }, + 
"required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lzjb" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "zle" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, + "ConfigReconcilerInventory": { + "description": "Describes the last attempt made by the sled-agent-config-reconciler to reconcile the current sled config against the actual state of the sled.", + "type": "object", + "properties": { + "boot_partitions": { + "$ref": "#/components/schemas/BootPartitionContents" + }, + "datasets": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ConfigReconcilerInventoryResult" + } + }, + "external_disks": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ConfigReconcilerInventoryResult" + } + }, + "last_reconciled_config": { + "$ref": "#/components/schemas/OmicronSledConfig" + }, + "orphaned_datasets": { + "title": "IdOrdMap", + "x-rust-type": { + "crate": "iddqd", + "parameters": [ + { + "$ref": "#/components/schemas/OrphanedDataset" + } + ], + "path": "iddqd::IdOrdMap", + "version": "*" + }, + "type": "array", + "items": { + "$ref": "#/components/schemas/OrphanedDataset" + }, + "uniqueItems": true + }, + "remove_mupdate_override": { + "nullable": true, + "description": "The result of removing the mupdate override file on disk.\n\n`None` if `remove_mupdate_override` was not provided in the sled config.", + "allOf": [ + { + "$ref": "#/components/schemas/RemoveMupdateOverrideInventory" + } + ] + }, + "zones": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ConfigReconcilerInventoryResult" + } + } + }, + "required": [ + "boot_partitions", + "datasets", + "external_disks", + "last_reconciled_config", + "orphaned_datasets", + "zones" + ] + }, + "ConfigReconcilerInventoryResult": { + "oneOf": [ + { + "type": "object", + "properties": { + "result": { + "type": "string", + "enum": [ + "ok" + ] + } + }, + "required": [ + "result" + ] + }, + { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "result": { + "type": "string", + "enum": [ + "err" + ] + } + }, + "required": [ + "message", + "result" + ] + } + ] + }, + "ConfigReconcilerInventoryStatus": { + "description": "Status of the sled-agent-config-reconciler task.", + "oneOf": [ + { + "description": "The reconciler task has not yet run for the first time since sled-agent started.", + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "not_yet_run" + ] + } + }, + "required": [ + "status" + ] + }, + { + "description": "The reconciler task is actively running.", + "type": "object", + "properties": { + "config": { + "$ref": "#/components/schemas/OmicronSledConfig" + }, + "running_for": { + "$ref": "#/components/schemas/Duration" + }, + "started_at": { + "type": "string", + "format": "date-time" + }, + "status": { + "type": "string", + "enum": [ + "running" + ] + } + }, + "required": [ + "config", + "running_for", + "started_at", + "status" + ] + }, + { + "description": "The reconciler task is currently idle, but previously did complete a reconciliation attempt.\n\nThis variant does not include the `OmicronSledConfig` used in the last attempt, because that's always available via [`ConfigReconcilerInventory::last_reconciled_config`].", + "type": "object", + "properties": { + "completed_at": { + "type": "string", + "format": "date-time" + }, + 
"ran_for": { + "$ref": "#/components/schemas/Duration" + }, + "status": { + "type": "string", + "enum": [ + "idle" + ] + } + }, + "required": [ + "completed_at", + "ran_for", + "status" + ] + } + ] + }, + "Cpuid": { + "description": "A set of CPUID values to expose to a guest.", + "type": "object", + "properties": { + "entries": { + "description": "A list of CPUID leaves/subleaves and their associated values.\n\nPropolis servers require that each entry's `leaf` be unique and that it falls in either the \"standard\" (0 to 0xFFFF) or \"extended\" (0x8000_0000 to 0x8000_FFFF) function ranges, since these are the only valid input ranges currently defined by Intel and AMD. See the Intel 64 and IA-32 Architectures Software Developer's Manual (June 2024) Table 3-17 and the AMD64 Architecture Programmer's Manual (March 2024) Volume 3's documentation of the CPUID instruction.", + "type": "array", + "items": { + "$ref": "#/components/schemas/CpuidEntry" + } + }, + "vendor": { + "description": "The CPU vendor to emulate.\n\nCPUID leaves in the extended range (0x8000_0000 to 0x8000_FFFF) have vendor-defined semantics. Propolis uses this value to determine these semantics when deciding whether it needs to specialize the supplied template values for these leaves.", + "allOf": [ + { + "$ref": "#/components/schemas/CpuidVendor" + } + ] + } + }, + "required": [ + "entries", + "vendor" + ], + "additionalProperties": false + }, + "CpuidEntry": { + "description": "A full description of a CPUID leaf/subleaf and the values it produces.", + "type": "object", + "properties": { + "eax": { + "description": "The value to return in eax.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "ebx": { + "description": "The value to return in ebx.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "ecx": { + "description": "The value to return in ecx.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "edx": { + "description": "The value to return in edx.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "leaf": { + "description": "The leaf (function) number for this entry.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "subleaf": { + "nullable": true, + "description": "The subleaf (index) number for this entry, if it uses subleaves.", + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "required": [ + "eax", + "ebx", + "ecx", + "edx", + "leaf" + ], + "additionalProperties": false + }, + "CpuidVendor": { + "description": "A CPU vendor to use when interpreting the meanings of CPUID leaves in the extended ID range (0x80000000 to 0x8000FFFF).", + "type": "string", + "enum": [ + "amd", + "intel" + ] + }, + "CrucibleStorageBackend": { + "description": "A Crucible storage backend.", + "type": "object", + "properties": { + "readonly": { + "description": "Indicates whether the storage is read-only.", + "type": "boolean" + }, + "request_json": { + "description": "A serialized `[crucible_client_types::VolumeConstructionRequest]`. 
This is stored in serialized form so that breaking changes to the definition of a `VolumeConstructionRequest` do not inadvertently break instance spec deserialization.\n\nWhen using a spec to initialize a new instance, the spec author must ensure this request is well-formed and can be deserialized by the version of `crucible_client_types` used by the target Propolis.", + "type": "string" + } + }, + "required": [ + "readonly", + "request_json" + ], + "additionalProperties": false + }, + "DatasetConfig": { + "description": "Configuration information necessary to request a single dataset.\n\nThese datasets are tracked directly by Nexus.", + "type": "object", + "properties": { + "compression": { + "description": "The compression mode to be used by the dataset", + "allOf": [ + { + "$ref": "#/components/schemas/CompressionAlgorithm" + } + ] + }, + "id": { + "description": "The UUID of the dataset being requested", + "allOf": [ + { + "$ref": "#/components/schemas/DatasetUuid" + } + ] + }, + "name": { + "description": "The dataset's name", + "allOf": [ + { + "$ref": "#/components/schemas/DatasetName" + } + ] + }, + "quota": { + "nullable": true, + "description": "The upper bound on the amount of storage used by this dataset", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "reservation": { + "nullable": true, + "description": "The lower bound on the amount of storage usable by this dataset", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + } + }, + "required": [ + "compression", + "id", + "name" + ] + }, + "DatasetKind": { + "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" + }, + "DatasetName": { + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/DatasetKind" + }, + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + "kind", + "pool_name" + ] + }, + "DatasetUuid": { + "x-rust-type": { + "crate": "omicron-uuid-kinds", + "path": "omicron_uuid_kinds::DatasetUuid", + "version": "*" + }, + "type": "string", + "format": "uuid" + }, + "DhcpConfig": { + "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", + "type": "object", + "properties": { + "dns_servers": { + "description": "DNS servers to send to the instance\n\n(DHCPv4 option 6; DHCPv6 option 23)", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "host_domain": { + "nullable": true, + "description": "DNS zone this instance's hostname belongs to (e.g. 
the `project.example` part of `instance1.project.example`)\n\n(DHCPv4 option 15; used in DHCPv6 option 39)", + "type": "string" + }, + "search_domains": { + "description": "DNS search domains\n\n(DHCPv4 option 119; DHCPv6 option 24)", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "dns_servers", + "search_domains" + ] + }, + "DiskEnsureBody": { + "description": "Sent from to a sled agent to establish the runtime state of a Disk", + "type": "object", + "properties": { + "initial_runtime": { + "description": "Last runtime state of the Disk known to Nexus (used if the agent has never seen this Disk before).", + "allOf": [ + { + "$ref": "#/components/schemas/DiskRuntimeState" + } + ] + }, + "target": { + "description": "requested runtime state of the Disk", + "allOf": [ + { + "$ref": "#/components/schemas/DiskStateRequested" + } + ] + } + }, + "required": [ + "initial_runtime", + "target" + ] + }, + "DiskIdentity": { + "description": "Uniquely identifies a disk.", + "type": "object", + "properties": { + "model": { + "type": "string" + }, + "serial": { + "type": "string" + }, + "vendor": { + "type": "string" + } + }, + "required": [ + "model", + "serial", + "vendor" + ] + }, + "DiskRuntimeState": { + "description": "Runtime state of the Disk, which includes its attach state and some minimal metadata", + "type": "object", + "properties": { + "disk_state": { + "description": "runtime state of the Disk", + "allOf": [ + { + "$ref": "#/components/schemas/DiskState" + } + ] + }, + "gen": { + "description": "generation number for this state", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "time_updated": { + "description": "timestamp for this information", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "disk_state", + "gen", + "time_updated" + ] + }, + "DiskState": { + "description": "State of a Disk", + "oneOf": [ + { + "description": "Disk is being initialized", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "creating" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is ready but detached from any Instance", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "detached" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is ready to receive blocks from an external source", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "import_ready" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is importing blocks from a URL", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "importing_from_url" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is importing blocks from bulk writes", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "importing_from_bulk_writes" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is being finalized to state Detached", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "finalizing" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is undergoing maintenance", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "maintenance" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is being attached to the given Instance", + "type": "object", + "properties": { + "instance": { + "type": 
"string", + "format": "uuid" + }, + "state": { + "type": "string", + "enum": [ + "attaching" + ] + } + }, + "required": [ + "instance", + "state" + ] + }, + { + "description": "Disk is attached to the given Instance", + "type": "object", + "properties": { + "instance": { + "type": "string", + "format": "uuid" + }, + "state": { + "type": "string", + "enum": [ + "attached" + ] + } + }, + "required": [ + "instance", + "state" + ] + }, + { + "description": "Disk is being detached from the given Instance", + "type": "object", + "properties": { + "instance": { + "type": "string", + "format": "uuid" + }, + "state": { + "type": "string", + "enum": [ + "detaching" + ] + } + }, + "required": [ + "instance", + "state" + ] + }, + { + "description": "Disk has been destroyed", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "destroyed" + ] + } + }, + "required": [ + "state" + ] + }, + { + "description": "Disk is unavailable", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "faulted" + ] + } + }, + "required": [ + "state" + ] + } + ] + }, + "DiskStateRequested": { + "description": "Used to request a Disk state change", + "oneOf": [ + { + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "detached" + ] + } + }, + "required": [ + "state" + ] + }, + { + "type": "object", + "properties": { + "instance": { + "type": "string", + "format": "uuid" + }, + "state": { + "type": "string", + "enum": [ + "attached" + ] + } + }, + "required": [ + "instance", + "state" + ] + }, + { + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "destroyed" + ] + } + }, + "required": [ + "state" + ] + }, + { + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "faulted" + ] + } + }, + "required": [ + "state" + ] + } + ] + }, + "DiskVariant": { + "type": "string", + "enum": [ + "U2", + "M2" + ] + }, + "DlpiNetworkBackend": { + "description": "A network backend associated with a DLPI VNIC on the host.", + "type": "object", + "properties": { + "vnic_name": { + "description": "The name of the VNIC to use as a backend.", + "type": "string" + } + }, + "required": [ + "vnic_name" + ], + "additionalProperties": false + }, + "Duration": { + "type": "object", + "properties": { + "nanos": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "secs": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "nanos", + "secs" + ] + }, + "EarlyNetworkConfig": { + "description": "Network configuration required to bring up the control plane\n\nThe fields in this structure are those from [`crate::rack_init::RackInitializeRequest`] necessary for use beyond RSS. This is just for the initial rack configuration and cold boot purposes. Updates come from Nexus.", + "type": "object", + "properties": { + "body": { + "$ref": "#/components/schemas/EarlyNetworkConfigBody" + }, + "generation": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "schema_version": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "required": [ + "body", + "generation", + "schema_version" + ] + }, + "EarlyNetworkConfigBody": { + "description": "This is the actual configuration of EarlyNetworking.\n\nWe nest it below the \"header\" of `generation` and `schema_version` so that we can perform partial deserialization of `EarlyNetworkConfig` to only read the header and defer deserialization of the body once we know the schema version. 
This is possible via the use of [`serde_json::value::RawValue`] in future (post-v1) deserialization paths.", + "type": "object", + "properties": { + "ntp_servers": { + "description": "The external NTP server addresses.", + "type": "array", + "items": { + "type": "string" + } + }, + "rack_network_config": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RackNetworkConfigV2" + } + ] + } + }, + "required": [ + "ntp_servers" + ] + }, + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + }, + "EstablishedConnection": { + "type": "object", + "properties": { + "addr": { + "type": "string" + }, + "baseboard": { + "$ref": "#/components/schemas/Baseboard" + } + }, + "required": [ + "addr", + "baseboard" + ] + }, + "ExternalIp": { + "description": "An external IP address used by a probe.", + "type": "object", + "properties": { + "first_port": { + "description": "The first port used by the address.", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "ip": { + "description": "The external IP address.", + "type": "string", + "format": "ip" + }, + "kind": { + "description": "The kind of address this is.", + "allOf": [ + { + "$ref": "#/components/schemas/IpKind" + } + ] + }, + "last_port": { + "description": "The last port used by the address.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "first_port", + "ip", + "kind", + "last_port" + ] + }, + "ExternalIpGatewayMap": { + "description": "Per-NIC mappings from external IP addresses to the Internet Gateways which can choose them as a source.", + "type": "object", + "properties": { + "mappings": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string", + "format": "uuid" + }, + "uniqueItems": true + } + } + } + }, + "required": [ + "mappings" + ] + }, + "FileStorageBackend": { + "description": "A storage backend backed by a file in the host system's file system.", + "type": "object", + "properties": { + "block_size": { + "description": "Block size of the backend", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "path": { + "description": "A path to a file that backs a disk.", + "type": "string" + }, + "readonly": { + "description": "Indicates whether the storage is read-only.", + "type": "boolean" + }, + "workers": { + "nullable": true, + "description": "Optional worker threads for the file backend, exposed for testing only.", + "type": "integer", + "format": "uint", + "minimum": 1 + } + }, + "required": [ + "block_size", + "path", + "readonly" + ], + "additionalProperties": false + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "GuestHypervisorInterface": { + "description": "A hypervisor interface to expose to the guest.", + "oneOf": [ + { + "description": "Expose a bhyve-like interface (\"bhyve bhyve \" as the hypervisor ID in leaf 0x4000_0000 and no additional leaves or features).", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "bhyve" + ] + } + }, + "required": [ + "type" + ], + "additionalProperties": false + }, + { + "description": "Expose a Hyper-V-compatible 
hypervisor interface with the supplied features enabled.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "hyper_v" + ] + }, + "value": { + "type": "object", + "properties": { + "features": { + "type": "array", + "items": { + "$ref": "#/components/schemas/HyperVFeatureFlag" + }, + "uniqueItems": true + } + }, + "required": [ + "features" + ], + "additionalProperties": false + } + }, + "required": [ + "type", + "value" + ], + "additionalProperties": false + } + ] + }, + "GzipLevel": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "HostIdentifier": { + "description": "A `HostIdentifier` represents either an IP host or network (v4 or v6), or an entire VPC (identified by its VNI). It is used in firewall rule host filters.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip" + ] + }, + "value": { + "$ref": "#/components/schemas/IpNet" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "vpc" + ] + }, + "value": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "HostPhase2DesiredContents": { + "description": "Describes the desired contents of a host phase 2 slot (i.e., the boot partition on one of the internal M.2 drives).", + "oneOf": [ + { + "description": "Do not change the current contents.\n\nWe use this value when we've detected a sled has been mupdated (and we don't want to overwrite phase 2 images until we understand how to recover from that mupdate) and as the default value when reading an [`OmicronSledConfig`] that was ledgered before this concept existed.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "current_contents" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Set the phase 2 slot to the given artifact.\n\nThe artifact will come from an unpacked and distributed TUF repo.", + "type": "object", + "properties": { + "hash": { + "type": "string", + "format": "hex string (32 bytes)" + }, + "type": { + "type": "string", + "enum": [ + "artifact" + ] + } + }, + "required": [ + "hash", + "type" + ] + } + ] + }, + "HostPhase2DesiredSlots": { + "description": "Describes the desired contents for both host phase 2 slots.", + "type": "object", + "properties": { + "slot_a": { + "$ref": "#/components/schemas/HostPhase2DesiredContents" + }, + "slot_b": { + "$ref": "#/components/schemas/HostPhase2DesiredContents" + } + }, + "required": [ + "slot_a", + "slot_b" + ] + }, + "HostPortConfig": { + "type": "object", + "properties": { + "addrs": { + "description": "IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport (must be in infra_ip pool). May also include an optional VLAN ID.", + "type": "array", + "items": { + "$ref": "#/components/schemas/UplinkAddressConfig" + } + }, + "lldp": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, + "port": { + "description": "Switchport to use for external connectivity", + "type": "string" + }, + "tx_eq": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/TxEqConfig" + } + ] + } + }, + "required": [ + "addrs", + "port" + ] + }, + "Hostname": { + "title": "An RFC-1035-compliant hostname", + "description": "A hostname identifies a host on a network, and is usually a dot-delimited sequence of labels, where each label contains only letters, digits, or the hyphen. 
See RFCs 1035 and 952 for more details.", + "type": "string", + "pattern": "^([a-zA-Z0-9]+[a-zA-Z0-9\\-]*(? for background.", + "oneOf": [ + { + "description": "Start the switch zone if a switch is present.\n\nThis is the default policy.", + "type": "object", + "properties": { + "policy": { + "type": "string", + "enum": [ + "start_if_switch_present" + ] + } + }, + "required": [ + "policy" + ] + }, + { + "description": "Even if a switch zone is present, stop the switch zone.", + "type": "object", + "properties": { + "policy": { + "type": "string", + "enum": [ + "stop_despite_switch_presence" + ] + } + }, + "required": [ + "policy" + ] + } + ] + }, + "OrphanedDataset": { + "type": "object", + "properties": { + "available": { + "$ref": "#/components/schemas/ByteCount" + }, + "id": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/DatasetUuid" + } + ] + }, + "mounted": { + "type": "boolean" + }, + "name": { + "$ref": "#/components/schemas/DatasetName" + }, + "reason": { + "type": "string" + }, + "used": { + "$ref": "#/components/schemas/ByteCount" + } + }, + "required": [ + "available", + "mounted", + "name", + "reason", + "used" + ] + }, + "P9fs": { + "description": "Describes a filesystem to expose through a P9 device.\n\nThis is only supported by Propolis servers compiled with the `falcon` feature.", + "type": "object", + "properties": { + "chunk_size": { + "description": "The chunk size to use in the 9P protocol. Vanilla Helios images should use 8192. Falcon Helios base images and Linux can use up to 65536.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "pci_path": { + "description": "The PCI path at which to attach the guest to this P9 filesystem.", + "allOf": [ + { + "$ref": "#/components/schemas/PciPath" + } + ] + }, + "source": { + "description": "The host source path to mount into the guest.", + "type": "string" + }, + "target": { + "description": "The 9P target filesystem tag.", + "type": "string" + } + }, + "required": [ + "chunk_size", + "pci_path", + "source", + "target" + ], + "additionalProperties": false + }, + "PciPath": { + "description": "A PCI bus/device/function tuple.", + "type": "object", + "properties": { + "bus": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "device": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "function": { + "type": "integer", + "format": "uint8", + "minimum": 0 + } + }, + "required": [ + "bus", + "device", + "function" + ] + }, + "PciPciBridge": { + "description": "A PCI-PCI bridge.", + "type": "object", + "properties": { + "downstream_bus": { + "description": "The logical bus number of this bridge's downstream bus. 
Other devices may use this bus number in their PCI paths to indicate they should be attached to this bridge's bus.", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "pci_path": { + "description": "The PCI path at which to attach this bridge.", + "allOf": [ + { + "$ref": "#/components/schemas/PciPath" + } + ] + } + }, + "required": [ + "downstream_bus", + "pci_path" + ], + "additionalProperties": false + }, + "PhysicalDiskUuid": { + "x-rust-type": { + "crate": "omicron-uuid-kinds", + "path": "omicron_uuid_kinds::PhysicalDiskUuid", + "version": "*" + }, + "type": "string", + "format": "uuid" + }, + "PortConfigV2": { + "type": "object", + "properties": { + "addresses": { + "description": "This port's addresses and optional vlan IDs", + "type": "array", + "items": { + "$ref": "#/components/schemas/UplinkAddressConfig" + } + }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "default": false, + "type": "boolean" + }, + "bgp_peers": { + "description": "BGP peers on this port", + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpPeerConfig" + } + }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, + "port": { + "description": "Nmae of the port this config applies to.", + "type": "string" + }, + "routes": { + "description": "The set of routes associated with this port.", + "type": "array", + "items": { + "$ref": "#/components/schemas/RouteConfig" + } + }, + "switch": { + "description": "Switch the port belongs to.", + "allOf": [ + { + "$ref": "#/components/schemas/SwitchLocation" + } + ] + }, + "tx_eq": { + "nullable": true, + "description": "TX-EQ configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/TxEqConfig" + } + ] + }, + "uplink_port_fec": { + "nullable": true, + "description": "Port forward error correction type.", + "allOf": [ + { + "$ref": "#/components/schemas/PortFec" + } + ] + }, + "uplink_port_speed": { + "description": "Port speed.", + "allOf": [ + { + "$ref": "#/components/schemas/PortSpeed" + } + ] + } + }, + "required": [ + "addresses", + "bgp_peers", + "port", + "routes", + "switch", + "uplink_port_speed" + ] + }, + "PortFec": { + "description": "Switchport FEC options", + "type": "string", + "enum": [ + "firecode", + "none", + "rs" + ] + }, + "PortSpeed": { + "description": "Switchport Speed options", + "type": "string", + "enum": [ + "speed0_g", + "speed1_g", + "speed10_g", + "speed25_g", + "speed40_g", + "speed50_g", + "speed100_g", + "speed200_g", + "speed400_g" + ] + }, + "PriorityDimension": { + "description": "A dimension along with bundles can be sorted, to determine priority.", + "oneOf": [ + { + "description": "Sorting by time, with older bundles with lower priority.", + "type": "string", + "enum": [ + "time" + ] + }, + { + "description": "Sorting by the cause for creating the bundle.", + "type": "string", + "enum": [ + "cause" + ] + } + ] + }, + "PriorityOrder": { + "description": "The priority order for bundles during cleanup.\n\nBundles are sorted along the dimensions in [`PriorityDimension`], with each dimension appearing exactly once. During cleanup, lesser-priority bundles are pruned first, to maintain the dataset quota. 
Note that bundles are sorted by each dimension in the order in which they appear, with each dimension having higher priority than the next.", + "type": "array", + "items": { + "$ref": "#/components/schemas/PriorityDimension" + }, + "minItems": 2, + "maxItems": 2 + }, + "ProbeCreate": { + "description": "Parameters used to create a probe.", + "type": "object", + "properties": { + "external_ips": { + "description": "The external IP addresses assigned to the probe.", + "type": "array", + "items": { + "$ref": "#/components/schemas/ExternalIp" + } + }, + "id": { + "description": "The ID for the probe.", + "allOf": [ + { + "$ref": "#/components/schemas/ProbeUuid" + } + ] + }, + "interface": { + "description": "The probe's networking interface.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + } + }, + "required": [ + "external_ips", + "id", + "interface" + ] + }, + "ProbeSet": { + "description": "A set of probes that the target sled should run.", + "type": "object", + "properties": { + "probes": { + "title": "IdHashMap", + "description": "The exact set of probes to run.", + "x-rust-type": { + "crate": "iddqd", + "parameters": [ + { + "$ref": "#/components/schemas/ProbeCreate" + } + ], + "path": "iddqd::IdHashMap", + "version": "*" + }, + "type": "array", + "items": { + "$ref": "#/components/schemas/ProbeCreate" + }, + "uniqueItems": true + } + }, + "required": [ + "probes" + ] + }, + "ProbeUuid": { + "x-rust-type": { + "crate": "omicron-uuid-kinds", + "path": "omicron_uuid_kinds::ProbeUuid", + "version": "*" + }, + "type": "string", + "format": "uuid" + }, + "QemuPvpanic": { + "type": "object", + "properties": { + "enable_isa": { + "description": "Enable the QEMU PVPANIC ISA bus device (I/O port 0x505).", + "type": "boolean" + } + }, + "required": [ + "enable_isa" + ], + "additionalProperties": false + }, + "RackNetworkConfigV2": { + "description": "Initial network configuration", + "type": "object", + "properties": { + "bfd": { + "description": "BFD configuration for connecting the rack to external networks", + "default": [], + "type": "array", + "items": { + "$ref": "#/components/schemas/BfdPeerConfig" + } + }, + "bgp": { + "description": "BGP configurations for connecting the rack to external networks", + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpConfig" + } + }, + "infra_ip_first": { + "description": "First ip address to be used for configuring network infrastructure", + "type": "string", + "format": "ipv4" + }, + "infra_ip_last": { + "description": "Last ip address to be used for configuring network infrastructure", + "type": "string", + "format": "ipv4" + }, + "ports": { + "description": "Uplinks for connecting the rack to external networks", + "type": "array", + "items": { + "$ref": "#/components/schemas/PortConfigV2" + } + }, + "rack_subnet": { + "$ref": "#/components/schemas/Ipv6Net" + } + }, + "required": [ + "bgp", + "infra_ip_first", + "infra_ip_last", + "ports", + "rack_subnet" + ] + }, + "RemoveMupdateOverrideBootSuccessInventory": { + "description": "Status of removing the mupdate override on the boot disk.", + "oneOf": [ + { + "description": "The mupdate override was successfully removed.", + "type": "string", + "enum": [ + "removed" + ] + }, + { + "description": "No mupdate override was found.\n\nThis is considered a success for idempotency reasons.", + "type": "string", + "enum": [ + "no_override" + ] + } + ] + }, + "RemoveMupdateOverrideInventory": { + "description": "Status of removing the mupdate override in the 
inventory.", + "type": "object", + "properties": { + "boot_disk_result": { + "description": "The result of removing the mupdate override on the boot disk.", + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "$ref": "#/components/schemas/RemoveMupdateOverrideBootSuccessInventory" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "$ref": "#/components/schemas/RemoveMupdateOverrideBootSuccessInventory" + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + }, + "non_boot_message": { + "description": "What happened on non-boot disks.\n\nWe aren't modeling this out in more detail, because we plan to not try and keep ledgered data in sync across both disks in the future.", + "type": "string" + } + }, + "required": [ + "boot_disk_result", + "non_boot_message" + ] + }, + "ResolvedVpcFirewallRule": { + "description": "VPC firewall rule after object name resolution has been performed by Nexus", + "type": "object", + "properties": { + "action": { + "$ref": "#/components/schemas/VpcFirewallRuleAction" + }, + "direction": { + "$ref": "#/components/schemas/VpcFirewallRuleDirection" + }, + "filter_hosts": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/HostIdentifier" + }, + "uniqueItems": true + }, + "filter_ports": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/L4PortRange" + } + }, + "filter_protocols": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/VpcFirewallRuleProtocol" + } + }, + "priority": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "status": { + "$ref": "#/components/schemas/VpcFirewallRuleStatus" + }, + "targets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/NetworkInterface" + } + } + }, + "required": [ + "action", + "direction", + "priority", + "status", + "targets" + ] + }, + "ResolvedVpcRoute": { + "description": "A VPC route resolved into a concrete target.", + "type": "object", + "properties": { + "dest": { + "$ref": "#/components/schemas/IpNet" + }, + "target": { + "$ref": "#/components/schemas/RouterTarget" + } + }, + "required": [ + "dest", + "target" + ] + }, + "ResolvedVpcRouteSet": { + "description": "An updated set of routes for a given VPC and/or subnet.", + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/RouterId" + }, + "routes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRoute" + }, + "uniqueItems": true + }, + "version": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RouterVersion" + } + ] + } + }, + "required": [ + "id", + "routes" + ] + }, + "ResolvedVpcRouteState": { + "description": "Version information for routes on a given VPC subnet.", + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/RouterId" + }, + "version": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RouterVersion" + } + ] + } + }, + "required": [ + "id" + ] + }, + "RouteConfig": { + "type": "object", + "properties": { + "destination": { + "description": "The destination of the route.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNet" + } + ] + }, + "nexthop": { + "description": "The nexthop/gateway address.", + "type": "string", + "format": "ip" + }, + "rib_priority": { + "nullable": 
true, + "description": "The RIB priority (i.e. Admin Distance) associated with this route.", + "default": null, + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "vlan_id": { + "nullable": true, + "description": "The VLAN id associated with this route.", + "default": null, + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "destination", + "nexthop" + ] + }, + "RouterId": { + "description": "Identifier for a VPC and/or subnet.", + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/RouterKind" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "kind", + "vni" + ] + }, + "RouterKind": { + "description": "The scope of a set of VPC router rules.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "system" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "subnet": { + "$ref": "#/components/schemas/IpNet" + }, + "type": { + "type": "string", + "enum": [ + "custom" + ] + } + }, + "required": [ + "subnet", + "type" + ] + } + ] + }, + "RouterTarget": { + "description": "The target for a given router entry.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "drop" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "internet_gateway" + ] + }, + "value": { + "$ref": "#/components/schemas/InternetGatewayRouterTarget" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "vpc_subnet" + ] + }, + "value": { + "$ref": "#/components/schemas/IpNet" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "RouterVersion": { + "description": "Information on the current parent router (and version) of a route set according to the control plane.", + "type": "object", + "properties": { + "router_id": { + "type": "string", + "format": "uuid" + }, + "version": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "router_id", + "version" + ] + }, + "SerialPort": { + "description": "A serial port device.", + "type": "object", + "properties": { + "num": { + "description": "The serial port number for this port.", + "allOf": [ + { + "$ref": "#/components/schemas/SerialPortNumber" + } + ] + } + }, + "required": [ + "num" + ], + "additionalProperties": false + }, + "SerialPortNumber": { + "description": "A serial port identifier, which determines what I/O ports a guest can use to access a port.", + "type": "string", + "enum": [ + "com1", + "com2", + "com3", + "com4" + ] + }, + "SledCpuFamily": { + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.\n\nThis is intended to broadly support the control plane answering the question \"can I run this instance on that sled?\" given an instance with either no or some CPU platform requirement. It is not enough information for more precise placement questions - for example, is a CPU a high-frequency part or many-core part? We don't include Genoa here, but in that CPU family there are high frequency parts, many-core parts, and large-cache parts. 
To support those questions (or satisfactorily answer #8730) we would need to collect additional information and send it along.", + "oneOf": [ + { + "description": "The CPU vendor or its family number don't correspond to any of the known family variants.", + "type": "string", + "enum": [ + "unknown" + ] + }, + { + "description": "AMD Milan processors (or very close). Could be an actual Milan in a Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is the greatest common denominator).", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "AMD Turin processors (or very close). Could be an actual Turin in a Cosmo, or a close-to-Turin client Zen 5 part.", + "type": "string", + "enum": [ + "amd_turin" + ] + }, + { + "description": "AMD Turin Dense processors. There are no \"Turin Dense-like\" CPUs unlike other cases, so this means a bona fide Zen 5c Turin Dense part.", + "type": "string", + "enum": [ + "amd_turin_dense" + ] + } + ] + }, + "SledDiagnosticsQueryOutput": { + "oneOf": [ + { + "type": "object", + "properties": { + "success": { + "type": "object", + "properties": { + "command": { + "description": "The command and its arguments.", + "type": "string" + }, + "exit_code": { + "nullable": true, + "description": "The exit code if one was present when the command exited.", + "type": "integer", + "format": "int32" + }, + "exit_status": { + "description": "The exit status of the command. This will be the exit code (if any) and exit reason such as from a signal.", + "type": "string" + }, + "stdio": { + "description": "Any stdout/stderr produced by the command.", + "type": "string" + } + }, + "required": [ + "command", + "exit_status", + "stdio" + ] + } + }, + "required": [ + "success" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "failure": { + "type": "object", + "properties": { + "error": { + "description": "The reason the command failed to execute.", + "type": "string" + } + }, + "required": [ + "error" + ] + } + }, + "required": [ + "failure" + ], + "additionalProperties": false + } + ] + }, + "SledIdentifiers": { + "description": "Identifiers for a single sled.\n\nThis is intended primarily to be used in timeseries, to identify sled from which metric data originates.", + "type": "object", + "properties": { + "model": { + "description": "Model name of the sled", + "type": "string" + }, + "rack_id": { + "description": "Control plane ID of the rack this sled is a member of", + "type": "string", + "format": "uuid" + }, + "revision": { + "description": "Revision number of the sled", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "serial": { + "description": "Serial number of the sled", + "type": "string" + }, + "sled_id": { + "description": "Control plane ID for the sled itself", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "model", + "rack_id", + "revision", + "serial", + "sled_id" + ] + }, + "SledRole": { + "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", + "oneOf": [ + { + "description": "The sled is a general compute sled.", + "type": "string", + "enum": [ + "gimlet" + ] + }, + { + "description": "The sled is attached to the network switch, and has additional responsibilities.", + "type": "string", + "enum": [ + "scrimlet" + ] + } + ] + }, + "SledUuid": { + "x-rust-type": { + "crate": "omicron-uuid-kinds", + "path": "omicron_uuid_kinds::SledUuid", + "version": "*" + }, + "type": 
"string", + "format": "uuid" + }, + "SledVmmState": { + "description": "A wrapper type containing a sled's total knowledge of the state of a VMM.", + "type": "object", + "properties": { + "migration_in": { + "nullable": true, + "description": "The current state of any inbound migration to this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "migration_out": { + "nullable": true, + "description": "The state of any outbound migration from this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "vmm_state": { + "description": "The most recent state of the sled's VMM process.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmRuntimeState" + } + ] + } + }, + "required": [ + "vmm_state" + ] + }, + "SoftNpuP9": { + "description": "Describes a PCI device that shares host files with the guest using the P9 protocol.\n\nThis is only supported by Propolis servers compiled with the `falcon` feature.", + "type": "object", + "properties": { + "pci_path": { + "description": "The PCI path at which to attach the guest to this port.", + "allOf": [ + { + "$ref": "#/components/schemas/PciPath" + } + ] + } + }, + "required": [ + "pci_path" + ], + "additionalProperties": false + }, + "SoftNpuPciPort": { + "description": "Describes a SoftNPU PCI device.\n\nThis is only supported by Propolis servers compiled with the `falcon` feature.", + "type": "object", + "properties": { + "pci_path": { + "description": "The PCI path at which to attach the guest to this port.", + "allOf": [ + { + "$ref": "#/components/schemas/PciPath" + } + ] + } + }, + "required": [ + "pci_path" + ], + "additionalProperties": false + }, + "SoftNpuPort": { + "description": "Describes a port in a SoftNPU emulated ASIC.\n\nThis is only supported by Propolis servers compiled with the `falcon` feature.", + "type": "object", + "properties": { + "backend_id": { + "description": "The name of the port's associated DLPI backend.", + "allOf": [ + { + "$ref": "#/components/schemas/SpecKey" + } + ] + }, + "link_name": { + "description": "The data link name for this port.", + "type": "string" + } + }, + "required": [ + "backend_id", + "link_name" + ], + "additionalProperties": false + }, + "SourceNatConfig": { + "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", + "type": "object", + "properties": { + "first_port": { + "description": "The first port used for source NAT, inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "ip": { + "description": "The external address provided to the instance or service.", + "type": "string", + "format": "ip" + }, + "last_port": { + "description": "The last port used for source NAT, also inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "first_port", + "ip", + "last_port" + ] + }, + "SpecKey": { + "description": "A key identifying a component in an instance spec.", + "oneOf": [ + { + "title": "uuid", + "allOf": [ + { + "type": "string", + "format": "uuid" + } + ] + }, + { + "title": "name", + "allOf": [ + { + "type": "string" + } + ] + } + ] + }, + "StartSledAgentRequest": { + "description": "Configuration information for launching a Sled Agent.", + "type": "object", + "properties": { + "body": { + "$ref": "#/components/schemas/StartSledAgentRequestBody" + }, + "generation": { + "description": "The current generation number of data as stored in CRDB.\n\nThe initial generation is 
set during RSS time and then only mutated by Nexus. For now, we don't actually anticipate mutating this data, but we leave open the possiblity.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "schema_version": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "required": [ + "body", + "generation", + "schema_version" + ] + }, + "StartSledAgentRequestBody": { + "description": "This is the actual app level data of `StartSledAgentRequest`\n\nWe nest it below the \"header\" of `generation` and `schema_version` so that we can perform partial deserialization of `EarlyNetworkConfig` to only read the header and defer deserialization of the body once we know the schema version. This is possible via the use of [`serde_json::value::RawValue`] in future (post-v1) deserialization paths.", + "type": "object", + "properties": { + "id": { + "description": "Uuid of the Sled Agent to be created.", + "allOf": [ + { + "$ref": "#/components/schemas/SledUuid" + } + ] + }, + "is_lrtq_learner": { + "description": "Is this node an LRTQ learner node?\n\nWe only put the node into learner mode if `use_trust_quorum` is also true.", + "type": "boolean" + }, + "rack_id": { + "description": "Uuid of the rack to which this sled agent belongs.", + "type": "string", + "format": "uuid" + }, + "subnet": { + "description": "Portion of the IP space to be managed by the Sled Agent.", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv6Subnet" + } + ] + }, + "use_trust_quorum": { + "description": "Use trust quorum for key generation", + "type": "boolean" + } + }, + "required": [ + "id", + "is_lrtq_learner", + "rack_id", + "subnet", + "use_trust_quorum" + ] + }, + "StorageLimit": { + "description": "The limit on space allowed for zone bundles, as a percentage of the overall dataset's quota.", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "SupportBundleMetadata": { + "description": "Metadata about a support bundle", + "type": "object", + "properties": { + "state": { + "$ref": "#/components/schemas/SupportBundleState" + }, + "support_bundle_id": { + "$ref": "#/components/schemas/SupportBundleUuid" + } + }, + "required": [ + "state", + "support_bundle_id" + ] + }, + "SupportBundleState": { + "type": "string", + "enum": [ + "complete", + "incomplete" + ] + }, + "SupportBundleUuid": { + "x-rust-type": { + "crate": "omicron-uuid-kinds", + "path": "omicron_uuid_kinds::SupportBundleUuid", + "version": "*" + }, + "type": "string", + "format": "uuid" + }, + "SwitchLocation": { + "description": "Identifies switch physical location", + "oneOf": [ + { + "description": "Switch in upper slot", + "type": "string", + "enum": [ + "switch0" + ] + }, + { + "description": "Switch in lower slot", + "type": "string", + "enum": [ + "switch1" + ] + } + ] + }, + "SwitchPorts": { + "description": "A set of switch uplinks.", + "type": "object", + "properties": { + "uplinks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/HostPortConfig" + } + } + }, + "required": [ + "uplinks" + ] + }, + "TxEqConfig": { + "description": "Per-port tx-eq overrides. 
This can be used to fine-tune the transceiver equalization settings to improve signal integrity.", + "type": "object", + "properties": { + "main": { + "nullable": true, + "description": "Main tap", + "type": "integer", + "format": "int32" + }, + "post1": { + "nullable": true, + "description": "Post-cursor tap1", + "type": "integer", + "format": "int32" + }, + "post2": { + "nullable": true, + "description": "Post-cursor tap2", + "type": "integer", + "format": "int32" + }, + "pre1": { + "nullable": true, + "description": "Pre-cursor tap1", + "type": "integer", + "format": "int32" + }, + "pre2": { + "nullable": true, + "description": "Pre-cursor tap2", + "type": "integer", + "format": "int32" + } + } + }, + "UplinkAddressConfig": { + "type": "object", + "properties": { + "address": { + "$ref": "#/components/schemas/IpNet" + }, + "vlan_id": { + "nullable": true, + "description": "The VLAN id (if any) associated with this address.", + "default": null, + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "address" + ] + }, + "VirtioDisk": { + "description": "A disk that presents a virtio-block interface to the guest.", + "type": "object", + "properties": { + "backend_id": { + "description": "The name of the disk's backend component.", + "allOf": [ + { + "$ref": "#/components/schemas/SpecKey" + } + ] + }, + "pci_path": { + "description": "The PCI bus/device/function at which this disk should be attached.", + "allOf": [ + { + "$ref": "#/components/schemas/PciPath" + } + ] + } + }, + "required": [ + "backend_id", + "pci_path" + ], + "additionalProperties": false + }, + "VirtioNetworkBackend": { + "description": "A network backend associated with a virtio-net (viona) VNIC on the host.", + "type": "object", + "properties": { + "vnic_name": { + "description": "The name of the viona VNIC to use as a backend.", + "type": "string" + } + }, + "required": [ + "vnic_name" + ], + "additionalProperties": false + }, + "VirtioNic": { + "description": "A network card that presents a virtio-net interface to the guest.", + "type": "object", + "properties": { + "backend_id": { + "description": "The name of the device's backend.", + "allOf": [ + { + "$ref": "#/components/schemas/SpecKey" + } + ] + }, + "interface_id": { + "description": "A caller-defined correlation identifier for this interface. 
If Propolis is configured to collect network interface kstats in its Oximeter metrics, the metric series for this interface will be associated with this identifier.", + "type": "string", + "format": "uuid" + }, + "pci_path": { + "description": "The PCI path at which to attach this device.", + "allOf": [ + { + "$ref": "#/components/schemas/PciPath" + } + ] + } + }, + "required": [ + "backend_id", + "interface_id", + "pci_path" + ], + "additionalProperties": false + }, + "VirtualNetworkInterfaceHost": { + "description": "A mapping from a virtual NIC to a physical host", + "type": "object", + "properties": { + "physical_host_ip": { + "type": "string", + "format": "ipv6" + }, + "virtual_ip": { + "type": "string", + "format": "ip" + }, + "virtual_mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "physical_host_ip", + "virtual_ip", + "virtual_mac", + "vni" + ] + }, + "VmmIssueDiskSnapshotRequestBody": { + "type": "object", + "properties": { + "snapshot_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "snapshot_id" + ] + }, + "VmmIssueDiskSnapshotRequestResponse": { + "type": "object", + "properties": { + "snapshot_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "snapshot_id" + ] + }, + "VmmPutStateBody": { + "description": "The body of a request to move a previously-ensured instance into a specific runtime state.", + "type": "object", + "properties": { + "state": { + "description": "The state into which the instance should be driven.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmStateRequested" + } + ] + } + }, + "required": [ + "state" + ] + }, + "VmmPutStateResponse": { + "description": "The response sent from a request to move an instance into a specific runtime state.", + "type": "object", + "properties": { + "updated_runtime": { + "nullable": true, + "description": "The current runtime state of the instance after handling the request to change its state. 
If the instance's state did not change, this field is `None`.", + "allOf": [ + { + "$ref": "#/components/schemas/SledVmmState" + } + ] + } + } + }, + "VmmRuntimeState": { + "description": "The dynamic runtime properties of an individual VMM process.", + "type": "object", + "properties": { + "gen": { + "description": "The generation number for this VMM's state.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "state": { + "description": "The last state reported by this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmState" + } + ] + }, + "time_updated": { + "description": "Timestamp for the VMM's state.", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "gen", + "state", + "time_updated" + ] + }, + "VmmSpec": { + "description": "Specifies the virtual hardware configuration of a new Propolis VMM in the form of a Propolis instance specification.\n\nSled-agent expects that when an instance spec is provided alongside an `InstanceSledLocalConfig` to initialize a new instance, the NIC IDs in that config's network interface list will match the IDs of the virtio network backends in the instance spec.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceSpecV0" + } + ] + }, + "VmmState": { + "description": "One of the states that a VMM can be in.", + "oneOf": [ + { + "description": "The VMM is initializing and has not started running guest CPUs yet.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The VMM has finished initializing and may be running guest CPUs.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The VMM is shutting down.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The VMM's guest has stopped, and the guest will not run again, but the VMM process may not have released all of its resources yet.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The VMM is being restarted or its guest OS is rebooting.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The VMM is part of a live migration.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The VMM process reported an internal failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The VMM process has been destroyed and its resources have been released.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, + "VmmStateRequested": { + "description": "Requestable running state of an Instance.\n\nA subset of [`omicron_common::api::external::InstanceState`].", + "oneOf": [ + { + "description": "Run this instance by migrating in from a previous running incarnation of the instance.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "migration_target" + ] + }, + "value": { + "$ref": "#/components/schemas/InstanceMigrationTargetParams" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "Start the instance if it is not already running.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "running" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Stop the instance.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "stopped" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Immediately reset the instance, as though it had stopped and immediately began to run again.", + "type": "object", + "properties": { + "type": 
{ + "type": "string", + "enum": [ + "reboot" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, + "VmmUnregisterResponse": { + "description": "The response sent from a request to unregister an instance.", + "type": "object", + "properties": { + "updated_runtime": { + "nullable": true, + "description": "The current state of the instance after handling the request to unregister it. If the instance's state did not change, this field is `None`.", + "allOf": [ + { + "$ref": "#/components/schemas/SledVmmState" + } + ] + } + } + }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "VpcFirewallIcmpFilter": { + "type": "object", + "properties": { + "code": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/IcmpParamRange" + } + ] + }, + "icmp_type": { + "type": "integer", + "format": "uint8", + "minimum": 0 + } + }, + "required": [ + "icmp_type" + ] + }, + "VpcFirewallRuleAction": { + "type": "string", + "enum": [ + "allow", + "deny" + ] + }, + "VpcFirewallRuleDirection": { + "type": "string", + "enum": [ + "inbound", + "outbound" + ] + }, + "VpcFirewallRuleProtocol": { + "description": "The protocols that may be specified in a firewall rule's filter", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "tcp" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "udp" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "icmp" + ] + }, + "value": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/VpcFirewallIcmpFilter" + } + ] + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "VpcFirewallRuleStatus": { + "type": "string", + "enum": [ + "disabled", + "enabled" + ] + }, + "VpcFirewallRulesEnsureBody": { + "description": "Update firewall rules for a VPC", + "type": "object", + "properties": { + "rules": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcFirewallRule" + } + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "rules", + "vni" + ] + }, + "ZoneArtifactInventory": { + "description": "Inventory representation of a single zone artifact on a boot disk.\n\nPart of [`ZoneManifestBootInventory`].", + "type": "object", + "properties": { + "expected_hash": { + "description": "The expected digest of the file's contents.", + "type": "string", + "format": "hex string (32 bytes)" + }, + "expected_size": { + "description": "The expected size of the file, in bytes.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "file_name": { + "description": "The name of the zone file on disk, for example `nexus.tar.gz`. 
Zone files are always \".tar.gz\".", + "type": "string" + }, + "path": { + "description": "The full path to the zone file.", + "type": "string", + "format": "Utf8PathBuf" + }, + "status": { + "description": "The status of the artifact.\n\nThis is `Ok(())` if the artifact is present and matches the expected size and digest, or an error message if it is missing or does not match.", + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "type": "null" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "type": "string", + "enum": [ + null + ] + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + } + }, + "required": [ + "expected_hash", + "expected_size", + "file_name", + "path", + "status" + ] + }, + "ZoneBundleCause": { + "description": "The reason or cause for a zone bundle, i.e., why it was created.", + "oneOf": [ + { + "description": "Some other, unspecified reason.", + "type": "string", + "enum": [ + "other" + ] + }, + { + "description": "A zone bundle taken when a sled agent finds a zone that it does not expect to be running.", + "type": "string", + "enum": [ + "unexpected_zone" + ] + }, + { + "description": "An instance zone was terminated.", + "type": "string", + "enum": [ + "terminated_instance" + ] + } + ] + }, + "ZoneBundleId": { + "description": "An identifier for a zone bundle.", + "type": "object", + "properties": { + "bundle_id": { + "description": "The ID for this bundle itself.", + "type": "string", + "format": "uuid" + }, + "zone_name": { + "description": "The name of the zone this bundle is derived from.", + "type": "string" + } + }, + "required": [ + "bundle_id", + "zone_name" + ] + }, + "ZoneBundleMetadata": { + "description": "Metadata about a zone bundle.", + "type": "object", + "properties": { + "cause": { + "description": "The reason or cause a bundle was created.", + "allOf": [ + { + "$ref": "#/components/schemas/ZoneBundleCause" + } + ] + }, + "id": { + "description": "Identifier for this zone bundle", + "allOf": [ + { + "$ref": "#/components/schemas/ZoneBundleId" + } + ] + }, + "time_created": { + "description": "The time at which this zone bundle was created.", + "type": "string", + "format": "date-time" + }, + "version": { + "description": "A version number for this zone bundle.", + "type": "integer", + "format": "uint8", + "minimum": 0 + } + }, + "required": [ + "cause", + "id", + "time_created", + "version" + ] + }, + "ZoneImageResolverInventory": { + "description": "Inventory representation of zone image resolver status and health.", + "type": "object", + "properties": { + "mupdate_override": { + "description": "The mupdate override status.", + "allOf": [ + { + "$ref": "#/components/schemas/MupdateOverrideInventory" + } + ] + }, + "zone_manifest": { + "description": "The zone manifest status.", + "allOf": [ + { + "$ref": "#/components/schemas/ZoneManifestInventory" + } + ] + } + }, + "required": [ + "mupdate_override", + "zone_manifest" + ] + }, + "ZoneManifestBootInventory": { + "description": "Inventory representation of zone artifacts on the boot disk.\n\nPart of [`ZoneManifestInventory`].", + "type": "object", + "properties": { + "artifacts": { + "title": "IdOrdMap", + "description": "The artifacts on disk.", + "x-rust-type": { + "crate": "iddqd", + "parameters": [ + { + "$ref": "#/components/schemas/ZoneArtifactInventory" + } + ], + 
"path": "iddqd::IdOrdMap", + "version": "*" + }, + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneArtifactInventory" + }, + "uniqueItems": true + }, + "source": { + "description": "The manifest source.\n\nIn production this is [`OmicronZoneManifestSource::Installinator`], but in some development and testing flows Sled Agent synthesizes zone manifests. In those cases, the source is [`OmicronZoneManifestSource::SledAgent`].", + "allOf": [ + { + "$ref": "#/components/schemas/OmicronZoneManifestSource" + } + ] + } + }, + "required": [ + "artifacts", + "source" + ] + }, + "ZoneManifestInventory": { + "description": "Inventory representation of a zone manifest.\n\nPart of [`ZoneImageResolverInventory`].\n\nA zone manifest is a listing of all the zones present in a system's install dataset. This struct contains information about the install dataset gathered from a system.", + "type": "object", + "properties": { + "boot_disk_path": { + "description": "The full path to the zone manifest file on the boot disk.", + "type": "string", + "format": "Utf8PathBuf" + }, + "boot_inventory": { + "description": "The manifest read from the boot disk, and whether the manifest is valid.", + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "$ref": "#/components/schemas/ZoneManifestBootInventory" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "$ref": "#/components/schemas/ZoneManifestBootInventory" + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + }, + "non_boot_status": { + "title": "IdOrdMap", + "description": "Information about the install dataset on non-boot disks.", + "x-rust-type": { + "crate": "iddqd", + "parameters": [ + { + "$ref": "#/components/schemas/ZoneManifestNonBootInventory" + } + ], + "path": "iddqd::IdOrdMap", + "version": "*" + }, + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneManifestNonBootInventory" + }, + "uniqueItems": true + } + }, + "required": [ + "boot_disk_path", + "boot_inventory", + "non_boot_status" + ] + }, + "ZoneManifestNonBootInventory": { + "description": "Inventory representation of a zone manifest on a non-boot disk.\n\nUnlike [`ZoneManifestBootInventory`] which is structured since Reconfigurator makes decisions based on it, information about non-boot disks is purely advisory. For simplicity, we store information in an unstructured format.", + "type": "object", + "properties": { + "is_valid": { + "description": "Whether the status is valid.", + "type": "boolean" + }, + "message": { + "description": "A message describing the status.\n\nIf `is_valid` is true, then the message describes the list of artifacts found and their hashes.\n\nIf `is_valid` is false, then this message describes the reason for the invalid status. This could include errors reading the zone manifest, or zone file mismatches.", + "type": "string" + }, + "path": { + "description": "The full path to the zone manifest JSON on the non-boot disk.", + "type": "string", + "format": "Utf8PathBuf" + }, + "zpool_id": { + "description": "The ID of the non-boot zpool.", + "allOf": [ + { + "$ref": "#/components/schemas/InternalZpoolUuid" + } + ] + } + }, + "required": [ + "is_valid", + "message", + "path", + "zpool_id" + ] + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. 
They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + }, + "ZpoolUuid": { + "x-rust-type": { + "crate": "omicron-uuid-kinds", + "path": "omicron_uuid_kinds::ZpoolUuid", + "version": "*" + }, + "type": "string", + "format": "uuid" + }, + "PropolisUuid": { + "x-rust-type": { + "crate": "omicron-uuid-kinds", + "path": "omicron_uuid_kinds::PropolisUuid", + "version": "*" + }, + "type": "string", + "format": "uuid" + } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } +} diff --git a/openapi/sled-agent/sled-agent-latest.json b/openapi/sled-agent/sled-agent-latest.json index a3a44e12369..86e29c6324d 120000 --- a/openapi/sled-agent/sled-agent-latest.json +++ b/openapi/sled-agent/sled-agent-latest.json @@ -1 +1 @@ -sled-agent-6.0.0-d37dd7.json \ No newline at end of file +sled-agent-7.0.0-62acb3.json \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 35d8d162ec3..9ce28021a5f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2213,6 +2213,7 @@ CREATE TYPE IF NOT EXISTS omicron.public.ip_version AS ENUM ( 'v6' ); + /* Indicates what an IP Pool is reserved for. */ CREATE TYPE IF NOT EXISTS omicron.public.ip_pool_reservation_type AS ENUM ( 'external_silos', @@ -2342,7 +2343,6 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_pool_range_by_last_address ON omicron.p STORING (first_address) WHERE time_deleted IS NULL; - /* The kind of external IP address. */ CREATE TYPE IF NOT EXISTS omicron.public.ip_kind AS ENUM ( /* @@ -7025,6 +7025,350 @@ ON WHERE time_deleted IS NULL; +-- RFD 488: Multicast + +/* Create versioning sequence for multicast group changes */ +CREATE SEQUENCE IF NOT EXISTS omicron.public.multicast_group_version START 1 INCREMENT 1; + +-- Multicast group state for RPW +CREATE TYPE IF NOT EXISTS omicron.public.multicast_group_state AS ENUM ( + 'creating', + 'active', + 'deleting', + 'deleted' +); + +-- Multicast group member state for RPW pattern +CREATE TYPE IF NOT EXISTS omicron.public.multicast_group_member_state AS ENUM ( + 'joining', + 'joined', + 'left' +); + +/* + * External multicast groups (customer-facing, allocated from IP pools) + * Following the bifurcated design from RFD 488 + */ +CREATE TABLE IF NOT EXISTS omicron.public.multicast_group ( + /* Identity metadata (following Resource pattern) */ + id UUID PRIMARY KEY, + name STRING(63) NOT NULL, + description STRING(512) NOT NULL, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + + /* VNI for multicast group (derived or random) */ + vni INT4 NOT NULL, + + /* IP allocation from pools */ + ip_pool_id UUID NOT NULL, + ip_pool_range_id UUID NOT NULL, + multicast_ip INET NOT NULL, + + /* Source-Specific Multicast (SSM) support */ + source_ips INET[] DEFAULT ARRAY[]::INET[], + + /* Multicast VLAN (MVLAN) for egress to upstream networks */ + /* Tags packets leaving the rack to traverse VLAN-segmented upstream networks */ + /* Internal rack traffic uses VNI-based underlay forwarding */ + mvlan INT2, + + /* Associated underlay group for NAT */ + /* We fill this as part of the RPW */ + underlay_group_id UUID, + + /* DPD tag to couple external/underlay state for this group */ + tag STRING(63), + + /* Current state of the multicast group (for RPW) */ + state 
omicron.public.multicast_group_state NOT NULL DEFAULT 'creating', + + /* Sync versioning */ + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.multicast_group_version'), + version_removed INT8, + + /* Constraints */ + -- External groups: IPv4 multicast or non-admin-scoped IPv6 + CONSTRAINT external_multicast_ip_valid CHECK ( + (family(multicast_ip) = 4 AND multicast_ip << '224.0.0.0/4') OR + (family(multicast_ip) = 6 AND multicast_ip << 'ff00::/8' AND + NOT multicast_ip << 'ff04::/16' AND + NOT multicast_ip << 'ff05::/16' AND + NOT multicast_ip << 'ff08::/16') + ), + + -- Reserved range validation for IPv4 + CONSTRAINT external_ipv4_not_reserved CHECK ( + family(multicast_ip) != 4 OR ( + family(multicast_ip) = 4 AND + NOT multicast_ip << '224.0.0.0/24' AND -- Link-local control block + NOT multicast_ip << '233.0.0.0/8' AND -- GLOP addressing + NOT multicast_ip << '239.0.0.0/8' -- Administratively scoped + ) + ), + + -- Reserved range validation for IPv6 + CONSTRAINT external_ipv6_not_reserved CHECK ( + family(multicast_ip) != 6 OR ( + family(multicast_ip) = 6 AND + NOT multicast_ip << 'ff01::/16' AND -- Interface-local scope + NOT multicast_ip << 'ff02::/16' -- Link-local scope + ) + ), + + -- MVLAN validation (Dendrite requires >= 2) + CONSTRAINT mvlan_valid_range CHECK ( + mvlan IS NULL OR (mvlan >= 2 AND mvlan <= 4094) + ) +); + +/* + * Underlay multicast groups (admin-scoped IPv6 for VPC internal forwarding) + */ +CREATE TABLE IF NOT EXISTS omicron.public.underlay_multicast_group ( + /* Identity */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + + /* Admin-scoped IPv6 multicast address (NAT target) */ + multicast_ip INET NOT NULL, + + /* DPD tag to couple external/underlay state for this group */ + tag STRING(63), + + /* Sync versioning */ + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.multicast_group_version'), + version_removed INT8, + + /* Constraints */ + -- Underlay groups: admin-local scoped IPv6 only (ff04::/16) + CONSTRAINT underlay_ipv6_admin_scoped CHECK ( + family(multicast_ip) = 6 AND multicast_ip << 'ff04::/16' + ) +); + +/* + * Multicast group membership (external groups) + */ +CREATE TABLE IF NOT EXISTS omicron.public.multicast_group_member ( + /* Identity */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + + /* External group for customer/external membership */ + external_group_id UUID NOT NULL, + + /* Parent instance or service (following external_ip pattern) */ + parent_id UUID NOT NULL, + + /* Sled hosting the parent instance (NULL when stopped) */ + sled_id UUID, + + /* RPW state for reliable operations */ + state omicron.public.multicast_group_member_state NOT NULL, + + /* Sync versioning */ + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.multicast_group_version'), + version_removed INT8 +); + +/* External Multicast Group Indexes */ + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_added >= ? ORDER BY version_added +CREATE UNIQUE INDEX IF NOT EXISTS multicast_group_version_added ON omicron.public.multicast_group ( + version_added +) STORING ( + name, + multicast_ip, + time_created, + time_deleted +); + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_removed >= ? 
ORDER BY version_removed +CREATE UNIQUE INDEX IF NOT EXISTS multicast_group_version_removed ON omicron.public.multicast_group ( + version_removed +) STORING ( + name, + multicast_ip, + time_created, + time_deleted +); + +-- IP address uniqueness and conflict detection +-- Supports: SELECT ... WHERE multicast_ip = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS lookup_external_multicast_by_ip ON omicron.public.multicast_group ( + multicast_ip +) WHERE time_deleted IS NULL; + +-- Pool management and allocation queries +-- Supports: SELECT ... WHERE ip_pool_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS external_multicast_by_pool ON omicron.public.multicast_group ( + ip_pool_id, + ip_pool_range_id +) WHERE time_deleted IS NULL; + +-- Underlay NAT group association +-- Supports: SELECT ... WHERE underlay_group_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS external_multicast_by_underlay ON omicron.public.multicast_group ( + underlay_group_id +) WHERE time_deleted IS NULL AND underlay_group_id IS NOT NULL; + +-- State-based filtering for RPW reconciler +-- Supports: SELECT ... WHERE state = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_group_by_state ON omicron.public.multicast_group ( + state +) WHERE time_deleted IS NULL; + +-- RPW reconciler composite queries (state + pool filtering) +-- Supports: SELECT ... WHERE state = ? AND ip_pool_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_group_reconciler_query ON omicron.public.multicast_group ( + state, + ip_pool_id +) WHERE time_deleted IS NULL; + +-- Fleet-wide unique name constraint (groups are fleet-scoped like IP pools) +-- Supports: SELECT ... WHERE name = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS lookup_multicast_group_by_name ON omicron.public.multicast_group ( + name +) WHERE time_deleted IS NULL; + +/* Underlay Multicast Group Indexes */ + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_added >= ? ORDER BY version_added +CREATE UNIQUE INDEX IF NOT EXISTS underlay_multicast_group_version_added ON omicron.public.underlay_multicast_group ( + version_added +) STORING ( + multicast_ip, + time_created, + time_deleted +); + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_removed >= ? ORDER BY version_removed +CREATE UNIQUE INDEX IF NOT EXISTS underlay_multicast_group_version_removed ON omicron.public.underlay_multicast_group ( + version_removed +) STORING ( + multicast_ip, + time_created, + time_deleted +); + +-- Admin-scoped IPv6 address uniqueness +-- Supports: SELECT ... WHERE multicast_ip = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS lookup_underlay_multicast_by_ip ON omicron.public.underlay_multicast_group ( + multicast_ip +) WHERE time_deleted IS NULL; + +-- Lifecycle management via group tags +-- Supports: SELECT ... WHERE tag = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS underlay_multicast_by_tag ON omicron.public.underlay_multicast_group ( + tag +) WHERE time_deleted IS NULL AND tag IS NOT NULL; + +/* Multicast Group Member Indexes */ + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_added >= ? 
ORDER BY version_added +CREATE UNIQUE INDEX IF NOT EXISTS multicast_member_version_added ON omicron.public.multicast_group_member ( + version_added +) STORING ( + external_group_id, + parent_id, + time_created, + time_deleted +); + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_removed >= ? ORDER BY version_removed +CREATE UNIQUE INDEX IF NOT EXISTS multicast_member_version_removed ON omicron.public.multicast_group_member ( + version_removed +) STORING ( + external_group_id, + parent_id, + time_created, + time_deleted +); + +-- Group membership listing and pagination +-- Supports: SELECT ... WHERE external_group_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_external_group ON omicron.public.multicast_group_member ( + external_group_id +) WHERE time_deleted IS NULL; + +-- Instance membership queries (all groups for an instance) +-- Supports: SELECT ... WHERE parent_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_parent ON omicron.public.multicast_group_member ( + parent_id +) WHERE time_deleted IS NULL; + +-- RPW reconciler sled-based switch port resolution +-- Supports: SELECT ... WHERE sled_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_sled ON omicron.public.multicast_group_member ( + sled_id +) WHERE time_deleted IS NULL; + +-- Instance-focused composite queries with group filtering +-- Supports: SELECT ... WHERE parent_id = ? AND external_group_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_parent_and_group ON omicron.public.multicast_group_member ( + parent_id, + external_group_id +) WHERE time_deleted IS NULL; + +-- Business logic constraint: one instance per group (also serves queries) +-- Supports: SELECT ... WHERE external_group_id = ? AND parent_id = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS multicast_member_unique_parent_per_group ON omicron.public.multicast_group_member ( + external_group_id, + parent_id +) WHERE time_deleted IS NULL; + +-- RPW reconciler state processing by group +-- Supports: SELECT ... WHERE external_group_id = ? AND state = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_group_state ON omicron.public.multicast_group_member ( + external_group_id, + state +) WHERE time_deleted IS NULL; + +-- RPW cleanup of soft-deleted members +-- Supports: DELETE FROM multicast_group_member WHERE state = 'Left' AND time_deleted IS NOT NULL +CREATE INDEX IF NOT EXISTS multicast_member_cleanup ON omicron.public.multicast_group_member ( + state +) WHERE time_deleted IS NOT NULL; + +-- Saga unwinding hard deletion by group +-- Supports: DELETE FROM multicast_group_member WHERE external_group_id = ? +CREATE INDEX IF NOT EXISTS multicast_member_hard_delete_by_group ON omicron.public.multicast_group_member ( + external_group_id +); + +-- Pagination optimization for group member listing +-- Supports: SELECT ... WHERE external_group_id = ? ORDER BY id LIMIT ? OFFSET ? +CREATE INDEX IF NOT EXISTS multicast_member_group_id_order ON omicron.public.multicast_group_member ( + external_group_id, + id +) WHERE time_deleted IS NULL; + +-- Pagination optimization for instance member listing +-- Supports: SELECT ... WHERE parent_id = ? ORDER BY id LIMIT ? OFFSET ? 
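+-- As an illustration only (not part of this schema change, and the exact
+-- query Nexus issues may differ), the query shape this index is intended to
+-- serve looks roughly like:
+--
+--   SELECT id, external_group_id, parent_id, state
+--     FROM omicron.public.multicast_group_member
+--    WHERE parent_id = $1 AND time_deleted IS NULL
+--    ORDER BY id
+--    LIMIT $2 OFFSET $3;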
+CREATE INDEX IF NOT EXISTS multicast_member_parent_id_order ON omicron.public.multicast_group_member ( + parent_id, + id +) WHERE time_deleted IS NULL; + +-- Instance lifecycle state transitions optimization +-- Supports: UPDATE ... WHERE parent_id = ? AND state IN (?, ?) AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_parent_state ON omicron.public.multicast_group_member ( + parent_id, + state +) WHERE time_deleted IS NULL; + -- Keep this at the end of file so that the database does not contain a version -- until it is fully populated. INSERT INTO omicron.public.db_metadata ( @@ -7034,7 +7378,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '208.0.0', NULL) + (TRUE, NOW(), NOW(), '209.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/multicast-group-support/up01.sql b/schema/crdb/multicast-group-support/up01.sql new file mode 100644 index 00000000000..c9428f77515 --- /dev/null +++ b/schema/crdb/multicast-group-support/up01.sql @@ -0,0 +1,339 @@ +-- Multicast group support: Add multicast groups and membership (RFD 488) + +-- Create versioning sequence for multicast group changes +CREATE SEQUENCE IF NOT EXISTS omicron.public.multicast_group_version START 1 INCREMENT 1; + +-- Multicast group state for RPW +CREATE TYPE IF NOT EXISTS omicron.public.multicast_group_state AS ENUM ( + 'creating', + 'active', + 'deleting', + 'deleted' +); + +-- Multicast group member state for RPW pattern +CREATE TYPE IF NOT EXISTS omicron.public.multicast_group_member_state AS ENUM ( + 'joining', + 'joined', + 'left' +); + +-- External multicast groups (customer-facing, allocated from IP pools) +CREATE TABLE IF NOT EXISTS omicron.public.multicast_group ( + /* Identity metadata (following Resource pattern) */ + id UUID PRIMARY KEY, + name STRING(63) NOT NULL, + description STRING(512) NOT NULL, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + + /* VNI for multicast group (derived or random) */ + vni INT4 NOT NULL, + + /* IP allocation from pools */ + ip_pool_id UUID NOT NULL, + ip_pool_range_id UUID NOT NULL, + + /* IP assigned to this multicast group */ + multicast_ip INET NOT NULL, + + /* Source-Specific Multicast (SSM) support */ + source_ips INET[] DEFAULT ARRAY[]::INET[], + + /* Multicast VLAN (MVLAN) for egress to upstream networks */ + /* Tags packets leaving the rack to traverse VLAN-segmented upstream networks */ + /* Internal rack traffic uses VNI-based underlay forwarding */ + mvlan INT2, + + /* Associated underlay group for NAT */ + /* We fill this as part of the RPW */ + underlay_group_id UUID, + + /* DPD tag to couple external/underlay state for this group */ + tag STRING(63), + + /* Current state of the multicast group (for RPW) */ + state omicron.public.multicast_group_state NOT NULL DEFAULT 'creating', + + /* Sync versioning */ + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.multicast_group_version'), + version_removed INT8, + + /* Constraints */ + -- External groups: IPv4 multicast or non-admin-scoped IPv6 + CONSTRAINT external_multicast_ip_valid CHECK ( + (family(multicast_ip) = 4 AND multicast_ip << '224.0.0.0/4') OR + (family(multicast_ip) = 6 AND multicast_ip << 'ff00::/8' AND + NOT multicast_ip << 'ff04::/16' AND + NOT multicast_ip << 'ff05::/16' AND + NOT multicast_ip << 'ff08::/16') + ), + + -- Reserved range validation for IPv4 + CONSTRAINT external_ipv4_not_reserved CHECK ( + family(multicast_ip) != 4 OR ( + 
family(multicast_ip) = 4 AND + NOT multicast_ip << '224.0.0.0/24' AND -- Link-local control block + NOT multicast_ip << '233.0.0.0/8' AND -- GLOP addressing + NOT multicast_ip << '239.0.0.0/8' -- Administratively scoped + ) + ), + + -- Reserved range validation for IPv6 + CONSTRAINT external_ipv6_not_reserved CHECK ( + family(multicast_ip) != 6 OR ( + family(multicast_ip) = 6 AND + NOT multicast_ip << 'ff01::/16' AND -- Interface-local scope + NOT multicast_ip << 'ff02::/16' -- Link-local scope + ) + ), + + -- MVLAN validation (Dendrite requires >= 2) + CONSTRAINT mvlan_valid_range CHECK ( + mvlan IS NULL OR (mvlan >= 2 AND mvlan <= 4094) + ) +); + +-- Underlay multicast groups (admin-scoped IPv6 for VPC internal forwarding) +CREATE TABLE IF NOT EXISTS omicron.public.underlay_multicast_group ( + /* Identity */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + + /* Admin-scoped IPv6 multicast address (NAT target) */ + multicast_ip INET NOT NULL, + + /* DPD tag to couple external/underlay state for this group */ + tag STRING(63), + + /* Sync versioning */ + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.multicast_group_version'), + version_removed INT8, + + /* Constraints */ + -- Underlay groups: admin-local scoped IPv6 only (ff04::/16) + CONSTRAINT underlay_ipv6_admin_scoped CHECK ( + family(multicast_ip) = 6 AND multicast_ip << 'ff04::/16' + ) +); + +-- Multicast group membership (external groups) +CREATE TABLE IF NOT EXISTS omicron.public.multicast_group_member ( + /* Identity */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + + /* External group for customer/external membership */ + external_group_id UUID NOT NULL, + + /* Parent instance or service */ + parent_id UUID NOT NULL, + + /* Sled hosting the parent instance (denormalized for performance) */ + /* NULL when instance is stopped, populated when active */ + sled_id UUID, + + /* RPW state for reliable operations */ + state omicron.public.multicast_group_member_state NOT NULL, + + /* Sync versioning */ + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.multicast_group_version'), + version_removed INT8 +); + +/* External Multicast Group Indexes */ + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_added >= ? ORDER BY version_added +CREATE UNIQUE INDEX IF NOT EXISTS multicast_group_version_added ON omicron.public.multicast_group ( + version_added +) STORING ( + name, + multicast_ip, + time_created, + time_deleted +); + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_removed >= ? ORDER BY version_removed +CREATE UNIQUE INDEX IF NOT EXISTS multicast_group_version_removed ON omicron.public.multicast_group ( + version_removed +) STORING ( + name, + multicast_ip, + time_created, + time_deleted +); + +-- IP address uniqueness and conflict detection +-- Supports: SELECT ... WHERE multicast_ip = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS lookup_external_multicast_by_ip ON omicron.public.multicast_group ( + multicast_ip +) WHERE time_deleted IS NULL; + +-- Pool management and allocation queries +-- Supports: SELECT ... WHERE ip_pool_id = ? 
AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS external_multicast_by_pool ON omicron.public.multicast_group ( + ip_pool_id, + ip_pool_range_id +) WHERE time_deleted IS NULL; + +-- Underlay NAT group association +-- Supports: SELECT ... WHERE underlay_group_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS external_multicast_by_underlay ON omicron.public.multicast_group ( + underlay_group_id +) WHERE time_deleted IS NULL AND underlay_group_id IS NOT NULL; + +-- State-based filtering for RPW reconciler +-- Supports: SELECT ... WHERE state = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_group_by_state ON omicron.public.multicast_group ( + state +) WHERE time_deleted IS NULL; + +-- RPW reconciler composite queries (state + pool filtering) +-- Supports: SELECT ... WHERE state = ? AND ip_pool_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_group_reconciler_query ON omicron.public.multicast_group ( + state, + ip_pool_id +) WHERE time_deleted IS NULL; + +-- Fleet-wide unique name constraint (groups are fleet-scoped like IP pools) +-- Supports: SELECT ... WHERE name = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS lookup_multicast_group_by_name ON omicron.public.multicast_group ( + name +) WHERE time_deleted IS NULL; + +/* Underlay Multicast Group Indexes */ + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_added >= ? ORDER BY version_added +CREATE UNIQUE INDEX IF NOT EXISTS underlay_multicast_group_version_added ON omicron.public.underlay_multicast_group ( + version_added +) STORING ( + multicast_ip, + time_created, + time_deleted +); + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_removed >= ? ORDER BY version_removed +CREATE UNIQUE INDEX IF NOT EXISTS underlay_multicast_group_version_removed ON omicron.public.underlay_multicast_group ( + version_removed +) STORING ( + multicast_ip, + time_created, + time_deleted +); + +-- Admin-scoped IPv6 address uniqueness +-- Supports: SELECT ... WHERE multicast_ip = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS lookup_underlay_multicast_by_ip ON omicron.public.underlay_multicast_group ( + multicast_ip +) WHERE time_deleted IS NULL; + +-- Lifecycle management via group tags +-- Supports: SELECT ... WHERE tag = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS underlay_multicast_by_tag ON omicron.public.underlay_multicast_group ( + tag +) WHERE time_deleted IS NULL AND tag IS NOT NULL; + +/* Multicast Group Member Indexes */ + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_added >= ? ORDER BY version_added +CREATE UNIQUE INDEX IF NOT EXISTS multicast_member_version_added ON omicron.public.multicast_group_member ( + version_added +) STORING ( + external_group_id, + parent_id, + time_created, + time_deleted +); + +-- Version tracking for Omicron internal change detection +-- Supports: SELECT ... WHERE version_removed >= ? ORDER BY version_removed +CREATE UNIQUE INDEX IF NOT EXISTS multicast_member_version_removed ON omicron.public.multicast_group_member ( + version_removed +) STORING ( + external_group_id, + parent_id, + time_created, + time_deleted +); + +-- Group membership listing and pagination +-- Supports: SELECT ... WHERE external_group_id = ? 
AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_external_group ON omicron.public.multicast_group_member ( + external_group_id +) WHERE time_deleted IS NULL; + +-- Instance membership queries (all groups for an instance) +-- Supports: SELECT ... WHERE parent_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_parent ON omicron.public.multicast_group_member ( + parent_id +) WHERE time_deleted IS NULL; + +-- RPW reconciler sled-based switch port resolution +-- Supports: SELECT ... WHERE sled_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_sled ON omicron.public.multicast_group_member ( + sled_id +) WHERE time_deleted IS NULL; + +-- Instance-focused composite queries with group filtering +-- Supports: SELECT ... WHERE parent_id = ? AND external_group_id = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_by_parent_and_group ON omicron.public.multicast_group_member ( + parent_id, + external_group_id +) WHERE time_deleted IS NULL; + +-- Business logic constraint: one instance per group (also serves queries) +-- Supports: SELECT ... WHERE external_group_id = ? AND parent_id = ? AND time_deleted IS NULL +CREATE UNIQUE INDEX IF NOT EXISTS multicast_member_unique_parent_per_group ON omicron.public.multicast_group_member ( + external_group_id, + parent_id +) WHERE time_deleted IS NULL; + +-- RPW reconciler state processing by group +-- Supports: SELECT ... WHERE external_group_id = ? AND state = ? AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_group_state ON omicron.public.multicast_group_member ( + external_group_id, + state +) WHERE time_deleted IS NULL; + +-- RPW cleanup of soft-deleted members +-- Supports: DELETE FROM multicast_group_member WHERE state = 'Left' AND time_deleted IS NOT NULL +CREATE INDEX IF NOT EXISTS multicast_member_cleanup ON omicron.public.multicast_group_member ( + state +) WHERE time_deleted IS NOT NULL; + +-- Saga unwinding hard deletion by group +-- Supports: DELETE FROM multicast_group_member WHERE external_group_id = ? +CREATE INDEX IF NOT EXISTS multicast_member_hard_delete_by_group ON omicron.public.multicast_group_member ( + external_group_id +); + +-- Pagination optimization for group member listing +-- Supports: SELECT ... WHERE external_group_id = ? ORDER BY id LIMIT ? OFFSET ? +CREATE INDEX IF NOT EXISTS multicast_member_group_id_order ON omicron.public.multicast_group_member ( + external_group_id, + id +) WHERE time_deleted IS NULL; + +-- Pagination optimization for instance member listing +-- Supports: SELECT ... WHERE parent_id = ? ORDER BY id LIMIT ? OFFSET ? +CREATE INDEX IF NOT EXISTS multicast_member_parent_id_order ON omicron.public.multicast_group_member ( + parent_id, + id +) WHERE time_deleted IS NULL; + +-- Instance lifecycle state transitions optimization +-- Supports: UPDATE ... WHERE parent_id = ? AND state IN (?, ?) 
AND time_deleted IS NULL +CREATE INDEX IF NOT EXISTS multicast_member_parent_state ON omicron.public.multicast_group_member ( + parent_id, + state +) WHERE time_deleted IS NULL; diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 0bbb9052409..e29503d3ab4 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -94,6 +94,7 @@ sled-diagnostics.workspace = true sled-hardware.workspace = true sled-hardware-types.workspace = true sled-storage.workspace = true +sp-sim.workspace = true slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = true @@ -131,8 +132,10 @@ http.workspace = true hyper.workspace = true nexus-reconfigurator-blippy.workspace = true omicron-test-utils.workspace = true +progenitor.workspace = true pretty_assertions.workspace = true rcgen.workspace = true +regress.workspace = true reqwest = { workspace = true, features = ["blocking"] } subprocess.workspace = true slog-async.workspace = true diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index 4ee3de420cf..d37f406af54 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -40,8 +40,8 @@ use sled_agent_types::{ early_networking::EarlyNetworkConfig, firewall_rules::VpcFirewallRulesEnsureBody, instance::{ - InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateBody, - VmmPutStateResponse, VmmUnregisterResponse, + InstanceExternalIpBody, VmmPutStateBody, VmmPutStateResponse, + VmmUnregisterResponse, }, sled::AddSledRequest, zone_bundle::{ @@ -55,6 +55,8 @@ use uuid::Uuid; /// Copies of data types that changed between v3 and v4. mod v3; +/// Copies of data types that changed between previous versions and v7. +pub mod v7; api_versions!([ // WHEN CHANGING THE API (part 1 of 2): @@ -68,6 +70,7 @@ api_versions!([ // | example for the next person. // v // (next_int, IDENT), + (7, MULTICAST_SUPPORT), (6, ADD_PROBE_PUT_ENDPOINT), (5, NEWTYPE_UUID_BUMP), (4, ADD_NEXUS_LOCKSTEP_PORT_TO_INVENTORY), @@ -359,16 +362,30 @@ pub trait SledAgentApi { #[endpoint { method = PUT, path = "/vmms/{propolis_id}", + operation_id = "vmm_register", + versions = VERSION_INITIAL..VERSION_MULTICAST_SUPPORT }] - async fn vmm_register( + async fn vmm_register_v1( rqctx: RequestContext, path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result, HttpError>; #[endpoint { - method = DELETE, + method = PUT, path = "/vmms/{propolis_id}", + operation_id = "vmm_register", + versions = VERSION_MULTICAST_SUPPORT.. 
+ }] + async fn vmm_register_v7( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = DELETE, + path = "/vmms/{propolis_id}" }] async fn vmm_unregister( rqctx: RequestContext, @@ -414,6 +431,28 @@ pub trait SledAgentApi { body: TypedBody, ) -> Result; + #[endpoint { + method = PUT, + path = "/vmms/{propolis_id}/multicast-group", + versions = VERSION_MULTICAST_SUPPORT.., + }] + async fn vmm_join_multicast_group( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + + #[endpoint { + method = DELETE, + path = "/vmms/{propolis_id}/multicast-group", + versions = VERSION_MULTICAST_SUPPORT.., + }] + async fn vmm_leave_multicast_group( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + #[endpoint { method = PUT, path = "/disks/{disk_id}", diff --git a/sled-agent/api/src/v7.rs b/sled-agent/api/src/v7.rs new file mode 100644 index 00000000000..4e096e5415f --- /dev/null +++ b/sled-agent/api/src/v7.rs @@ -0,0 +1,90 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Sled agent API types (version 7) +//! +//! Version 7 adds support for multicast group management on instances. + +use std::net::{IpAddr, SocketAddr}; + +use omicron_common::api::{ + external::Hostname, + internal::{ + nexus::VmmRuntimeState, + shared::{ + DhcpConfig, NetworkInterface, ResolvedVpcFirewallRule, + SourceNatConfig, + }, + }, +}; +use omicron_uuid_kinds::InstanceUuid; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use sled_agent_types::instance::{InstanceMetadata, VmmSpec}; + +/// The body of a request to ensure that a instance and VMM are known to a sled +/// agent (version 7, with multicast support). +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct InstanceEnsureBody { + /// The virtual hardware configuration this virtual machine should have when + /// it is started. + pub vmm_spec: VmmSpec, + + /// Information about the sled-local configuration that needs to be + /// established to make the VM's virtual hardware fully functional. + pub local_config: InstanceSledLocalConfig, + + /// The initial VMM runtime state for the VMM being registered. + pub vmm_runtime: VmmRuntimeState, + + /// The ID of the instance for which this VMM is being created. + pub instance_id: InstanceUuid, + + /// The ID of the migration in to this VMM, if this VMM is being + /// ensured is part of a migration in. If this is `None`, the VMM is not + /// being created due to a migration. + pub migration_id: Option, + + /// The address at which this VMM should serve a Propolis server API. + pub propolis_addr: SocketAddr, + + /// Metadata used to track instance statistics. + pub metadata: InstanceMetadata, +} + +/// Describes sled-local configuration that a sled-agent must establish to make +/// the instance's virtual hardware fully functional (version 7, with multicast). +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct InstanceSledLocalConfig { + pub hostname: Hostname, + pub nics: Vec, + pub source_nat: SourceNatConfig, + /// Zero or more external IP addresses (either floating or ephemeral), + /// provided to an instance to allow inbound connectivity. 
+ pub ephemeral_ip: Option, + pub floating_ips: Vec, + pub multicast_groups: Vec, + pub firewall_rules: Vec, + pub dhcp_config: DhcpConfig, +} + +/// Represents a multicast group membership for an instance. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct InstanceMulticastMembership { + pub group_ip: IpAddr, + // For Source-Specific Multicast (SSM) + pub sources: Vec, +} + +/// Request body for multicast group operations. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum InstanceMulticastBody { + Join(InstanceMulticastMembership), + Leave(InstanceMulticastMembership), +} diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index 675b5eb77d8..e18ab69c213 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -116,7 +116,7 @@ async fn do_run() -> Result<(), CmdError> { cpu_family: SledCpuFamily::AmdMilan, baseboard: Baseboard::Gimlet { identifier: format!("sim-{}", args.uuid), - model: String::from("sim-gimlet"), + model: String::from(sp_sim::FAKE_GIMLET_MODEL), revision: 3, }, }, diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 2a9da41f2bb..a3c6a0ec92f 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -32,8 +32,8 @@ use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; use sled_agent_types::instance::{ - InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateBody, - VmmPutStateResponse, VmmUnregisterResponse, + InstanceExternalIpBody, VmmPutStateBody, VmmPutStateResponse, + VmmUnregisterResponse, }; use sled_agent_types::probes::ProbeSet; use sled_agent_types::sled::AddSledRequest; @@ -489,16 +489,29 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseOk(sa.get_role())) } - async fn vmm_register( + async fn vmm_register_v1( rqctx: RequestContext, path_params: Path, - body: TypedBody, + body: TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let propolis_id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); Ok(HttpResponseOk( - sa.instance_ensure_registered(propolis_id, body_args).await?, + sa.instance_ensure_registered_v1(propolis_id, body_args).await?, + )) + } + + async fn vmm_register_v7( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let propolis_id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_registered_v7(propolis_id, body_args).await?, )) } @@ -555,6 +568,30 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseUpdatedNoContent()) } + async fn vmm_join_multicast_group( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + sa.instance_join_multicast_group(id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn vmm_leave_multicast_group( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + sa.instance_leave_multicast_group(id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + async fn disk_put( rqctx: 
RequestContext, path_params: Path, diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 12e1c39adf1..9bb95964804 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -14,6 +14,7 @@ use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClient; use crate::profile::*; use crate::zone_bundle::ZoneBundler; + use chrono::Utc; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; @@ -36,6 +37,9 @@ use propolis_client::Client as PropolisClient; use propolis_client::instance_spec::{ComponentV0, SpecKey}; use rand::SeedableRng; use rand::prelude::IteratorRandom; +use sled_agent_api::v7::{ + InstanceMulticastMembership, InstanceSledLocalConfig, +}; use sled_agent_config_reconciler::AvailableDatasetsReceiver; use sled_agent_types::instance::*; use sled_agent_types::zone_bundle::ZoneBundleCause; @@ -238,6 +242,18 @@ enum InstanceRequest { RefreshExternalIps { tx: oneshot::Sender>, }, + JoinMulticastGroup { + membership: InstanceMulticastMembership, + tx: oneshot::Sender>, + }, + LeaveMulticastGroup { + membership: InstanceMulticastMembership, + tx: oneshot::Sender>, + }, + #[allow(dead_code)] + RefreshMulticastGroups { + tx: oneshot::Sender>, + }, } impl InstanceRequest { @@ -279,7 +295,10 @@ impl InstanceRequest { Self::IssueSnapshotRequest { tx, .. } | Self::AddExternalIp { tx, .. } | Self::DeleteExternalIp { tx, .. } - | Self::RefreshExternalIps { tx } => tx + | Self::RefreshExternalIps { tx } + | Self::JoinMulticastGroup { tx, .. } + | Self::LeaveMulticastGroup { tx, .. } + | Self::RefreshMulticastGroups { tx } => tx .send(Err(error.into())) .map_err(|_| Error::FailedSendClientClosed), } @@ -520,6 +539,8 @@ struct InstanceRunner { source_nat: SourceNatConfig, ephemeral_ip: Option, floating_ips: Vec, + // Multicast groups to which this instance belongs. + multicast_groups: Vec, firewall_rules: Vec, dhcp_config: DhcpCfg, @@ -708,6 +729,18 @@ impl InstanceRunner { RefreshExternalIps { tx } => { tx.send(self.refresh_external_ips().map_err(|e| e.into())) .map_err(|_| Error::FailedSendClientClosed) + }, + JoinMulticastGroup { membership, tx } => { + tx.send(self.join_multicast_group(&membership).await.map_err(|e| e.into())) + .map_err(|_| Error::FailedSendClientClosed) + }, + LeaveMulticastGroup { membership, tx } => { + tx.send(self.leave_multicast_group(&membership).await.map_err(|e| e.into())) + .map_err(|_| Error::FailedSendClientClosed) + }, + RefreshMulticastGroups { tx } => { + tx.send(self.refresh_multicast_groups().map_err(|e| e.into())) + .map_err(|_| Error::FailedSendClientClosed) } } }; @@ -806,6 +839,15 @@ impl InstanceRunner { RefreshExternalIps { tx } => { tx.send(Err(Error::Terminating.into())).map_err(|_| ()) } + JoinMulticastGroup { tx, .. } => { + tx.send(Err(Error::Terminating.into())).map_err(|_| ()) + } + LeaveMulticastGroup { tx, .. 
} => { + tx.send(Err(Error::Terminating.into())).map_err(|_| ()) + } + RefreshMulticastGroups { tx } => { + tx.send(Err(Error::Terminating.into())).map_err(|_| ()) + } }; } @@ -1640,6 +1682,7 @@ impl Instance { source_nat: local_config.source_nat, ephemeral_ip: local_config.ephemeral_ip, floating_ips: local_config.floating_ips, + multicast_groups: local_config.multicast_groups, firewall_rules: local_config.firewall_rules, dhcp_config, state: InstanceStates::new(vmm_runtime, migration_id), @@ -1773,6 +1816,42 @@ impl Instance { .try_send(InstanceRequest::RefreshExternalIps { tx }) .or_else(InstanceRequest::fail_try_send) } + + pub fn join_multicast_group( + &self, + tx: oneshot::Sender>, + membership: &InstanceMulticastMembership, + ) -> Result<(), Error> { + self.tx + .try_send(InstanceRequest::JoinMulticastGroup { + membership: membership.clone(), + tx, + }) + .or_else(InstanceRequest::fail_try_send) + } + + pub fn leave_multicast_group( + &self, + tx: oneshot::Sender>, + membership: &InstanceMulticastMembership, + ) -> Result<(), Error> { + self.tx + .try_send(InstanceRequest::LeaveMulticastGroup { + membership: membership.clone(), + tx, + }) + .or_else(InstanceRequest::fail_try_send) + } + + #[allow(dead_code)] + pub fn refresh_multicast_groups( + &self, + tx: oneshot::Sender>, + ) -> Result<(), Error> { + self.tx + .try_send(InstanceRequest::RefreshMulticastGroups { tx }) + .or_else(InstanceRequest::fail_try_send) + } } // TODO: Move this implementation higher. I'm just keeping it here to make the @@ -2255,6 +2334,141 @@ impl InstanceRunner { fn refresh_external_ips(&mut self) -> Result<(), Error> { self.refresh_external_ips_inner() } + + async fn join_multicast_group( + &mut self, + membership: &InstanceMulticastMembership, + ) -> Result<(), Error> { + // Similar logic to add_external_ip - save state for rollback + let out = self.join_multicast_group_inner(membership).await; + + if out.is_err() { + // Rollback state on error + self.multicast_groups.retain(|m| m != membership); + } + out + } + + async fn leave_multicast_group( + &mut self, + membership: &InstanceMulticastMembership, + ) -> Result<(), Error> { + // Similar logic to delete_external_ip - save state for rollback + let out = self.leave_multicast_group_inner(membership).await; + + if out.is_err() { + // Rollback state on error - readd the membership if it was removed + if !self.multicast_groups.contains(membership) { + self.multicast_groups.push(membership.clone()); + } + } + out + } + + fn refresh_multicast_groups(&mut self) -> Result<(), Error> { + self.refresh_multicast_groups_inner() + } + + async fn join_multicast_group_inner( + &mut self, + membership: &InstanceMulticastMembership, + ) -> Result<(), Error> { + // Check for duplicate membership (idempotency) + if self.multicast_groups.contains(membership) { + return Ok(()); + } + + // Add to local state + self.multicast_groups.push(membership.clone()); + + // Update OPTE configuration + let Some(primary_nic) = self.primary_nic() else { + return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); + }; + + // Convert InstanceMulticastMembership to MulticastGroupCfg + let multicast_cfg: Vec = self + .multicast_groups + .iter() + .map(|membership| illumos_utils::opte::MulticastGroupCfg { + group_ip: membership.group_ip, + sources: membership.sources.clone(), + }) + .collect(); + + // Validate multicast configuration with OPTE + self.port_manager.multicast_groups_ensure( + primary_nic.id, + primary_nic.kind, + &multicast_cfg, + )?; + + // TODO: Configure underlay 
multicast group addresses on the zone's vNIC. + // This should add the multicast group addresses to the zone's network + // interface so it can receive underlay multicast traffic (physical + // network layer). Rack-wide dataplane forwarding is handled by the + // RPW reconciler + DPD. + // See also: port_manager.rs multicast_groups_ensure() TODO about + // configuring OPTE port-level multicast group membership. + + Ok(()) + } + + async fn leave_multicast_group_inner( + &mut self, + membership: &InstanceMulticastMembership, + ) -> Result<(), Error> { + // Remove from local state + self.multicast_groups.retain(|m| m != membership); + + // Update OPTE configuration + let Some(primary_nic) = self.primary_nic() else { + return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); + }; + + // Convert InstanceMulticastMembership to MulticastGroupCfg + let multicast_cfg: Vec = self + .multicast_groups + .iter() + .map(|membership| illumos_utils::opte::MulticastGroupCfg { + group_ip: membership.group_ip, + sources: membership.sources.clone(), + }) + .collect(); + + self.port_manager.multicast_groups_ensure( + primary_nic.id, + primary_nic.kind, + &multicast_cfg, + )?; + + Ok(()) + } + + fn refresh_multicast_groups_inner(&mut self) -> Result<(), Error> { + // Update OPTE configuration + let Some(primary_nic) = self.primary_nic() else { + return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); + }; + + // Convert InstanceMulticastMembership to MulticastGroupCfg + let multicast_cfg: Vec = self + .multicast_groups + .iter() + .map(|membership| illumos_utils::opte::MulticastGroupCfg { + group_ip: membership.group_ip, + sources: membership.sources.clone(), + }) + .collect(); + + self.port_manager.multicast_groups_ensure( + primary_nic.id, + primary_nic.kind, + &multicast_cfg, + )?; + + Ok(()) + } } #[cfg(all(test, target_os = "illumos"))] @@ -2277,6 +2491,7 @@ mod tests { use propolis_client::types::{ InstanceMigrateStatusResponse, InstanceStateMonitorResponse, }; + use sled_agent_api::v7::InstanceEnsureBody; use sled_agent_config_reconciler::{ CurrentlyManagedZpoolsReceiver, InternalDiskDetails, InternalDisksReceiver, @@ -2486,6 +2701,7 @@ mod tests { .unwrap(), ephemeral_ip: None, floating_ips: vec![], + multicast_groups: vec![], firewall_rules: vec![], dhcp_config: DhcpConfig { dns_servers: vec![], @@ -3093,6 +3309,7 @@ mod tests { source_nat: local_config.source_nat, ephemeral_ip: local_config.ephemeral_ip, floating_ips: local_config.floating_ips, + multicast_groups: local_config.multicast_groups, firewall_rules: local_config.firewall_rules, dhcp_config, state: InstanceStates::new(vmm_runtime, migration_id), @@ -3295,4 +3512,25 @@ mod tests { assert_eq!(state.vmm_state.state, VmmState::Failed); logctx.cleanup_successful(); } + + #[test] + fn test_multicast_membership_equality() { + let membership1 = InstanceMulticastMembership { + group_ip: IpAddr::V4(Ipv4Addr::new(239, 1, 1, 1)), + sources: vec![], + }; + + let membership2 = InstanceMulticastMembership { + group_ip: IpAddr::V4(Ipv4Addr::new(239, 1, 1, 1)), + sources: vec![], + }; + + let membership3 = InstanceMulticastMembership { + group_ip: IpAddr::V4(Ipv4Addr::new(239, 1, 1, 2)), + sources: vec![], + }; + + assert_eq!(membership1, membership2); + assert_ne!(membership1, membership3); + } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index fa8a11c89d8..540b71195a7 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -20,6 +20,7 @@ use 
omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::shared::SledIdentifiers; use omicron_uuid_kinds::PropolisUuid; +use sled_agent_api::v7::{InstanceEnsureBody, InstanceMulticastBody}; use sled_agent_config_reconciler::AvailableDatasetsReceiver; use sled_agent_config_reconciler::CurrentlyManagedZpoolsReceiver; use sled_agent_types::instance::*; @@ -300,6 +301,44 @@ impl InstanceManager { rx.await? } + pub async fn join_multicast_group( + &self, + propolis_id: PropolisUuid, + multicast_body: &InstanceMulticastBody, + ) -> Result<(), Error> { + let (tx, rx) = oneshot::channel(); + self.inner + .tx + .send(InstanceManagerRequest::JoinMulticastGroup { + propolis_id, + multicast_body: multicast_body.clone(), + tx, + }) + .await + .map_err(|_| Error::FailedSendInstanceManagerClosed)?; + + rx.await? + } + + pub async fn leave_multicast_group( + &self, + propolis_id: PropolisUuid, + multicast_body: &InstanceMulticastBody, + ) -> Result<(), Error> { + let (tx, rx) = oneshot::channel(); + self.inner + .tx + .send(InstanceManagerRequest::LeaveMulticastGroup { + propolis_id, + multicast_body: multicast_body.clone(), + tx, + }) + .await + .map_err(|_| Error::FailedSendInstanceManagerClosed)?; + + rx.await? + } + /// Returns the last-set size of the reservoir pub fn reservoir_size(&self) -> ByteCount { self.inner.vmm_reservoir_manager.reservoir_size() @@ -367,6 +406,16 @@ enum InstanceManagerRequest { RefreshExternalIps { tx: oneshot::Sender>, }, + JoinMulticastGroup { + propolis_id: PropolisUuid, + multicast_body: InstanceMulticastBody, + tx: oneshot::Sender>, + }, + LeaveMulticastGroup { + propolis_id: PropolisUuid, + multicast_body: InstanceMulticastBody, + tx: oneshot::Sender>, + }, GetState { propolis_id: PropolisUuid, tx: oneshot::Sender>, @@ -485,6 +534,12 @@ impl InstanceManagerRunner { }, Some(RefreshExternalIps { tx }) => { self.refresh_external_ips(tx) + }, + Some(JoinMulticastGroup { propolis_id, multicast_body, tx }) => { + self.join_multicast_group(tx, propolis_id, &multicast_body) + }, + Some(LeaveMulticastGroup { propolis_id, multicast_body, tx }) => { + self.leave_multicast_group(tx, propolis_id, &multicast_body) } Some(GetState { propolis_id, tx }) => { // TODO(eliza): it could potentially be nice to @@ -741,6 +796,48 @@ impl InstanceManagerRunner { Ok(()) } + fn join_multicast_group( + &self, + tx: oneshot::Sender>, + propolis_id: PropolisUuid, + multicast_body: &InstanceMulticastBody, + ) -> Result<(), Error> { + let Some(instance) = self.get_propolis(propolis_id) else { + return Err(Error::NoSuchVmm(propolis_id)); + }; + + match multicast_body { + InstanceMulticastBody::Join(membership) => { + instance.join_multicast_group(tx, membership)?; + } + InstanceMulticastBody::Leave(membership) => { + instance.leave_multicast_group(tx, membership)?; + } + } + Ok(()) + } + + fn leave_multicast_group( + &self, + tx: oneshot::Sender>, + propolis_id: PropolisUuid, + multicast_body: &InstanceMulticastBody, + ) -> Result<(), Error> { + let Some(instance) = self.get_propolis(propolis_id) else { + return Err(Error::NoSuchVmm(propolis_id)); + }; + + match multicast_body { + InstanceMulticastBody::Join(membership) => { + instance.join_multicast_group(tx, membership)?; + } + InstanceMulticastBody::Leave(membership) => { + instance.leave_multicast_group(tx, membership)?; + } + } + Ok(()) + } + fn get_instance_state( &self, tx: oneshot::Sender>, diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 
60d6d9f0fb8..31fa6bfbceb 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -73,6 +73,7 @@ impl Server { ..config.dropshot.clone() }; let dropshot_log = log.new(o!("component" => "dropshot (SledAgent)")); + let http_server = dropshot::ServerBuilder::new(http_api(), sled_agent, dropshot_log) .config(dropshot_config) @@ -84,7 +85,6 @@ impl Server { ))) .start() .map_err(|error| format!("initializing server: {}", error))?; - Ok(Server { http_server }) } diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index 58454d2a507..744ebb1bea3 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -11,6 +11,7 @@ use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; pub use sled_hardware_types::{Baseboard, SledCpuFamily}; +use sp_sim::FAKE_GIMLET_MODEL; use std::net::Ipv6Addr; use std::net::{IpAddr, SocketAddr}; @@ -100,6 +101,26 @@ impl Config { update_directory: Option<&Utf8Path>, zpool_config: ZpoolConfig, cpu_family: SledCpuFamily, + ) -> Config { + Self::for_testing_with_baseboard( + id, + sim_mode, + nexus_address, + update_directory, + zpool_config, + cpu_family, + None, + ) + } + + pub fn for_testing_with_baseboard( + id: SledUuid, + sim_mode: SimMode, + nexus_address: Option, + update_directory: Option<&Utf8Path>, + zpool_config: ZpoolConfig, + cpu_family: SledCpuFamily, + baseboard_serial: Option, ) -> Config { // This IP range is guaranteed by RFC 6666 to discard traffic. // For tests that don't use a Nexus, we use this address to simulate a @@ -120,6 +141,11 @@ impl Config { } }; + // If a baseboard serial number is provided, use it; otherwise, generate + // a default one based on the sled ID. + let baseboard_identifier = + baseboard_serial.unwrap_or_else(|| format!("sim-{id}")); + Config { id, sim_mode, @@ -142,8 +168,8 @@ impl Config { reservoir_ram: TEST_RESERVOIR_RAM, cpu_family, baseboard: Baseboard::Gimlet { - identifier: format!("sim-{}", id), - model: String::from("sim-gimlet"), + identifier: baseboard_identifier, + model: String::from(FAKE_GIMLET_MODEL), revision: 3, }, }, diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 17750f5ba79..94d0903f7a7 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -36,12 +36,12 @@ use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; use range_requests::PotentialRange; +use sled_agent_api::v7::InstanceMulticastBody; use sled_agent_api::*; use sled_agent_types::bootstore::BootstoreStatus; use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; -use sled_agent_types::instance::InstanceEnsureBody; use sled_agent_types::instance::InstanceExternalIpBody; use sled_agent_types::instance::VmmPutStateBody; use sled_agent_types::instance::VmmPutStateResponse; @@ -82,10 +82,23 @@ enum SledAgentSimImpl {} impl SledAgentApi for SledAgentSimImpl { type Context = Arc; - async fn vmm_register( + async fn vmm_register_v1( rqctx: RequestContext, path_params: Path, - body: TypedBody, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let propolis_id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_register_v1(propolis_id, body_args).await?, + )) + } + + async fn vmm_register_v7( + rqctx: RequestContext, + path_params: Path, + body: 
TypedBody, ) -> Result, HttpError> { let sa = rqctx.context(); let propolis_id = path_params.into_inner().propolis_id; @@ -146,6 +159,58 @@ impl SledAgentApi for SledAgentSimImpl { Ok(HttpResponseUpdatedNoContent()) } + async fn vmm_join_multicast_group( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let propolis_id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + + match body_args { + InstanceMulticastBody::Join(membership) => { + sa.instance_join_multicast_group(propolis_id, &membership) + .await?; + } + InstanceMulticastBody::Leave(_) => { + // This endpoint is for joining - reject leave operations + return Err(HttpError::for_bad_request( + None, + "Join endpoint cannot process Leave operations".to_string(), + )); + } + } + + Ok(HttpResponseUpdatedNoContent()) + } + + async fn vmm_leave_multicast_group( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let propolis_id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + + match body_args { + InstanceMulticastBody::Leave(membership) => { + sa.instance_leave_multicast_group(propolis_id, &membership) + .await?; + } + InstanceMulticastBody::Join(_) => { + // This endpoint is for leaving - reject join operations + return Err(HttpError::for_bad_request( + None, + "Leave endpoint cannot process Join operations".to_string(), + )); + } + } + + Ok(HttpResponseUpdatedNoContent()) + } + async fn disk_put( rqctx: RequestContext, path_params: Path, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 4648490b2a0..efda700b747 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -56,14 +56,16 @@ use propolis_client::{ }; use range_requests::PotentialRange; use sled_agent_api::SupportBundleMetadata; +use sled_agent_api::v7::InstanceMulticastMembership; use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; use sled_agent_types::instance::{ - InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateResponse, - VmmStateRequested, VmmUnregisterResponse, + InstanceExternalIpBody, VmmPutStateResponse, VmmStateRequested, + VmmUnregisterResponse, }; + use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; @@ -99,6 +101,9 @@ pub struct SledAgent { /// lists of external IPs assigned to instances pub external_ips: Mutex>>, + /// multicast group memberships for instances + pub multicast_groups: + Mutex>>, pub vpc_routes: Mutex>, config: Config, fake_zones: Mutex, @@ -180,6 +185,7 @@ impl SledAgent { simulated_upstairs, v2p_mappings: Mutex::new(HashSet::new()), external_ips: Mutex::new(HashMap::new()), + multicast_groups: Mutex::new(HashMap::new()), vpc_routes: Mutex::new(HashMap::new()), mock_propolis: futures::lock::Mutex::new(None), config: config.clone(), @@ -197,12 +203,40 @@ impl SledAgent { /// Idempotently ensures that the given API Instance (described by /// `api_instance`) exists on this server in the given runtime state /// (described by `target`). 
+    // Keep the v1 method for compatibility; it just delegates to the v7-typed path.
+    pub async fn instance_register_v1(
+        self: &Arc,
+        propolis_id: PropolisUuid,
+        instance: sled_agent_types::instance::InstanceEnsureBody,
+    ) -> Result {
+        // Convert v1 to v7 for internal processing
+        let v7_instance = sled_agent_api::v7::InstanceEnsureBody {
+            vmm_spec: instance.vmm_spec,
+            local_config: sled_agent_api::v7::InstanceSledLocalConfig {
+                hostname: instance.local_config.hostname,
+                nics: instance.local_config.nics,
+                source_nat: instance.local_config.source_nat,
+                ephemeral_ip: instance.local_config.ephemeral_ip,
+                floating_ips: instance.local_config.floating_ips,
+                multicast_groups: Vec::new(), // v1 doesn't support multicast
+                firewall_rules: instance.local_config.firewall_rules,
+                dhcp_config: instance.local_config.dhcp_config,
+            },
+            vmm_runtime: instance.vmm_runtime,
+            instance_id: instance.instance_id,
+            migration_id: instance.migration_id,
+            propolis_addr: instance.propolis_addr,
+            metadata: instance.metadata,
+        };
+        self.instance_register(propolis_id, v7_instance).await
+    }
+
     pub async fn instance_register(
         self: &Arc,
         propolis_id: PropolisUuid,
-        instance: InstanceEnsureBody,
+        instance: sled_agent_api::v7::InstanceEnsureBody,
     ) -> Result {
-        let InstanceEnsureBody {
+        let sled_agent_api::v7::InstanceEnsureBody {
             vmm_spec,
             local_config,
             instance_id,
@@ -685,6 +719,44 @@ impl SledAgent {
         Ok(())
     }
+    pub async fn instance_join_multicast_group(
+        &self,
+        propolis_id: PropolisUuid,
+        membership: &sled_agent_api::v7::InstanceMulticastMembership,
+    ) -> Result<(), Error> {
+        if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await {
+            return Err(Error::internal_error(
+                "can't join multicast group for VMM that's not registered",
+            ));
+        }
+
+        let mut groups = self.multicast_groups.lock().unwrap();
+        let my_groups = groups.entry(propolis_id).or_default();
+
+        my_groups.insert(membership.clone());
+
+        Ok(())
+    }
+
+    pub async fn instance_leave_multicast_group(
+        &self,
+        propolis_id: PropolisUuid,
+        membership: &sled_agent_api::v7::InstanceMulticastMembership,
+    ) -> Result<(), Error> {
+        if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await {
+            return Err(Error::internal_error(
+                "can't leave multicast group for VMM that's not registered",
+            ));
+        }
+
+        let mut groups = self.multicast_groups.lock().unwrap();
+        let my_groups = groups.entry(propolis_id).or_default();
+
+        my_groups.remove(membership);
+
+        Ok(())
+    }
+
     /// Used for integration tests that require a component to talk to a
     /// mocked propolis-server API.
Returns the socket on which the dropshot /// service is listening, which *must* be patched into Nexus with diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index a17bb48d6b4..d9a3e7c89b2 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -54,6 +54,7 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{ GenericUuid, MupdateOverrideUuid, PropolisUuid, SledUuid, }; +use sled_agent_api::v7::{InstanceEnsureBody, InstanceMulticastBody}; use sled_agent_config_reconciler::{ ConfigReconcilerHandle, ConfigReconcilerSpawnToken, InternalDisks, InternalDisksReceiver, LedgerNewConfigError, LedgerTaskError, @@ -62,8 +63,8 @@ use sled_agent_config_reconciler::{ use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::instance::{ - InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateResponse, - VmmStateRequested, VmmUnregisterResponse, + InstanceExternalIpBody, VmmPutStateResponse, VmmStateRequested, + VmmUnregisterResponse, }; use sled_agent_types::probes::ProbeCreate; use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; @@ -841,7 +842,42 @@ impl SledAgent { /// Idempotently ensures that a given instance is registered with this sled, /// i.e., that it can be addressed by future calls to /// [`Self::instance_ensure_state`]. - pub async fn instance_ensure_registered( + pub async fn instance_ensure_registered_v1( + &self, + propolis_id: PropolisUuid, + instance: sled_agent_types::instance::InstanceEnsureBody, + ) -> Result { + // Convert v1 to v7 + let v5_instance = sled_agent_api::v7::InstanceEnsureBody { + vmm_spec: instance.vmm_spec, + local_config: sled_agent_api::v7::InstanceSledLocalConfig { + hostname: instance.local_config.hostname, + nics: instance.local_config.nics, + source_nat: instance.local_config.source_nat, + ephemeral_ip: instance.local_config.ephemeral_ip, + floating_ips: instance.local_config.floating_ips, + multicast_groups: Vec::new(), // v1 doesn't support multicast + firewall_rules: instance.local_config.firewall_rules, + dhcp_config: instance.local_config.dhcp_config, + }, + vmm_runtime: instance.vmm_runtime, + instance_id: instance.instance_id, + migration_id: instance.migration_id, + propolis_addr: instance.propolis_addr, + metadata: instance.metadata, + }; + self.instance_ensure_registered_v7(propolis_id, v5_instance).await + } + + pub async fn instance_ensure_registered_v7( + &self, + propolis_id: PropolisUuid, + instance: InstanceEnsureBody, + ) -> Result { + self.instance_ensure_registered(propolis_id, instance).await + } + + async fn instance_ensure_registered( &self, propolis_id: PropolisUuid, instance: InstanceEnsureBody, @@ -914,6 +950,30 @@ impl SledAgent { .map_err(|e| Error::Instance(e)) } + pub async fn instance_join_multicast_group( + &self, + propolis_id: PropolisUuid, + multicast_body: &InstanceMulticastBody, + ) -> Result<(), Error> { + self.inner + .instances + .join_multicast_group(propolis_id, multicast_body) + .await + .map_err(|e| Error::Instance(e)) + } + + pub async fn instance_leave_multicast_group( + &self, + propolis_id: PropolisUuid, + multicast_body: &InstanceMulticastBody, + ) -> Result<(), Error> { + self.inner + .instances + .leave_multicast_group(propolis_id, multicast_body) + .await + .map_err(|e| Error::Instance(e)) + } + /// Returns the state of the instance with the provided ID. 
 pub async fn instance_get_state(
         &self,
diff --git a/sled-agent/tests/multicast_cross_version_test.rs b/sled-agent/tests/multicast_cross_version_test.rs
new file mode 100644
index 00000000000..b869e5a76e9
--- /dev/null
+++ b/sled-agent/tests/multicast_cross_version_test.rs
@@ -0,0 +1,118 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Cross-version compatibility tests for sled-agent multicast APIs.
+//!
+//! This test verifies that pre-multicast (v5 and earlier) and v7 (multicast)
+//! instance configurations work correctly together, specifically around
+//! multicast group support. It follows the same pattern as the DNS
+//! cross-version tests.
+
+use anyhow::Result;
+use std::net::IpAddr;
+
+use omicron_common::api::internal::shared::DhcpConfig;
+use sled_agent_api::v7;
+
+// Generate a v5 client from the v5 OpenAPI spec (pre-multicast; multicast
+// support arrives in v7)
+mod v5_client {
+    progenitor::generate_api!(
+        spec = "../openapi/sled-agent/sled-agent-5.0.0-253577.json",
+        interface = Positional,
+        inner_type = slog::Logger,
+        derives = [schemars::JsonSchema, Clone, Eq, PartialEq],
+        pre_hook = (|log: &slog::Logger, request: &reqwest::Request| {
+            slog::debug!(log, "client request";
+                "method" => %request.method(),
+                "uri" => %request.url(),
+                "body" => ?&request.body(),
+            );
+        }),
+        post_hook = (|log: &slog::Logger, result: &Result<_, _>| {
+            slog::debug!(log, "client response"; "result" => ?result);
+        })
+    );
+}
+
+// A v7 server can productively handle requests from a pre-multicast client,
+// and a pre-multicast client can provide instance configurations to a v7
+// server (backwards compatible). This follows the same pattern as DNS
+// cross-version compatibility.
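+//
+// For orientation only, the wire-format difference the assertions below rely
+// on looks roughly like this (field values are illustrative, not exact API
+// output):
+//
+//   pre-multicast local_config: { "hostname": "...", "floating_ips": [], ... }
+//   v7 local_config:            { ..., "multicast_groups":
+//                                   [ { "group_ip": "239.1.1.1", "sources": ["192.168.1.10"] } ] }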
+#[tokio::test]
+pub async fn multicast_cross_version_works() -> Result<(), anyhow::Error> {
+    use omicron_test_utils::dev::test_setup_log;
+    let logctx = test_setup_log("multicast_cross_version_works");
+
+    let multicast_addr = "239.1.1.1".parse::<IpAddr>().unwrap();
+    let source_addr = "192.168.1.10".parse::<IpAddr>().unwrap();
+
+    // Focus on the local_config field since that's where multicast_groups lives
+
+    // Create v4 local config JSON (won't have multicast_groups field)
+    let v4_local_config_json = serde_json::json!({
+        "hostname": "test-v4",
+        "nics": [],
+        "source_nat": {
+            "ip": "10.1.1.1",
+            "first_port": 0,
+            "last_port": 16383
+        },
+        "ephemeral_ip": null,
+        "floating_ips": [],
+        "firewall_rules": [],
+        "dhcp_config": {
+            "dns_servers": [],
+            "host_domain": null,
+            "search_domains": []
+        }
+    });
+
+    // Create v5 local config with multicast_groups
+    let v5_local_config = v7::InstanceSledLocalConfig {
+        hostname: omicron_common::api::external::Hostname::try_from("test-v5")
+            .unwrap(),
+        nics: vec![],
+        source_nat: nexus_types::deployment::SourceNatConfig::new(
+            "10.1.1.1".parse().unwrap(),
+            0,
+            16383,
+        )
+        .unwrap(),
+        ephemeral_ip: None,
+        floating_ips: vec![],
+        multicast_groups: vec![v7::InstanceMulticastMembership {
+            group_ip: multicast_addr,
+            sources: vec![source_addr],
+        }],
+        firewall_rules: vec![],
+        dhcp_config: DhcpConfig {
+            dns_servers: vec![],
+            host_domain: None,
+            search_domains: vec![],
+        },
+    };
+
+    // Serialize both configs so their wire formats can be compared directly.
+    let v4_as_v5_json = serde_json::to_string(&v4_local_config_json)?;
+    let v5_json = serde_json::to_string(&v5_local_config)?;
+
+    // v4 should NOT have multicast_groups in the JSON
+    assert!(
+        !v4_as_v5_json.contains("multicast_groups"),
+        "v4 InstanceSledLocalConfig should not contain multicast_groups field"
+    );
+
+    // v5 should HAVE multicast_groups in the JSON
+    assert!(
+        v5_json.contains("multicast_groups"),
+        "v5 InstanceSledLocalConfig should contain multicast_groups field"
+    );
+
+    // Verify v5 has the multicast group we added
+    assert!(
+        v5_json.contains(&format!("\"group_ip\":\"{multicast_addr}\"")),
+        "v5 should contain the multicast group IP"
+    );
+
+    logctx.cleanup_successful();
+    Ok(())
+}
diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml
index e10552046e6..b9b843fbcbd 100644
--- a/smf/nexus/multi-sled/config-partial.toml
+++ b/smf/nexus/multi-sled/config-partial.toml
@@ -94,6 +94,10 @@ read_only_region_replacement_start.period_secs = 30
 alert_dispatcher.period_secs = 60
 webhook_deliverator.period_secs = 60
 sp_ereport_ingester.period_secs = 30
+# Disabled in R16, as the Hubris task that handles ereport ingestion requests
+# has not merged yet, and trying to ingest them will just result in Nexus
+# logging a bunch of errors.
+sp_ereport_ingester.disable = true
 # How frequently to check for a new fault management sitrep (made by any
 # Nexus).
 # This is cheap, so we should check frequently.
@@ -104,6 +108,13 @@ fm.sitrep_load_period_secs = 15
 # only necessary to ensure that it always happens eventually.
 fm.sitrep_gc_period_secs = 600
 probe_distributor.period_secs = 60
+multicast_reconciler.period_secs = 60
+# TTL for sled-to-backplane-port mapping cache
+# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes
+# multicast_reconciler.sled_cache_ttl_secs = 3600
+# TTL for backplane topology cache (static platform configuration)
+# Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails
+# multicast_reconciler.backplane_cache_ttl_secs = 86400

 [default_region_allocation_strategy]
 # by default, allocate across 3 distinct sleds
diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml
index 0006dd5bb92..c7e9e5c4317 100644
--- a/smf/nexus/single-sled/config-partial.toml
+++ b/smf/nexus/single-sled/config-partial.toml
@@ -94,6 +94,10 @@ read_only_region_replacement_start.period_secs = 30
 alert_dispatcher.period_secs = 60
 webhook_deliverator.period_secs = 60
 sp_ereport_ingester.period_secs = 30
+# Disabled in R16, as the Hubris task that handles ereport ingestion requests
+# has not merged yet, and trying to ingest them will just result in Nexus
+# logging a bunch of errors.
+sp_ereport_ingester.disable = true
 # How frequently to check for a new fault management sitrep (made by any
 # Nexus).
 # This is cheap, so we should check frequently.
@@ -104,6 +108,13 @@ fm.sitrep_load_period_secs = 15
 # only necessary to ensure that it always happens eventually.
 fm.sitrep_gc_period_secs = 600
 probe_distributor.period_secs = 60
+multicast_reconciler.period_secs = 60
+# TTL for sled-to-backplane-port mapping cache
+# Default: 3600 seconds (1 hour) - detects new sleds and inventory changes
+# multicast_reconciler.sled_cache_ttl_secs = 3600
+# TTL for backplane topology cache (static platform configuration)
+# Default: 86400 seconds (24 hours) - refreshed on-demand when validation fails
+# multicast_reconciler.backplane_cache_ttl_secs = 86400

 [default_region_allocation_strategy]
 # by default, allocate without requirement for distinct sleds.
diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs
index f4ab4600dce..1829e4a7c46 100644
--- a/sp-sim/src/gimlet.rs
+++ b/sp-sim/src/gimlet.rs
@@ -69,6 +69,12 @@ use tokio::task::{self, JoinHandle};
 
 pub const SIM_GIMLET_BOARD: &str = "SimGimletSp";
 
+/// Baseboard model used for simulated Gimlets.
+///
+/// Set to "i86pc", the same illumos platform identifier that real hardware reports,
+/// so simulated sleds can match with simulated SPs in inventory.
+pub const FAKE_GIMLET_MODEL: &str = "i86pc";
+
 // Type alias for the remote end of an MGS serial console connection.
 type AttachedMgsSerialConsole = Arc)>>>;
 
@@ -895,10 +901,9 @@ impl Handler {
     fn sp_state_impl(&self) -> SpStateV2 {
         // Make the Baseboard a PC so that our testbeds work as expected.
-        const FAKE_GIMLET_MODEL: &[u8] = b"i86pc";
-
         let mut model = [0; 32];
-        model[..FAKE_GIMLET_MODEL.len()].copy_from_slice(FAKE_GIMLET_MODEL);
+        model[..FAKE_GIMLET_MODEL.len()]
+            .copy_from_slice(FAKE_GIMLET_MODEL.as_bytes());
 
         SpStateV2 {
             hubris_archive_id: [0; 8],
diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs
index 23d753bb8d6..cf6c7b1997e 100644
--- a/sp-sim/src/lib.rs
+++ b/sp-sim/src/lib.rs
@@ -16,6 +16,7 @@ use async_trait::async_trait;
 pub use config::Config;
 use gateway_messages::SpPort;
 use gateway_types::component::SpState;
+pub use gimlet::FAKE_GIMLET_MODEL;
 pub use gimlet::Gimlet;
 pub use gimlet::GimletPowerState;
 pub use gimlet::SIM_GIMLET_BOARD;
diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs
index beef2f61da1..9373dc627ce 100644
--- a/uuid-kinds/src/lib.rs
+++ b/uuid-kinds/src/lib.rs
@@ -57,6 +57,7 @@ impl_typed_uuid_kinds! {
     Instance = {},
     InternalZpool = {},
     LoopbackAddress = {},
+    MulticastGroup = {},
     Mupdate = {},
     MupdateOverride = {},
     // `OmicronSledConfig`s do not themselves contain IDs, but we generate IDs
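(Aside, not part of the diff: the uuid-kinds change above only registers a new `MulticastGroup` kind. For reference, here is a minimal standalone sketch of the typed-UUID pattern such a kind plugs into; this is not the `impl_typed_uuid_kinds!` macro output, the marker types are hypothetical, and it assumes the `uuid` crate with the `v4` feature.)

// Sketch of the typed-UUID idea: a marker type parameter keeps different
// kinds of IDs from being mixed up at compile time, at zero runtime cost.
use std::marker::PhantomData;
use uuid::Uuid;

struct TypedUuid<K> {
    inner: Uuid,
    _kind: PhantomData<K>,
}

impl<K> TypedUuid<K> {
    fn new_v4() -> Self {
        Self { inner: Uuid::new_v4(), _kind: PhantomData }
    }
}

// Hypothetical kind markers, for illustration only.
enum MulticastGroupKind {}
enum InstanceKind {}

fn delete_group(id: TypedUuid<MulticastGroupKind>) {
    println!("deleting multicast group {}", id.inner);
}

fn main() {
    let group_id = TypedUuid::<MulticastGroupKind>::new_v4();
    let _instance_id = TypedUuid::<InstanceKind>::new_v4();
    delete_group(group_id);
    // delete_group(_instance_id); // would not compile: wrong ID kind
}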