From 6baeca7c7fc9f4f72b9eadbdbeb81f4bb08586a4 Mon Sep 17 00:00:00 2001 From: Blaine Gardner Date: Tue, 22 Aug 2023 13:15:08 -0600 Subject: [PATCH] multus: detect network CIDRs via canary Change how Rook detects network CIDRs for Multus networks. The IPAM configuration is defined only as an arbitrary JSON string with a "type" field and nothing more. Rook's detection of CIDRs for whereabouts had already grown out of date since the initial implementation. Additionally, Rook did not support DHCP IPAM, which is a reasonable choice for users. Furthermore, Rook did not support CNI plugin chaining, which further complicates NADs. Based on the CNI spec, network chaining can result in arbitrary changes to the network CIDRs established by the first-given plugin. All these problems make it increasingly difficult for Rook to support Multus by inspecting the NAD itself to predict network CIDRs. Instead, it is better for Rook to treat the CNI process as a black box. To preserve the legacy functionality of auto-detecting networks, and to make it as robust as possible, change to a canary-style architecture like the one used for Ceph mons, from which Rook will detect the network CIDRs if possible. Also allow users to specify overrides for CIDR ranges. This allows Rook to still support esoteric and unexpected NAD or network configurations where a CIDR range is not detectable or where the detected range would be incomplete. Because it may be impossible for Rook to understand the network CIDRs holistically while residing on only a portion of the network, this feature should have been present from Multus's inception. Improving CIDR auto-detection and allowing users to specify overrides for auto-detected CIDRs rounds out Rook's Multus support for CephCluster (core/RADOS) installations. No further architectural changes should be needed for CephClusters regarding the application of public/cluster network CIDRs for Multus networks.
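As an illustrative sketch of the new override (the `addressRanges` field is added by this patch; the NAD name and CIDR value below are hypothetical), a user could pin the ranges in the CephCluster spec like so:

    network:
      provider: multus
      selectors:
        public: rook-ceph/rook-public-nw
      addressRanges:
        public:
          - "192.168.100.0/24"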
Signed-off-by: Blaine Gardner (cherry picked from commit 3c43268d0a924c3966a253b1bf28a5d491cdedb1) (cherry picked from commit 8b72dfa02048b6e0416f056892a427313be7d829) --- .../CRDs/Cluster/ceph-cluster-crd.md | 155 ++-- Documentation/CRDs/specification.md | 137 ++- .../Advanced/ceph-configuration.md | 13 +- .../charts/rook-ceph/templates/resources.yaml | 25 +- deploy/examples/crds.yaml | 25 +- go.mod | 1 + pkg/apis/ceph.rook.io/v1/cluster.go | 12 +- pkg/apis/ceph.rook.io/v1/network.go | 151 +++- pkg/apis/ceph.rook.io/v1/network_test.go | 293 ++++++- pkg/apis/ceph.rook.io/v1/types.go | 76 +- .../ceph.rook.io/v1/zz_generated.deepcopy.go | 53 +- pkg/operator/ceph/cluster/cluster.go | 32 +- pkg/operator/ceph/cluster/mgr/spec.go | 2 +- pkg/operator/ceph/cluster/mon/mon.go | 13 +- pkg/operator/ceph/cluster/mon/mon_test.go | 18 +- pkg/operator/ceph/cluster/mon/node_test.go | 12 +- pkg/operator/ceph/cluster/mon/spec.go | 8 +- pkg/operator/ceph/cluster/osd/key_rotation.go | 2 +- pkg/operator/ceph/cluster/osd/spec.go | 2 +- pkg/operator/ceph/cluster/rbd/spec.go | 2 +- pkg/operator/ceph/config/config.go | 14 - pkg/operator/ceph/config/monstore.go | 16 +- pkg/operator/ceph/config/network.go | 124 --- pkg/operator/ceph/config/network_test.go | 327 ------- .../ceph/controller/cluster_info_test.go | 3 +- pkg/operator/ceph/controller/network.go | 426 +++++++++ pkg/operator/ceph/controller/network_test.go | 830 ++++++++++++++++++ pkg/operator/ceph/csi/controller_test.go | 6 +- pkg/operator/ceph/csi/spec.go | 4 +- pkg/operator/ceph/file/mds/spec.go | 2 +- pkg/operator/ceph/file/mirror/spec.go | 2 +- pkg/operator/ceph/nfs/spec.go | 2 +- pkg/operator/ceph/object/spec.go | 2 +- .../k8sutil/cmdreporter/cmdreporter.go | 37 +- pkg/operator/k8sutil/network.go | 157 ++-- pkg/operator/k8sutil/network_test.go | 416 +++++---- 36 files changed, 2532 insertions(+), 868 deletions(-) delete mode 100644 pkg/operator/ceph/config/network.go delete mode 100644 pkg/operator/ceph/config/network_test.go create mode 100644 pkg/operator/ceph/controller/network.go create mode 100644 pkg/operator/ceph/controller/network_test.go diff --git a/Documentation/CRDs/Cluster/ceph-cluster-crd.md b/Documentation/CRDs/Cluster/ceph-cluster-crd.md index f2a6127ec0ff..b52dd3a8cc78 100755 --- a/Documentation/CRDs/Cluster/ceph-cluster-crd.md +++ b/Documentation/CRDs/Cluster/ceph-cluster-crd.md @@ -161,7 +161,12 @@ If not specified, the default SDN will be used. Configure the network that will be enabled for the cluster and services. * `provider`: Specifies the network provider that will be used to connect the network interface. You can choose between `host`, and `multus`. -* `selectors`: List the network selector(s) that will be used associated by a key. +* `selectors`: Used for `multus` provider only. Select NetworkAttachmentDefinitions to use for Ceph networks. + * `public`: Select the NetworkAttachmentDefinition to use for the public network. + * `cluster`: Select the NetworkAttachmentDefinition to use for the cluster network. +* `addressRanges`: Used for `host` or `multus` providers only. Allows overriding the address ranges (CIDRs) that Ceph will listen on. + * `public`: A list of individual network ranges in CIDR format to use for Ceph's public network. + * `cluster`: A list of individual network ranges in CIDR format to use for Ceph's cluster network. * `ipFamily`: Specifies the network stack Ceph daemons should listen on. * `dualStack`: Specifies that Ceph daemon should listen on both IPv4 and IPv6 network stacks. 
* `connections`: Settings for network connections using Ceph's msgr2 protocol @@ -185,105 +190,127 @@ Configure the network that will be enabled for the cluster and services. Changing networking configuration after a Ceph cluster has been deployed is NOT supported and will result in a non-functioning cluster. -#### Host Networking - -To use host networking, set `provider: host`. - -If the host networking setting is changed in a cluster where mons are already running, the existing mons will -remain running with the same network settings with which they were created. To complete the conversion -to or from host networking after you update this setting, you will need to -[failover the mons](../../Storage-Configuration/Advanced/ceph-mon-health.md#failing-over-a-monitor) -in order to have mons on the desired network configuration. +#### Ceph public and cluster networks -#### Multus +Ceph daemons can operate on up to two distinct networks: public, and cluster. -Rook supports addition of public and cluster network for ceph using Multus +Ceph daemons always use the public network, which is the Kubernetes pod network by default. The +public network is used for client communications with the Ceph cluster (reads/writes). -The selector keys are required to be `public` and `cluster` where each represent: +If specified, the cluster network is used to isolate internal Ceph replication traffic. This includes +additional copies of data replicated between OSDs during client reads/writes. This also includes OSD +data recovery (re-replication) when OSDs or nodes go offline. If the cluster network is unspecified, +the public network is used for this traffic instead. -* `public`: client communications with the cluster (reads/writes) -* `cluster`: internal Ceph replication network +Some Rook network providers allow manually specifying the public and cluster networks that Ceph +will use for data traffic. Use `addressRanges` to specify this. For example: -If you want to learn more, please read: +```yaml + network: + provider: host + addressRanges: + public: + - "192.168.100.0/24" + - "192.168.101.0/24" + cluster: + - "192.168.200.0/24" +``` -* [Ceph Networking reference](https://docs.ceph.com/docs/master/rados/configuration/network-config-ref/). -* [Multus documentation](https://intel.github.io/multus-cni/doc/how-to-use.html) +This spec translates directly to Ceph's `public_network` and `cluster_network` configurations. +Refer to [Ceph networking documentation](https://docs.ceph.com/docs/master/rados/configuration/network-config-ref/) +for more details. -Based on the configuration, the operator will do the following: +The default, unspecified network provider cannot make use of these configurations. -1. If only the `public` selector is specified, all communication will happen on that network +Ceph public and cluster network configurations are allowed to change, but this should be done with +great care. When updating underlying networks or Ceph network settings, Rook assumes that the +current network configuration used by Ceph daemons will continue to operate as intended. Network +changes are not applied to Ceph daemon pods (like OSDs and MDSes) until the pod is restarted. When +making network changes, ensure that restarted pods will not lose connectivity to existing pods, and +vice versa. - ```yaml - network: - provider: multus - selectors: - public: rook-ceph/rook-public-nw - ``` +#### Host Networking -2. If only the `cluster` selector is specified, the internal cluster traffic\* will happen on that network.
All other traffic to mons, OSDs, and other daemons will be on the default network. +To use host networking, set `provider: host`. - ```yaml - network: - provider: multus - selectors: - cluster: rook-ceph/rook-cluster-nw - ``` +To instruct Ceph to operate on specific host interfaces or networks, use `addressRanges` to select +the network CIDRs Ceph will bind to on the host. -3. If both `public` and `cluster` selectors are specified the first one will run all the communication network and the second the internal cluster network\* +If the host networking setting is changed in a cluster where mons are already running, the existing mons will +remain running with the same network settings with which they were created. To complete the conversion +to or from host networking after you update this setting, you will need to +[failover the mons](../../Storage-Configuration/Advanced/ceph-mon-health.md#failing-over-a-monitor) +in order to have mons on the desired network configuration. - ```yaml - network: - provider: multus - selectors: - public: rook-ceph/rook-public-nw - cluster: rook-ceph/rook-cluster-nw - ``` +#### Multus -\* Internal cluster traffic includes OSD heartbeats, data replication, and data recovery +Rook supports using Multus NetworkAttachmentDefinitions for Ceph public and cluster networks. -Only OSD pods will have both Public and Cluster networks attached. The rest of the Ceph component pods and CSI pods will only have the Public network attached. -Rook Ceph operator will not have any networks attached as it proxies the required commands via a sidecar container in the mgr pod. +Refer to [Multus documentation](https://github.com/k8snetworkplumbingwg/multus-cni/blob/master/docs/how-to-use.md) +for details about how to set up and select Multus networks. -In order to work, each selector value must match a `NetworkAttachmentDefinition` object name in Multus. +Rook will attempt to auto-discover the network CIDRs for selected public and/or cluster networks. +This process is not guaranteed to succeed. Furthermore, this process will get a new network lease +for each CephCluster reconcile. Specify `addressRanges` manually if the auto-detection process +fails or if the selected network configuration cannot automatically recycle released network leases. -For `multus` network provider, an already working cluster with Multus networking is required. Network attachment definition that later will be attached to the cluster needs to be created before the Cluster CRD. -The Network attachment definitions should be using whereabouts cni. -If Rook cannot find the provided Network attachment definition it will fail running the Ceph OSD pods. -You can add the Multus network attachment selection annotation selecting the created network attachment definition on `selectors`. +Only OSD pods will have both public and cluster networks attached (if specified). The rest of the +Ceph component pods and CSI pods will only have the public network attached. The Rook operator will +not have any networks attached; it proxies Ceph commands via a sidecar container in the mgr pod. -A valid NetworkAttachmentDefinition will look like following: +A NetworkAttachmentDefinition must exist before it can be used by Multus for a Ceph network. 
A +recommended definition will look like the following: ```yaml apiVersion: "k8s.cni.cncf.io/v1" kind: NetworkAttachmentDefinition metadata: - name: rook-public-nw + name: ceph-multus-net + namespace: rook-ceph spec: config: '{ "cniVersion": "0.3.0", - "name": "public-nad", "type": "macvlan", - "master": "ens5", + "master": "eth0", "mode": "bridge", "ipam": { "type": "whereabouts", - "range": "192.168.1.0/24" + "range": "192.168.200.0/24" } }' ``` -* Ensure that `master` matches the network interface of the host that you want to use. -* IPAM type `whereabouts` is required because it makes sure that all the pods get a unique IP address from the multus network. -* The NetworkAttachmentDefinition should be referenced along with the namespace in which it is present like `public: /`. - e.g., the network attachment definition are in `default` namespace: +* Ensure that `master` matches the network interface on hosts that you want to use. + It must be the same across all hosts. +* CNI type `macvlan` is highly recommended. + It has less CPU and memory overhead compared to traditional Linux `bridge` configurations. +* IPAM type `whereabouts` is recommended because it ensures each pod gets an IP address unique + within the Kubernetes cluster. No DHCP server is required. If a DHCP server is present on the + network, ensure the IP range does not overlap with the DHCP server's range. - ```yaml - public: default/rook-public-nw - cluster: default/rook-cluster-nw - ``` +NetworkAttachmentDefinitions are selected for the desired Ceph network using `selectors`. Selector +values should include the namespace in which the NAD is present. `public` and `cluster` may be +selected independently. If `public` is left unspecified, Rook will configure Ceph to use the +Kubernetes pod network for Ceph client traffic. - * This format is required in order to use the NetworkAttachmentDefinition across namespaces. - * In Openshift, to use a NetworkAttachmentDefinition (NAD) across namespaces, the NAD must be deployed in the `default` namespace. The NAD is then referenced with the namespace: `default/rook-public-nw` +Consider the example below which selects a hypothetical Kubernetes-wide Multus network in the +default namespace for Ceph's public network and selects a Ceph-specific network in the `rook-ceph` +namespace for Ceph's cluster network. The commented-out portion shows an example of how address +ranges could be manually specified for the networks if needed. + +```yaml + network: + provider: multus + selectors: + public: default/kube-multus-net + cluster: rook-ceph/ceph-multus-net + # addressRanges: + # public: + # - "192.168.100.0/24" + # - "192.168.101.0/24" + # cluster: + # - "192.168.200.0/24" +``` ##### Validating Multus configuration diff --git a/Documentation/CRDs/specification.md b/Documentation/CRDs/specification.md index d30907261be1..46c51d2c5016 100644 --- a/Documentation/CRDs/specification.md +++ b/Documentation/CRDs/specification.md @@ -2476,6 +2476,51 @@ string +

AddressRangesSpec +

+

+(Appears on:NetworkSpec) +

+
+
+ + + + + + + + + + + + + + + + + +
FieldDescription
+public
+ + +CIDRList + + +
+(Optional) +

Public defines a list of CIDRs to use for Ceph public network communication.

+
+cluster
+ + +CIDRList + + +
+(Optional) +

Cluster defines a list of CIDRs to use for Ceph cluster network communication.

+

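As an illustrative sketch (hand-written, not generated from the API; the CIDR values are hypothetical), an AddressRangesSpec fragment in a CephCluster could look like:

```yaml
addressRanges:
  public:
    - "192.168.100.0/24"
  cluster:
    - "192.168.200.0/24"
```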
Annotations (map[string]string alias)

@@ -2687,6 +2732,13 @@ int64 +

CIDR +(string alias)

+
+

An IPv4 or IPv6 network CIDR.

+

This naive kubebuilder regex provides immediate feedback for some typos and for a common problem +case where the range spec is forgotten (e.g., /24). Rook does in-depth validation in code.

+

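As a rough illustration (hypothetical values): `192.168.100.0/24` and `2001:db8::/64` match this pattern, while a bare `192.168.100.0` is rejected immediately because the `/24`-style range spec is missing.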
COSIDeploymentStrategy (string alias)

@@ -3479,6 +3531,25 @@ string +

CephNetworkType +(string alias)

+
+

CephNetworkType should be “public” or “cluster”. +Allow any string so that over-specified legacy clusters do not break on CRD update.

+
+ + + + + + + + + + + + +
ValueDescription

"cluster"

"public"

CephStatus

@@ -7869,6 +7940,29 @@ PoolSpec +

NetworkProviderType +(string alias)

+

+(Appears on:NetworkSpec) +

+
+

NetworkProviderType defines valid network providers for Rook.

+
+ + + + + + + + + + + + + + +
ValueDescription

""

"host"

"multus"

NetworkSpec

@@ -7889,7 +7983,9 @@ PoolSpec provider
-string + +NetworkProviderType + @@ -7901,14 +7997,45 @@ string selectors
-map[string]string +map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.CephNetworkType]string + + + +(Optional) +

Selectors define NetworkAttachmentDefinitions to be used for Ceph public and/or cluster +networks when the “multus” network provider is used. This config section is not used for +other network providers.

+

Valid keys are “public” and “cluster”. Refer to Ceph networking documentation for more: +https://docs.ceph.com/en/reef/rados/configuration/network-config-ref/

+

Refer to Multus network annotation documentation for help selecting values: +https://github.com/k8snetworkplumbingwg/multus-cni/blob/master/docs/how-to-use.md#run-pod-with-network-annotation

+

Rook will make a best-effort attempt to automatically detect CIDR address ranges for given +network attachment definitions. Rook’s methods are robust but may be imprecise for +sufficiently complicated networks. Rook’s auto-detection process obtains a new IP address +lease for each CephCluster reconcile. If Rook fails to detect, incorrectly detects, only +partially detects, or if underlying networks do not support reusing old IP addresses, it is +best to use the ‘addressRanges’ config section to specify CIDR ranges for the Ceph cluster.

+

As a contrived example, one can use a theoretical Kubernetes-wide network for Ceph client +traffic and a theoretical Rook-only network for Ceph replication traffic as shown: +selectors: +public: “default/cluster-fast-net” +cluster: “rook-ceph/ceph-backend-net”

+ + + + +addressRanges
+ + +AddressRangesSpec + (Optional) -

Selectors string values describe what networks will be used to connect the cluster. -Meanwhile the keys describe each network respective responsibilities or any metadata -storage provider decide.

+

AddressRanges specify a list of CIDRs that Rook will apply to Ceph’s ‘public_network’ and/or +‘cluster_network’ configurations. This config section may be used for the “host” or “multus” +network providers.

diff --git a/Documentation/Storage-Configuration/Advanced/ceph-configuration.md b/Documentation/Storage-Configuration/Advanced/ceph-configuration.md index ef9536b7d495..5b9ff44a459f 100644 --- a/Documentation/Storage-Configuration/Advanced/ceph-configuration.md +++ b/Documentation/Storage-Configuration/Advanced/ceph-configuration.md @@ -363,12 +363,17 @@ ceph osd primary-affinity osd.0 0 ## OSD Dedicated Network +!!! tip + This documentation is left for historical purposes. It is still valid, but Rook offers native + support for this feature via the + [CephCluster network configuration](../../CRDs/Cluster/ceph-cluster-crd.md#ceph-public-and-cluster-networks). + It is possible to configure ceph to leverage a dedicated network for the OSDs to -communicate across. A useful overview is the [CEPH Networks](http://docs.ceph.com/docs/master/rados/configuration/network-config-ref/#ceph-networks) +communicate across. A useful overview is the [Ceph Networks](http://docs.ceph.com/docs/master/rados/configuration/network-config-ref/#ceph-networks) section of the Ceph documentation. If you declare a cluster network, OSDs will -route heartbeat, object replication and recovery traffic over the cluster +route heartbeat, object replication, and recovery traffic over the cluster network. This may improve performance compared to using a single network, -especially when slower network technologies are used, with the tradeoff of +especially when slower network technologies are used. The tradeoff is additional expense and subtle failure modes. Two changes are necessary to the configuration to enable this capability: @@ -404,7 +409,7 @@ apiVersion: v1 data: config: | [global] - public network = 10.0.7.0/24 + public network = 10.0.7.0/24 cluster network = 10.0.10.0/24 public addr = "" cluster addr = "" diff --git a/deploy/charts/rook-ceph/templates/resources.yaml b/deploy/charts/rook-ceph/templates/resources.yaml index b1f0298b053f..64c58ef39bf2 100644 --- a/deploy/charts/rook-ceph/templates/resources.yaml +++ b/deploy/charts/rook-ceph/templates/resources.yaml @@ -1891,6 +1891,25 @@ spec: description: Network related configuration nullable: true properties: + addressRanges: + description: AddressRanges specify a list of CIDRs that Rook will apply to Ceph's 'public_network' and/or 'cluster_network' configurations. This config section may be used for the "host" or "multus" network providers. + nullable: true + properties: + cluster: + description: Cluster defines a list of CIDRs to use for Ceph cluster network communication. + items: + description: "An IPv4 or IPv6 network CIDR. \n This naive kubebuilder regex provides immediate feedback for some typos and for a common problem case where the range spec is forgotten (e.g., /24). Rook does in-depth validation in code." + pattern: ^[0-9a-fA-F:.]{2,}\/[0-9]{1,3}$ + type: string + type: array + public: + description: Public defines a list of CIDRs to use for Ceph public network communication. + items: + description: "An IPv4 or IPv6 network CIDR. \n This naive kubebuilder regex provides immediate feedback for some typos and for a common problem case where the range spec is forgotten (e.g., /24). Rook does in-depth validation in code." + pattern: ^[0-9a-fA-F:.]{2,}\/[0-9]{1,3}$ + type: string + type: array + type: object connections: description: Settings for network connections such as compression and encryption across the wire. 
nullable: true @@ -1940,12 +1959,16 @@ spec: type: object provider: description: Provider is what provides network connectivity to the cluster e.g. "host" or "multus" + enum: + - "" + - host + - multus nullable: true type: string selectors: additionalProperties: type: string - description: Selectors string values describe what networks will be used to connect the cluster. Meanwhile the keys describe each network respective responsibilities or any metadata storage provider decide. + description: "Selectors define NetworkAttachmentDefinitions to be used for Ceph public and/or cluster networks when the \"multus\" network provider is used. This config section is not used for other network providers. \n Valid keys are \"public\" and \"cluster\". Refer to Ceph networking documentation for more: https://docs.ceph.com/en/reef/rados/configuration/network-config-ref/ \n Refer to Multus network annotation documentation for help selecting values: https://github.com/k8snetworkplumbingwg/multus-cni/blob/master/docs/how-to-use.md#run-pod-with-network-annotation \n Rook will make a best-effort attempt to automatically detect CIDR address ranges for given network attachment definitions. Rook's methods are robust but may be imprecise for sufficiently complicated networks. Rook's auto-detection process obtains a new IP address lease for each CephCluster reconcile. If Rook fails to detect, incorrectly detects, only partially detects, or if underlying networks do not support reusing old IP addresses, it is best to use the 'addressRanges' config section to specify CIDR ranges for the Ceph cluster. \n As a contrived example, one can use a theoretical Kubernetes-wide network for Ceph client traffic and a theoretical Rook-only network for Ceph replication traffic as shown: selectors: public: \"default/cluster-fast-net\" cluster: \"rook-ceph/ceph-backend-net\"" nullable: true type: object type: object diff --git a/deploy/examples/crds.yaml b/deploy/examples/crds.yaml index a720bc1405ff..ac4873f86a1d 100644 --- a/deploy/examples/crds.yaml +++ b/deploy/examples/crds.yaml @@ -1889,6 +1889,25 @@ spec: description: Network related configuration nullable: true properties: + addressRanges: + description: AddressRanges specify a list of CIDRs that Rook will apply to Ceph's 'public_network' and/or 'cluster_network' configurations. This config section may be used for the "host" or "multus" network providers. + nullable: true + properties: + cluster: + description: Cluster defines a list of CIDRs to use for Ceph cluster network communication. + items: + description: "An IPv4 or IPv6 network CIDR. \n This naive kubebuilder regex provides immediate feedback for some typos and for a common problem case where the range spec is forgotten (e.g., /24). Rook does in-depth validation in code." + pattern: ^[0-9a-fA-F:.]{2,}\/[0-9]{1,3}$ + type: string + type: array + public: + description: Public defines a list of CIDRs to use for Ceph public network communication. + items: + description: "An IPv4 or IPv6 network CIDR. \n This naive kubebuilder regex provides immediate feedback for some typos and for a common problem case where the range spec is forgotten (e.g., /24). Rook does in-depth validation in code." + pattern: ^[0-9a-fA-F:.]{2,}\/[0-9]{1,3}$ + type: string + type: array + type: object connections: description: Settings for network connections such as compression and encryption across the wire. 
nullable: true @@ -1938,12 +1957,16 @@ spec: type: object provider: description: Provider is what provides network connectivity to the cluster e.g. "host" or "multus" + enum: + - "" + - host + - multus nullable: true type: string selectors: additionalProperties: type: string - description: Selectors string values describe what networks will be used to connect the cluster. Meanwhile the keys describe each network respective responsibilities or any metadata storage provider decide. + description: "Selectors define NetworkAttachmentDefinitions to be used for Ceph public and/or cluster networks when the \"multus\" network provider is used. This config section is not used for other network providers. \n Valid keys are \"public\" and \"cluster\". Refer to Ceph networking documentation for more: https://docs.ceph.com/en/reef/rados/configuration/network-config-ref/ \n Refer to Multus network annotation documentation for help selecting values: https://github.com/k8snetworkplumbingwg/multus-cni/blob/master/docs/how-to-use.md#run-pod-with-network-annotation \n Rook will make a best-effort attempt to automatically detect CIDR address ranges for given network attachment definitions. Rook's methods are robust but may be imprecise for sufficiently complicated networks. Rook's auto-detection process obtains a new IP address lease for each CephCluster reconcile. If Rook fails to detect, incorrectly detects, only partially detects, or if underlying networks do not support reusing old IP addresses, it is best to use the 'addressRanges' config section to specify CIDR ranges for the Ceph cluster. \n As a contrived example, one can use a theoretical Kubernetes-wide network for Ceph client traffic and a theoretical Rook-only network for Ceph replication traffic as shown: selectors: public: \"default/cluster-fast-net\" cluster: \"rook-ceph/ceph-backend-net\"" nullable: true type: object type: object diff --git a/go.mod b/go.mod index 963643f3f868..e6a15c2e830c 100644 --- a/go.mod +++ b/go.mod @@ -117,6 +117,7 @@ require ( github.com/prometheus/procfs v0.11.1 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect + github.com/stretchr/objx v0.5.0 // indirect github.com/xlab/treeprint v1.2.0 // indirect go.starlark.net v0.0.0-20230814145427-12f4cb8177e4 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/pkg/apis/ceph.rook.io/v1/cluster.go b/pkg/apis/ceph.rook.io/v1/cluster.go index 60866375b539..9fef353432de 100644 --- a/pkg/apis/ceph.rook.io/v1/cluster.go +++ b/pkg/apis/ceph.rook.io/v1/cluster.go @@ -58,6 +58,11 @@ func (c *CephCluster) ValidateCreate() (admission.Warnings, error) { return nil, errors.New("invalid create : external mode enabled cannot have mon,dashboard,monitoring,network,disruptionManagement,storage fields in CR") } } + + if err := ValidateNetworkSpec(c.GetNamespace(), c.Spec.Network); err != nil { + return nil, errors.Errorf("invalid create: %s", err) + } + return nil, nil } @@ -76,11 +81,8 @@ func validateUpdatedCephCluster(updatedCephCluster *CephCluster, found *CephClus return nil, errors.Errorf("invalid update: DataDirHostPath change from %q to %q is not allowed", found.Spec.DataDirHostPath, updatedCephCluster.Spec.DataDirHostPath) } - // Allow an attempt to enable or disable host networking, but not other provider changes - oldProvider := updatedCephCluster.Spec.Network.Provider - newProvider := found.Spec.Network.Provider - if oldProvider != newProvider && oldProvider != "host" && newProvider != "host" { - return nil, 
errors.Errorf("invalid update: Provider change from %q to %q is not allowed", found.Spec.Network.Provider, updatedCephCluster.Spec.Network.Provider) + if err := ValidateNetworkSpecUpdate(found.GetNamespace(), found.Spec.Network, updatedCephCluster.Spec.Network); err != nil { + return nil, errors.Errorf("invalid update: %s", err) } if len(updatedCephCluster.Spec.Storage.StorageClassDeviceSets) == len(found.Spec.Storage.StorageClassDeviceSets) { diff --git a/pkg/apis/ceph.rook.io/v1/network.go b/pkg/apis/ceph.rook.io/v1/network.go index 68c0296b30b2..6abd842ee54d 100644 --- a/pkg/apis/ceph.rook.io/v1/network.go +++ b/pkg/apis/ceph.rook.io/v1/network.go @@ -16,13 +16,160 @@ limitations under the License. package v1 +import ( + "encoding/json" + "fmt" + "net" + "strings" + + nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + nadutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" + "github.com/pkg/errors" +) + // IsMultus get whether to use multus network provider func (n *NetworkSpec) IsMultus() bool { - return n.Provider == "multus" + return n.Provider == NetworkProviderMultus } // IsHost get whether to use host network provider. This method also preserve // compatibility with the old HostNetwork field. func (n *NetworkSpec) IsHost() bool { - return (n.HostNetwork && n.Provider == "") || n.Provider == "host" + return (n.HostNetwork && n.Provider == NetworkProviderDefault) || n.Provider == NetworkProviderHost +} + +func ValidateNetworkSpec(clusterNamespace string, spec NetworkSpec) error { + if spec.IsMultus() { + if len(spec.Selectors) == 0 { + return errors.Errorf("at least one network selector must be specified when using the %q network provider", NetworkProviderMultus) + } + + if _, err := spec.GetNetworkSelection(clusterNamespace, CephNetworkPublic); err != nil { + return errors.Wrap(err, "ceph public network selector provided for multus is invalid") + } + if _, err := spec.GetNetworkSelection(clusterNamespace, CephNetworkCluster); err != nil { + return errors.Wrap(err, "ceph cluster network selector provided for multus is invalid") + } + } + + if !spec.AddressRanges.IsEmpty() { + if !spec.IsMultus() && !spec.IsHost() { + // TODO: be sure to update docs that AddressRanges can be specified for host networking as + // well as multus so that the override configmap doesn't need to be set + return errors.Errorf("network ranges can only be specified for %q and %q network providers", NetworkProviderHost, NetworkProviderMultus) + } + if spec.IsMultus() { + if len(spec.AddressRanges.Public) > 0 && !spec.NetworkHasSelection(CephNetworkPublic) { + return errors.Errorf("public address range can only be specified for multus if there is a public network selection") + } + if len(spec.AddressRanges.Cluster) > 0 && !spec.NetworkHasSelection(CephNetworkCluster) { + return errors.Errorf("cluster address range can only be specified for multus if there is a cluster network selection") + } + } + } + + if err := spec.AddressRanges.Validate(); err != nil { + return err + } + + return nil +} + +func ValidateNetworkSpecUpdate(clusterNamespace string, oldSpec, newSpec NetworkSpec) error { + // Allow an attempt to enable or disable host networking, but not other provider changes + oldProvider := oldSpec.Provider + newProvider := newSpec.Provider + if oldProvider != newProvider && oldProvider != "host" && newProvider != "host" { + return errors.Errorf("invalid update: network provider change from %q to %q is not allowed", 
oldProvider, newProvider) + } + + return ValidateNetworkSpec(clusterNamespace, newSpec) +} + +// NetworkHasSelection returns true if the given Ceph network has a selection. +func (n *NetworkSpec) NetworkHasSelection(network CephNetworkType) bool { + s, ok := n.Selectors[network] + if !ok || s == "" { + return false + } + return true +} + +// GetNetworkSelection gets the network selection for a given Ceph network, or nil if the network +// doesn't have a selection. +func (n *NetworkSpec) GetNetworkSelection(clusterNamespace string, network CephNetworkType) (*nadv1.NetworkSelectionElement, error) { + if !n.NetworkHasSelection(network) { + return nil, nil // no selection for network + } + s := n.Selectors[network] + // From documentation of the "k8s.v1.cni.cncf.io/network-status" annotation, valid JSON inputs + // must be in list form, surrounded with brackets. The NAD utility library will only parse + // list-format JSON input. However, old versions of Rook code allowed non-list JSON objects. + // In order to support legacy users, make an attempt to turn single-JSON-object inputs into + // len(1) lists so that they are parsed correctly by the util library. Do not advertise this + // "feature" in documentation since it is not technically the correct format. + if strings.HasPrefix(s, "{") && strings.HasSuffix(s, "}") { + s = "[" + s + "]" + } + selection, err := nadutils.ParseNetworkAnnotation(s, clusterNamespace) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse %q network selector %q", network, s) + } + if len(selection) != 1 { + return nil, errors.Errorf("%q network selector %q has multiple (%d) selections, which is not supported", network, s, len(selection)) + } + return selection[0], nil +} + +// NetworkSelectionsToAnnotationValue converts NetworkAttachmentDefinition network selection +// elements to an annotation value for the "k8s.v1.cni.cncf.io/networks" annotation key. +func NetworkSelectionsToAnnotationValue(selections ...*nadv1.NetworkSelectionElement) (string, error) { + reduced := []*nadv1.NetworkSelectionElement{} + for _, s := range selections { + if s != nil { + reduced = append(reduced, s) + } + } + if len(reduced) == 0 { + return "", nil + } + b, err := json.Marshal(reduced) + if err != nil { + return "", errors.Wrap(err, "failed to convert network selections to annotation value") + } + return string(b), nil +} + +func (n *AddressRangesSpec) IsEmpty() bool { + return n == nil || len(n.Public) == 0 && len(n.Cluster) == 0 +} + +func (n *AddressRangesSpec) Validate() error { + if n.IsEmpty() { + return nil + } + + allRanges := append(n.Public, n.Cluster...)
+ invalid := []string{} + for _, cidr := range allRanges { + _, _, err := net.ParseCIDR(string(cidr)) + if err != nil { + // returned err is "invalid CIDR: " & not more useful than invalid list below + invalid = append(invalid, string(cidr)) + } + } + if len(invalid) == 0 { + return nil + } + + return fmt.Errorf("%d network ranges are invalid: %v", len(invalid), invalid) +} + +// String turns a CIDR list into a comma-delimited string of CIDRs +func (l *CIDRList) String() string { + sl := []string{} + for _, c := range *l { + sl = append(sl, string(c)) + } + return strings.Join(sl, ", ") } diff --git a/pkg/apis/ceph.rook.io/v1/network_test.go b/pkg/apis/ceph.rook.io/v1/network_test.go index 7866c4796341..38d4be111ab0 100644 --- a/pkg/apis/ceph.rook.io/v1/network_test.go +++ b/pkg/apis/ceph.rook.io/v1/network_test.go @@ -18,8 +18,10 @@ package v1 import ( "encoding/json" + "fmt" "testing" + nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/util/yaml" ) @@ -63,7 +65,7 @@ selectors: expected := NetworkSpec{ Provider: "host", - Selectors: map[string]string{ + Selectors: map[CephNetworkType]string{ "server": "enp2s0f0", "broker": "enp2s0f0", }, @@ -71,3 +73,292 @@ selectors: assert.Equal(t, expected, net) } + +func TestAddressRangesSpec_IsEmpty(t *testing.T) { + var specNil *AddressRangesSpec + assert.True(t, specNil.IsEmpty()) + + empty := &AddressRangesSpec{} + assert.True(t, empty.IsEmpty()) + + someCIDR := CIDR("1.1.1.1/16") + nonEmptyTests := []AddressRangesSpec{ + {Public: []CIDR{someCIDR}}, + {Public: []CIDR{someCIDR, someCIDR}}, + {Cluster: []CIDR{someCIDR}}, + {Cluster: []CIDR{someCIDR, someCIDR}}, + {Public: []CIDR{someCIDR}, Cluster: []CIDR{someCIDR}}, + {Public: []CIDR{someCIDR, someCIDR}, Cluster: []CIDR{someCIDR, someCIDR}}, + } + for _, spec := range nonEmptyTests { + assert.False(t, spec.IsEmpty()) + } + +} + +func TestAddressRangesSpec_Validate(t *testing.T) { + // only test a small subset of CIDRs since Rook should definitely use the Go stdlib underneath + v1 := CIDR("123.123.123.123/24") + v2 := CIDR("1.0.0.1/24") + v3 := CIDR("2000::/64") + v4 := CIDR("2000:2000:2000:2000:2000:2000:2000:2000/64") + v5 := CIDR("2000::128.128.128.128/96") // ipv4 expressed as subnet of ipv6 is valid + + // invalid CIDRs + i1 := CIDR("123.123.123/24") + i2 := CIDR("123.123.123.123/33") + i4 := CIDR("2000/64") + i3 := CIDR("2000:/64") + i5 := CIDR("2000::128.128.128.128/129") + + tests := []struct { + name string + spec AddressRangesSpec + numErrs int + }{ + {"empty", AddressRangesSpec{}, 0}, + {"all valid", AddressRangesSpec{ + Public: []CIDR{v1}, + Cluster: []CIDR{v2, v3, v4, v5}, + }, 0}, + {"all invalid", AddressRangesSpec{ + Public: []CIDR{i1}, + Cluster: []CIDR{i2, i3, i4, i5}, + }, 5}, + {"public only, valid", AddressRangesSpec{Public: []CIDR{v1}}, 0}, + {"public only, invalid", AddressRangesSpec{Public: []CIDR{i1}}, 1}, + {"cluster only, valid", AddressRangesSpec{Cluster: []CIDR{v2}}, 0}, + {"cluster only, invalid", AddressRangesSpec{Cluster: []CIDR{i2}}, 1}, + {"public valid, cluster valid", AddressRangesSpec{ + Public: []CIDR{v1}, + Cluster: []CIDR{v2}, + }, 0}, + {"public valid, cluster invalid", AddressRangesSpec{ + Public: []CIDR{v2}, + Cluster: []CIDR{i2}, + }, 1}, + {"public invalid, cluster valid", AddressRangesSpec{ + Public: []CIDR{i3}, + Cluster: []CIDR{v2}, + }, 1}, + {"public invalid, cluster invalid", AddressRangesSpec{ + Public: []CIDR{i3}, + Cluster: 
[]CIDR{i4}, + }, 2}, + {"both, valid and invalid", AddressRangesSpec{ + Public: []CIDR{v1, i2}, + Cluster: []CIDR{v3, i4}, + }, 2}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.spec.Validate() + if tt.numErrs > 0 { + assert.Error(t, err) + t.Log(err) + assert.ErrorContains(t, err, fmt.Sprintf("%d network ranges are invalid", tt.numErrs)) + } else { + assert.NoError(t, err) + } + }) + } +} + +// these two functions should almost always be used together and can be unit tested together more +easily than apart +func TestNetworkSpec_GetNetworkSelection_NetworkSelectionsToAnnotationValue(t *testing.T) { + // inputs are the same definition expressed in json format or non-json format + input1 := func(json bool) string { + if json { + return `[{"name": "macvlan", "interface": "net1"}]` + } + return "macvlan@net1" + } + input2 := func(json bool) string { + if json { + return `[{"name": "macvlan", "interface": "net2"}]` + } + return "macvlan@net2" + } + + // allow running the test suite with json-format or non-json-format inputs + testGetNetworkAnnotationValue := func(t *testing.T, json bool) { + t.Helper() + + tests := []struct { + name string + specSelectors map[CephNetworkType]string + cephNets []CephNetworkType + want string + wantErr bool + }{ + { + name: "public want public", + specSelectors: map[CephNetworkType]string{ + "public": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkPublic}, + want: `[{"name":"macvlan","namespace":"ns","interface":"net1"}]`, + wantErr: false, + }, + { + name: "cluster want cluster", + specSelectors: map[CephNetworkType]string{ + "cluster": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkCluster}, + want: `[{"name":"macvlan","namespace":"ns","interface":"net1"}]`, + wantErr: false, + }, + { + name: "public want cluster", + specSelectors: map[CephNetworkType]string{ + "public": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkCluster}, + want: ``, + wantErr: false, + }, + { + name: "cluster want public", + specSelectors: map[CephNetworkType]string{ + "cluster": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkPublic}, + want: ``, + wantErr: false, + }, + { + name: "nothing want public", + specSelectors: map[CephNetworkType]string{}, + cephNets: []CephNetworkType{CephNetworkPublic}, + want: ``, + wantErr: false, + }, + { + name: "nothing want cluster", + specSelectors: map[CephNetworkType]string{}, + cephNets: []CephNetworkType{CephNetworkCluster}, + want: ``, + wantErr: false, + }, + { + name: "unknown want public", + specSelectors: map[CephNetworkType]string{ + "uncleKnown": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkPublic}, + want: ``, + wantErr: false, + }, + { + name: "unknown want cluster", + specSelectors: map[CephNetworkType]string{ + "uncleKnown": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkCluster}, + want: ``, + wantErr: false, + }, + { + name: "public want public and cluster", + specSelectors: map[CephNetworkType]string{ + "public": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkPublic, CephNetworkCluster}, + want: `[{"name":"macvlan","namespace":"ns","interface":"net1"}]`, + wantErr: false, + }, + { + name: "cluster want public and cluster", + specSelectors: map[CephNetworkType]string{ + "cluster": input1(json), + }, + cephNets: []CephNetworkType{CephNetworkPublic, CephNetworkCluster}, + want: `[{"name":"macvlan","namespace":"ns","interface":"net1"}]`, + wantErr: false, + }, + { + name: "public and cluster want public and
cluster", + specSelectors: map[CephNetworkType]string{ + "public": input1(json), + "cluster": input2(json), + }, + cephNets: []CephNetworkType{CephNetworkPublic, CephNetworkCluster}, + want: `[{"name":"macvlan","namespace":"ns","interface":"net1"},{"name":"macvlan","namespace":"ns","interface":"net2"}]`, + wantErr: false, + }, + { + name: "support mixed json-non-json spec", + specSelectors: map[CephNetworkType]string{ + "public": input1(json), + "cluster": input2(!json), // invert json-ness of this one + }, + cephNets: []CephNetworkType{CephNetworkPublic, CephNetworkCluster}, + want: `[{"name":"macvlan","namespace":"ns","interface":"net1"},{"name":"macvlan","namespace":"ns","interface":"net2"}]`, + wantErr: false, + }, + { + name: "public and cluster want nothing", + specSelectors: map[CephNetworkType]string{ + "public": input1(json), + "cluster": input2(json), + }, + cephNets: []CephNetworkType{}, + want: ``, + wantErr: false, + }, + { + name: "legacy single json object support", + specSelectors: map[CephNetworkType]string{ + "public": `{"name": "legacyJsonObject"}`, + }, + cephNets: []CephNetworkType{CephNetworkPublic, CephNetworkCluster}, + want: `[{"name":"legacyJsonObject","namespace":"ns"}]`, + wantErr: false, + }, + { + name: "invalid network selections", + specSelectors: map[CephNetworkType]string{ + "public": `[{"name": "jsonWithNoClosingBracket"}`, + "cluster": "multus%net", + }, + cephNets: []CephNetworkType{CephNetworkPublic, CephNetworkCluster}, + want: ``, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + n := &NetworkSpec{ + Selectors: tt.specSelectors, + } + + selections := []*nadv1.NetworkSelectionElement{} + errs := []error{} + for _, net := range tt.cephNets { + s, err := n.GetNetworkSelection("ns", net) + if err != nil { + errs = append(errs, err) + } + selections = append(selections, s) + } + got, err := NetworkSelectionsToAnnotationValue(selections...) + if err != nil { + errs = append(errs, err) + } + + assert.Equal(t, tt.wantErr, len(errs) > 0, "wantErr %v but got errs %v", tt.wantErr, errs) + assert.Equal(t, tt.want, got) + }) + } + } + + // Actual subtests + t.Run("non-JSON input", func(t *testing.T) { + testGetNetworkAnnotationValue(t, false) + }) + t.Run("JSON input", func(t *testing.T) { + testGetNetworkAnnotationValue(t, true) + }) +} diff --git a/pkg/apis/ceph.rook.io/v1/types.go b/pkg/apis/ceph.rook.io/v1/types.go index b2f428b661b1..7a63ba7f0086 100755 --- a/pkg/apis/ceph.rook.io/v1/types.go +++ b/pkg/apis/ceph.rook.io/v1/types.go @@ -2250,14 +2250,41 @@ type NetworkSpec struct { // Provider is what provides network connectivity to the cluster e.g. "host" or "multus" // +nullable // +optional - Provider string `json:"provider,omitempty"` + Provider NetworkProviderType `json:"provider,omitempty"` - // Selectors string values describe what networks will be used to connect the cluster. - // Meanwhile the keys describe each network respective responsibilities or any metadata - // storage provider decide. + // Selectors define NetworkAttachmentDefinitions to be used for Ceph public and/or cluster + // networks when the "multus" network provider is used. This config section is not used for + // other network providers. + // + // Valid keys are "public" and "cluster". 
Refer to Ceph networking documentation for more: + // https://docs.ceph.com/en/reef/rados/configuration/network-config-ref/ + // + // Refer to Multus network annotation documentation for help selecting values: + // https://github.com/k8snetworkplumbingwg/multus-cni/blob/master/docs/how-to-use.md#run-pod-with-network-annotation + // + // Rook will make a best-effort attempt to automatically detect CIDR address ranges for given + // network attachment definitions. Rook's methods are robust but may be imprecise for + // sufficiently complicated networks. Rook's auto-detection process obtains a new IP address + // lease for each CephCluster reconcile. If Rook fails to detect, incorrectly detects, only + // partially detects, or if underlying networks do not support reusing old IP addresses, it is + // best to use the 'addressRanges' config section to specify CIDR ranges for the Ceph cluster. + // + // As a contrived example, one can use a theoretical Kubernetes-wide network for Ceph client + // traffic and a theoretical Rook-only network for Ceph replication traffic as shown: + // selectors: + // public: "default/cluster-fast-net" + // cluster: "rook-ceph/ceph-backend-net" + // + // +nullable + // +optional + Selectors map[CephNetworkType]string `json:"selectors,omitempty"` + + // AddressRanges specify a list of CIDRs that Rook will apply to Ceph's 'public_network' and/or + // 'cluster_network' configurations. This config section may be used for the "host" or "multus" + // network providers. // +nullable // +optional - Selectors map[string]string `json:"selectors,omitempty"` + AddressRanges *AddressRangesSpec `json:"addressRanges,omitempty"` // Settings for network connections such as compression and encryption across the // wire. @@ -2284,6 +2311,45 @@ type NetworkSpec struct { MultiClusterService MultiClusterServiceSpec `json:"multiClusterService,omitempty"` } +// NetworkProviderType defines valid network providers for Rook. +// +kubebuilder:validation:Enum="";host;multus +type NetworkProviderType string + +const ( + NetworkProviderDefault = NetworkProviderType("") + NetworkProviderHost = NetworkProviderType("host") + NetworkProviderMultus = NetworkProviderType("multus") +) + +// CephNetworkType should be "public" or "cluster". +// Allow any string so that over-specified legacy clusters do not break on CRD update. +type CephNetworkType string + +const ( + CephNetworkPublic = CephNetworkType("public") + CephNetworkCluster = CephNetworkType("cluster") +) + +type AddressRangesSpec struct { + // Public defines a list of CIDRs to use for Ceph public network communication. + // +optional + Public CIDRList `json:"public"` + + // Cluster defines a list of CIDRs to use for Ceph cluster network communication. + // +optional + Cluster CIDRList `json:"cluster"` +} + +// An IPv4 or IPv6 network CIDR. +// +// This naive kubebuilder regex provides immediate feedback for some typos and for a common problem +// case where the range spec is forgotten (e.g., /24). Rook does in-depth validation in code. +// +kubebuilder:validation:Pattern=`^[0-9a-fA-F:.]{2,}\/[0-9]{1,3}$` +type CIDR string + +// A list of CIDRs. +type CIDRList []CIDR + type MultiClusterServiceSpec struct { // Enable multiClusterService to export the mon and OSD services to peer cluster. 
// Ensure that peer clusters are connected using an MCS API compatible application, diff --git a/pkg/apis/ceph.rook.io/v1/zz_generated.deepcopy.go b/pkg/apis/ceph.rook.io/v1/zz_generated.deepcopy.go index d2fc33cd05e5..2f8e4024c2fa 100644 --- a/pkg/apis/ceph.rook.io/v1/zz_generated.deepcopy.go +++ b/pkg/apis/ceph.rook.io/v1/zz_generated.deepcopy.go @@ -43,6 +43,32 @@ func (in *AMQPEndpointSpec) DeepCopy() *AMQPEndpointSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AddressRangesSpec) DeepCopyInto(out *AddressRangesSpec) { + *out = *in + if in.Public != nil { + in, out := &in.Public, &out.Public + *out = make(CIDRList, len(*in)) + copy(*out, *in) + } + if in.Cluster != nil { + in, out := &in.Cluster, &out.Cluster + *out = make(CIDRList, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AddressRangesSpec. +func (in *AddressRangesSpec) DeepCopy() *AddressRangesSpec { + if in == nil { + return nil + } + out := new(AddressRangesSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in Annotations) DeepCopyInto(out *Annotations) { { @@ -161,6 +187,26 @@ func (in *BucketTopicStatus) DeepCopy() *BucketTopicStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in CIDRList) DeepCopyInto(out *CIDRList) { + { + in := &in + *out = make(CIDRList, len(*in)) + copy(*out, *in) + return + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CIDRList. +func (in CIDRList) DeepCopy() CIDRList { + if in == nil { + return nil + } + out := new(CIDRList) + in.DeepCopyInto(out) + return *out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Capacity) DeepCopyInto(out *Capacity) { *out = *in @@ -3064,11 +3110,16 @@ func (in *NetworkSpec) DeepCopyInto(out *NetworkSpec) { *out = *in if in.Selectors != nil { in, out := &in.Selectors, &out.Selectors - *out = make(map[string]string, len(*in)) + *out = make(map[CephNetworkType]string, len(*in)) for key, val := range *in { (*out)[key] = val } } + if in.AddressRanges != nil { + in, out := &in.AddressRanges, &out.AddressRanges + *out = new(AddressRangesSpec) + (*in).DeepCopyInto(*out) + } if in.Connections != nil { in, out := &in.Connections, &out.Connections *out = new(ConnectionsSpec) diff --git a/pkg/operator/ceph/cluster/cluster.go b/pkg/operator/ceph/cluster/cluster.go index 5df5caaf3118..9ba83f9f6fd5 100755 --- a/pkg/operator/ceph/cluster/cluster.go +++ b/pkg/operator/ceph/cluster/cluster.go @@ -44,7 +44,6 @@ import ( "github.com/rook/rook/pkg/operator/k8sutil" rookversion "github.com/rook/rook/pkg/version" v1 "k8s.io/api/core/v1" - kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ) @@ -290,34 +289,9 @@ func preClusterStartValidation(cluster *cluster) error { if err := validateStretchCluster(cluster); err != nil { return err } - if cluster.Spec.Network.IsMultus() { - _, isPublic := cluster.Spec.Network.Selectors[config.PublicNetworkSelectorKeyName] - _, isCluster := cluster.Spec.Network.Selectors[config.ClusterNetworkSelectorKeyName] - if !isPublic && !isCluster { - return errors.New("both network selector values for public and cluster selector cannot be empty for multus provider") - } - - for _, selector := range config.NetworkSelectors { - // If one selector is empty, we continue - // This means a single interface is used both public and cluster network - if _, ok := cluster.Spec.Network.Selectors[selector]; !ok { - continue - } - multusNamespace, nad := config.GetMultusNamespace(cluster.Spec.Network.Selectors[selector]) - if multusNamespace == "" { - multusNamespace = cluster.Namespace - } - - // Get network attachment definition - _, err := cluster.context.NetworkClient.NetworkAttachmentDefinitions(multusNamespace).Get(cluster.ClusterInfo.Context, nad, metav1.GetOptions{}) - if err != nil { - if kerrors.IsNotFound(err) { - return errors.Wrapf(err, "specified network attachment definition for selector %q does not exist", selector) - } - return errors.Wrapf(err, "failed to fetch network attachment definition for selector %q", selector) - } - } + if err := cephv1.ValidateNetworkSpec(cluster.Namespace, cluster.Spec.Network); err != nil { + return errors.Wrapf(err, "failed to validate network spec for cluster in namespace %q", cluster.Namespace) } // Validate on-PVC cluster encryption KMS settings @@ -597,7 +571,7 @@ func (c *cluster) reportTelemetry() { telemetry.ReportKeyValue(c.context, c.ClusterInfo, telemetry.DeviceSetNonPortableKey, strconv.Itoa(nonportableDeviceSets)) // Set the telemetry for network settings - telemetry.ReportKeyValue(c.context, c.ClusterInfo, telemetry.NetworkProviderKey, c.Spec.Network.Provider) + telemetry.ReportKeyValue(c.context, c.ClusterInfo, telemetry.NetworkProviderKey, string(c.Spec.Network.Provider)) // Set the telemetry for external cluster settings telemetry.ReportKeyValue(c.context, c.ClusterInfo, telemetry.ExternalModeEnabledKey, strconv.FormatBool(c.Spec.External.Enable)) diff --git a/pkg/operator/ceph/cluster/mgr/spec.go b/pkg/operator/ceph/cluster/mgr/spec.go index 50395db20795..1e36dbe34d75 100644 --- a/pkg/operator/ceph/cluster/mgr/spec.go +++ 
b/pkg/operator/ceph/cluster/mgr/spec.go @@ -97,7 +97,7 @@ func (c *Cluster) makeDeployment(mgrConfig *mgrConfig) (*apps.Deployment, error) if c.spec.Network.IsHost() { podSpec.Spec.DNSPolicy = v1.DNSClusterFirstWithHostNet } else if c.spec.Network.IsMultus() { - if err := k8sutil.ApplyMultus(c.spec.Network, &podSpec.ObjectMeta); err != nil { + if err := k8sutil.ApplyMultus(c.clusterInfo.Namespace, &c.spec.Network, &podSpec.ObjectMeta); err != nil { return nil, err } podSpec.Spec.Containers = append(podSpec.Spec.Containers, c.makeCmdProxySidecarContainer(mgrConfig)) diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go index f6a7e2b6ccff..35d28222f7b2 100644 --- a/pkg/operator/ceph/cluster/mon/mon.go +++ b/pkg/operator/ceph/cluster/mon/mon.go @@ -105,7 +105,7 @@ type Cluster struct { spec cephv1.ClusterSpec Namespace string Keyring string - rookVersion string + rookImage string orchestrationMutex sync.Mutex Port int32 maxMonID int @@ -170,7 +170,7 @@ func (c *Cluster) MaxMonID() int { } // Start begins the process of running a cluster of Ceph mons. -func (c *Cluster) Start(clusterInfo *cephclient.ClusterInfo, rookVersion string, cephVersion cephver.CephVersion, spec cephv1.ClusterSpec) (*cephclient.ClusterInfo, error) { +func (c *Cluster) Start(clusterInfo *cephclient.ClusterInfo, rookImage string, cephVersion cephver.CephVersion, spec cephv1.ClusterSpec) (*cephclient.ClusterInfo, error) { // Only one goroutine can orchestrate the mons at a time c.acquireOrchestrationLock() defer c.releaseOrchestrationLock() @@ -180,7 +180,7 @@ func (c *Cluster) Start(clusterInfo *cephclient.ClusterInfo, rookVersion string, if c.ClusterInfo.Context == nil { panic("nil context") } - c.rookVersion = rookVersion + c.rookImage = rookImage c.spec = spec // fail if we were instructed to deploy more than one mon on the same machine with host networking @@ -287,6 +287,11 @@ func (c *Cluster) startMons(targetCount int) error { } } + // apply network settings after mons have been created b/c they are set in the mon k-v store + if err := controller.ApplyCephNetworkSettings(c.ClusterInfo.Context, c.rookImage, c.context, &c.spec, c.ClusterInfo); err != nil { + return errors.Wrap(err, "failed to apply ceph network settings") + } + if c.spec.IsStretchCluster() { if err := c.configureStretchCluster(mons); err != nil { return errors.Wrap(err, "failed to configure stretch mons") @@ -647,7 +652,7 @@ func scheduleMonitor(c *Cluster, mon *monConfig) (*apps.Deployment, error) { // avoid issues with the real deployment, the canary should be careful not // to modify the storage by instead running an innocuous command. 
@@ -647,7 +652,7 @@ func scheduleMonitor(c *Cluster, mon *monConfig) (*apps.Deployment, error) {
 	// avoid issues with the real deployment, the canary should be careful not
 	// to modify the storage by instead running an innocuous command.
 	d.Spec.Template.Spec.InitContainers = []v1.Container{}
-	d.Spec.Template.Spec.Containers[0].Image = c.rookVersion
+	d.Spec.Template.Spec.Containers[0].Image = c.rookImage
 	d.Spec.Template.Spec.Containers[0].Command = []string{"sleep"} // sleep responds to signals so we don't need to wrap it
 	d.Spec.Template.Spec.Containers[0].Args = []string{"3600"}
 	// remove the startup and liveness probes on the canary pod
diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go
index 66f305c50be8..6194ae09218f 100644
--- a/pkg/operator/ceph/cluster/mon/mon_test.go
+++ b/pkg/operator/ceph/cluster/mon/mon_test.go
@@ -104,7 +104,7 @@ func newCluster(context *clusterd.Context, namespace string, allowMultiplePerNod
 		ClusterInfo: nil,
 		context:     context,
 		Namespace:   namespace,
-		rookVersion: "myversion",
+		rookImage:   "myversion",
 		spec: cephv1.ClusterSpec{
 			Mon: cephv1.MonSpec{
 				Count: 3,
@@ -123,11 +123,11 @@ func newCluster(context *clusterd.Context, namespace string, allowMultiplePerNod
 }
 
 // setCommonMonProperties is a convenience helper for setting common test properties
-func setCommonMonProperties(c *Cluster, currentMons int, mon cephv1.MonSpec, rookVersion string) {
+func setCommonMonProperties(c *Cluster, currentMons int, mon cephv1.MonSpec, rookImage string) {
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(currentMons)
 	c.spec.Mon.Count = mon.Count
 	c.spec.Mon.AllowMultiplePerNode = mon.AllowMultiplePerNode
-	c.rookVersion = rookVersion
+	c.rookImage = rookImage
 }
 
 func TestResourceName(t *testing.T) {
@@ -186,7 +186,7 @@ func TestStartMonPods(t *testing.T) {
 	}
 
 	// start a basic cluster
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 
 	// test annotations
@@ -197,7 +197,7 @@ func TestStartMonPods(t *testing.T) {
 	validateStart(t, c)
 
 	// starting again should be a no-op
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 
 	validateStart(t, c)
@@ -211,7 +211,7 @@ func TestOperatorRestart(t *testing.T) {
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 
 	// start a basic cluster
-	info, err := c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	info, err := c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 	assert.NoError(t, info.IsInitialized())
 
@@ -221,7 +221,7 @@ func TestOperatorRestart(t *testing.T) {
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 
 	// starting again should be a no-op, but will not result in an error
-	info, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	info, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 	assert.NoError(t, info.IsInitialized())
 
@@ -239,7 +239,7 @@ func TestOperatorRestartHostNetwork(t *testing.T) {
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 
 	// start a basic cluster
-	info, err := c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	info, err := c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 	assert.NoError(t, info.IsInitialized())
 
@@ -251,7 +251,7 @@ func TestOperatorRestartHostNetwork(t *testing.T) {
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 
 	// starting again should be a no-op, but still results in an error
-	info, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	info, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 	assert.NoError(t, info.IsInitialized(), info)
diff --git a/pkg/operator/ceph/cluster/mon/node_test.go b/pkg/operator/ceph/cluster/mon/node_test.go
index 67549859da7b..9c555b837d1f 100644
--- a/pkg/operator/ceph/cluster/mon/node_test.go
+++ b/pkg/operator/ceph/cluster/mon/node_test.go
@@ -86,7 +86,7 @@ func TestHostNetworkSameNode(t *testing.T) {
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 
 	// start a basic cluster
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.Error(t, err)
 }
 
@@ -104,7 +104,7 @@ func TestPodMemory(t *testing.T) {
 	c := newCluster(context, namespace, true, r)
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 	// start a basic cluster
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 
 	// Test REQUEST == LIMIT
@@ -120,7 +120,7 @@ func TestPodMemory(t *testing.T) {
 	c = newCluster(context, namespace, true, r)
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 	// start a basic cluster
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 
 	// Test LIMIT != REQUEST but obviously LIMIT > REQUEST
@@ -136,7 +136,7 @@ func TestPodMemory(t *testing.T) {
 	c = newCluster(context, namespace, true, r)
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 	// start a basic cluster
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 
 	// Test valid case where pod resource is set appropriately
@@ -152,7 +152,7 @@ func TestPodMemory(t *testing.T) {
 	c = newCluster(context, namespace, true, r)
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 	// start a basic cluster
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 
 	// Test no resources were specified on the pod
@@ -160,7 +160,7 @@ func TestPodMemory(t *testing.T) {
 	c = newCluster(context, namespace, true, r)
 	c.ClusterInfo = clienttest.CreateTestClusterInfo(1)
 	// start a basic cluster
-	_, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Quincy, c.spec)
+	_, err = c.Start(c.ClusterInfo, c.rookImage, cephver.Quincy, c.spec)
 	assert.NoError(t, err)
 }
diff --git a/pkg/operator/ceph/cluster/mon/spec.go b/pkg/operator/ceph/cluster/mon/spec.go
index 06491262124b..5f9514614d34 100644
--- a/pkg/operator/ceph/cluster/mon/spec.go
+++ b/pkg/operator/ceph/cluster/mon/spec.go
@@ -212,7 +212,13 @@ func (c *Cluster) makeMonPod(monConfig *monConfig, canary bool) (*corev1.Pod, er
 	if monConfig.UseHostNetwork {
 		pod.Spec.DNSPolicy = corev1.DNSClusterFirstWithHostNet
 	} else if c.spec.Network.IsMultus() {
-		if err := k8sutil.ApplyMultus(c.spec.Network, &pod.ObjectMeta); err != nil {
+		cluster := cephv1.CephCluster{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: c.Namespace,
+			},
+			Spec: c.spec,
+		}
+		if err := k8sutil.ApplyMultus(cluster.GetNamespace(), &cluster.Spec.Network, &pod.ObjectMeta); err != nil {
 			return nil, err
 		}
 	}
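Every ApplyMultus call site in this patch moves to the same pattern: the cluster
namespace is passed explicitly so that a bare NAD name in a selector can be resolved
relative to the CephCluster. A sketch of the pattern with placeholder names (this is
not a call site from the patch):

	// clusterSpec and podTemplate stand in for whatever the caller has in scope.
	if err := k8sutil.ApplyMultus("rook-ceph", &clusterSpec.Network, &podTemplate.ObjectMeta); err != nil {
		return nil, err
	}
	// On success, the pod metadata carries the standard Multus selection
	// annotation, along the lines of:
	//   k8s.v1.cni.cncf.io/networks: rook-ceph/public-net, rook-ceph/cluster-net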
diff --git a/pkg/operator/ceph/cluster/osd/key_rotation.go b/pkg/operator/ceph/cluster/osd/key_rotation.go
index f5c2e4f316a5..b1af021af1d8 100644
--- a/pkg/operator/ceph/cluster/osd/key_rotation.go
+++ b/pkg/operator/ceph/cluster/osd/key_rotation.go
@@ -166,7 +166,7 @@ func (c *Cluster) getKeyRotationPodTemplateSpec(osdProps osdProperties, osd OSDI
 	if c.spec.Network.IsHost() {
 		podTemplateSpec.Spec.DNSPolicy = v1.DNSClusterFirstWithHostNet
 	} else if c.spec.Network.IsMultus() {
-		if err := k8sutil.ApplyMultus(c.spec.Network, &podTemplateSpec.ObjectMeta); err != nil {
+		if err := k8sutil.ApplyMultus(c.clusterInfo.Namespace, &c.spec.Network, &podTemplateSpec.ObjectMeta); err != nil {
 			return nil, err
 		}
 	}
diff --git a/pkg/operator/ceph/cluster/osd/spec.go b/pkg/operator/ceph/cluster/osd/spec.go
index 0cf055427cb3..610afea95e9b 100644
--- a/pkg/operator/ceph/cluster/osd/spec.go
+++ b/pkg/operator/ceph/cluster/osd/spec.go
@@ -639,7 +639,7 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd OSDInfo, provisionC
 	if c.spec.Network.IsHost() {
 		podTemplateSpec.Spec.DNSPolicy = v1.DNSClusterFirstWithHostNet
 	} else if c.spec.Network.IsMultus() {
-		if err := k8sutil.ApplyMultus(c.spec.Network, &podTemplateSpec.ObjectMeta); err != nil {
+		if err := k8sutil.ApplyMultus(c.clusterInfo.Namespace, &c.spec.Network, &podTemplateSpec.ObjectMeta); err != nil {
 			return nil, err
 		}
 	}
diff --git a/pkg/operator/ceph/cluster/rbd/spec.go b/pkg/operator/ceph/cluster/rbd/spec.go
index 6abd327128d7..35ba7bae831f 100644
--- a/pkg/operator/ceph/cluster/rbd/spec.go
+++ b/pkg/operator/ceph/cluster/rbd/spec.go
@@ -66,7 +66,7 @@ func (r *ReconcileCephRBDMirror) makeDeployment(daemonConfig *daemonConfig, rbdM
 	if r.cephClusterSpec.Network.IsHost() {
 		podSpec.Spec.DNSPolicy = v1.DNSClusterFirstWithHostNet
 	} else if r.cephClusterSpec.Network.IsMultus() {
-		if err := k8sutil.ApplyMultus(r.cephClusterSpec.Network, &podSpec.ObjectMeta); err != nil {
+		if err := k8sutil.ApplyMultus(r.clusterInfo.Namespace, &r.cephClusterSpec.Network, &podSpec.ObjectMeta); err != nil {
 			return nil, err
 		}
 	}
diff --git a/pkg/operator/ceph/config/config.go b/pkg/operator/ceph/config/config.go
index f61b2e384426..5e11bd6a0a0f 100644
--- a/pkg/operator/ceph/config/config.go
+++ b/pkg/operator/ceph/config/config.go
@@ -137,20 +137,6 @@ func SetOrRemoveDefaultConfigs(
 		}
 	}
 
-	// Apply Multus if needed
-	if clusterSpec.Network.IsMultus() {
-		logger.Info("configuring ceph network(s) with multus")
-		cephNetworks, err := generateNetworkSettings(clusterInfo.Context, context, clusterInfo.Namespace, clusterSpec.Network.Selectors)
-		if err != nil {
-			return errors.Wrap(err, "failed to generate network settings")
-		}
-
-		// Apply ceph network settings to the mon config store
-		if err := monStore.SetAll("global", cephNetworks); err != nil {
-			return errors.Wrap(err, "failed to network config overrides")
-		}
-	}
-
 	// This section will remove any previously configured option(s) from the mon centralized store
 	// This is useful for scenarios where options are not needed anymore and we just want to reset to internal's default
 	// On upgrade, the flag will be removed
diff --git a/pkg/operator/ceph/config/monstore.go b/pkg/operator/ceph/config/monstore.go
index 854ef743701e..5abc9d5cd822 100644
--- a/pkg/operator/ceph/config/monstore.go
+++ b/pkg/operator/ceph/config/monstore.go
@@ -56,8 +56,22 @@ type Option struct {
 	Value string
 }
 
+// SetIfChanged sets a config in the centralized mon configuration database if the config has
+// changed value.
+// https://docs.ceph.com/docs/master/rados/configuration/ceph-conf/#monitor-configuration-database
+//
+// There is a bug through at least Ceph v18 where `ceph config get global