From 85feccd5e18b1e52a5176922d116de5ec5268d4a Mon Sep 17 00:00:00 2001 From: evgenLevin Date: Tue, 3 Sep 2024 16:39:50 -0400 Subject: [PATCH 01/59] Refactor some conformance tests to utilize SRIOV_NODE_AND_DEVICE_NAME_FILTER variable --- test/conformance/tests/test_sriov_operator.go | 6 ++++-- test/util/cluster/cluster.go | 9 +++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index b665c99f0..23f477563 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -1060,9 +1060,11 @@ var _ = Describe("[sriov] operator", func() { findSriovDevice := func(vendorID, deviceID string) (string, sriovv1.InterfaceExt) { for _, node := range sriovInfos.Nodes { - for _, nic := range sriovInfos.States[node].Status.Interfaces { + devices, err := sriovInfos.FindSriovDevices(node) + Expect(err).ToNot(HaveOccurred()) + for _, nic := range devices { if vendorID != "" && deviceID != "" && nic.Vendor == vendorID && nic.DeviceID == deviceID { - return node, nic + return node, *nic } } } diff --git a/test/util/cluster/cluster.go b/test/util/cluster/cluster.go index b79e61ad2..e0cd2e45b 100644 --- a/test/util/cluster/cluster.go +++ b/test/util/cluster/cluster.go @@ -203,9 +203,14 @@ func (n *EnabledNodes) FindOneSriovNodeAndDevice() (string, *sriovv1.InterfaceEx // FindOneVfioSriovDevice retrieves a node with a valid sriov device for vfio func (n *EnabledNodes) FindOneVfioSriovDevice() (string, sriovv1.InterfaceExt) { for _, node := range n.Nodes { - for _, nic := range n.States[node].Status.Interfaces { + devices, err := n.FindSriovDevices(node) + if err != nil { + return "", sriovv1.InterfaceExt{} + } + + for _, nic := range devices { if nic.Vendor == intelVendorID && sriovv1.IsSupportedModel(nic.Vendor, nic.DeviceID) && nic.TotalVfs != 0 { - return node, nic + return node, *nic } } } From 91e04f6a00febea8efad5c0e50511f0327344be1 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Wed, 10 Jul 2024 16:09:20 +0200 Subject: [PATCH 02/59] metrics: Add PrometheusRule for namespaced metrics PrometheusRules allow recording pre-defined queries. Use `sriov_kubepoddevice` metric to add `pod|namespace` pair to the sriov metrics. Feature is enabled via the `METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULE` environment variable. 
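As an illustration of how the recorded series are consumed: a minimal Go sketch that reads one of the rules defined below back from the Prometheus HTTP API. The endpoint address and the `namespace`/`pod` label values are placeholders; only the metric name `network:sriov_vf_rx_packets` comes from this patch.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// queryResult covers only the fields this example prints.
type queryResult struct {
	Status string `json:"status"`
	Data   struct {
		Result []struct {
			Metric map[string]string `json:"metric"`
			Value  []interface{}     `json:"value"`
		} `json:"result"`
	} `json:"data"`
}

func main() {
	// Placeholder endpoint and label values.
	promURL := "http://localhost:9090"
	q := url.Values{"query": []string{`network:sriov_vf_rx_packets{namespace="test",pod="testpod"}`}}

	resp, err := http.Get(promURL + "/api/v1/query?" + q.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var res queryResult
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		panic(err)
	}
	for _, sample := range res.Data.Result {
		fmt.Printf("pod=%s namespace=%s value=%v\n", sample.Metric["pod"], sample.Metric["namespace"], sample.Value)
	}
}
```

Pre-recording the join lets dashboards and alerts select by `pod` and `namespace` directly, without repeating the `group_left` expression against `sriov_kubepoddevice`.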
Signed-off-by: Andrea Panattoni --- .../metrics-prometheus-rule.yaml | 38 +++++ controllers/sriovoperatorconfig_controller.go | 1 + .../sriovoperatorconfig_controller_test.go | 10 ++ deploy/operator.yaml | 2 + deploy/role.yaml | 1 + .../sriov-network-operator-chart/README.md | 1 + .../templates/operator.yaml | 2 + .../templates/role.yaml | 1 + .../sriov-network-operator-chart/values.yaml | 1 + hack/run-e2e-conformance-virtual-ocp.sh | 1 + .../tests/test_exporter_metrics.go | 68 ++++++++- ...monitoring.coreos.com_prometheusrules.yaml | 142 ++++++++++++++++++ 12 files changed, 265 insertions(+), 3 deletions(-) create mode 100644 bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml create mode 100644 test/util/crds/monitoring.coreos.com_prometheusrules.yaml diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml new file mode 100644 index 000000000..efd760113 --- /dev/null +++ b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml @@ -0,0 +1,38 @@ +--- +{{ if and .IsPrometheusOperatorInstalled .PrometheusOperatorDeployRules }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: sriov-vf-rules + namespace: {{.Namespace}} +spec: + groups: + - name: sriov-network-metrics-operator.rules + interval: 30s + rules: + - expr: | + sriov_vf_tx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_tx_packets + - expr: | + sriov_vf_rx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_packets + - expr: | + sriov_vf_tx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_tx_bytes + - expr: | + sriov_vf_rx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_bytes + - expr: | + sriov_vf_tx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_tx_dropped + - expr: | + sriov_vf_rx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_dropped + - expr: | + sriov_vf_rx_broadcast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_broadcast + - expr: | + sriov_vf_rx_multicast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_multicast +{{ end }} + diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 8d028d8eb..1121b623f 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -241,6 +241,7 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context, data.Data["IsOpenshift"] = r.PlatformHelper.IsOpenshiftCluster() data.Data["IsPrometheusOperatorInstalled"] = strings.ToLower(os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED")) == trueString + data.Data["PrometheusOperatorDeployRules"] = strings.ToLower(os.Getenv("METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES")) == trueString data.Data["PrometheusOperatorServiceAccount"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT") data.Data["PrometheusOperatorNamespace"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE") diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 582d9781d..cff8ca7c8 100644 
--- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -368,6 +368,8 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { It("should deploy extra configuration when the Prometheus operator is installed", func() { DeferCleanup(os.Setenv, "METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED")) os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", "true") + DeferCleanup(os.Setenv, "METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES", os.Getenv("METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES")) + os.Setenv("METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES", "true") err := util.WaitForNamespacedObject(&rbacv1.Role{}, k8sClient, testNamespace, "prometheus-k8s", util.RetryInterval, util.APITimeout) Expect(err).ToNot(HaveOccurred()) @@ -382,6 +384,14 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Version: "v1", }, client.ObjectKey{Namespace: testNamespace, Name: "sriov-network-metrics-exporter"}) + + assertResourceExists( + schema.GroupVersionKind{ + Group: "monitoring.coreos.com", + Kind: "PrometheusRule", + Version: "v1", + }, + client.ObjectKey{Namespace: testNamespace, Name: "sriov-vf-rules"}) }) }) }) diff --git a/deploy/operator.yaml b/deploy/operator.yaml index b2aa302ab..e9fb25de3 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -78,6 +78,8 @@ spec: value: $METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED value: "$METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED" + - name: METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES + value: "$METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES" - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT value: $METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE diff --git a/deploy/role.yaml b/deploy/role.yaml index a24f13729..d03c47e21 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ -29,6 +29,7 @@ rules: - monitoring.coreos.com resources: - servicemonitors + - prometheusrules verbs: - get - create diff --git a/deployment/sriov-network-operator-chart/README.md b/deployment/sriov-network-operator-chart/README.md index 40b4e92a9..778726677 100644 --- a/deployment/sriov-network-operator-chart/README.md +++ b/deployment/sriov-network-operator-chart/README.md @@ -89,6 +89,7 @@ We have introduced the following Chart parameters. | `operator.metricsExporter.prometheusOperator.enabled` | bool | false | Wheter the operator shoud configure Prometheus resources or not (e.g. `ServiceMonitors`). | | `operator.metricsExporter.prometheusOperator.serviceAccount` | string | `prometheus-k8s` | The service account used by the Prometheus Operator. This is used to give Prometheus the permission to list resource in the SR-IOV operator namespace | | `operator.metricsExporter.prometheusOperator.namespace` | string | `monitoring` | The namespace where the Prometheus Operator is installed. Setting this variable makes the operator deploy `monitoring.coreos.com` resources. | +| `operator.metricsExporter.prometheusOperator.deployRules` | bool | false | Whether the operator should deploy `PrometheusRules` to scrape namespace version of metrics. 
| #### Admission Controllers parameters diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 12a9cc660..0e89d1959 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -83,6 +83,8 @@ spec: {{- if .Values.operator.metricsExporter.prometheusOperator.enabled }} - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED value: {{ .Values.operator.metricsExporter.prometheusOperator.enabled | quote}} + - name: METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES + value: {{ .Values.operator.metricsExporter.prometheusOperator.deployRules | quote}} - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT value: {{ .Values.operator.metricsExporter.prometheusOperator.serviceAccount }} - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE diff --git a/deployment/sriov-network-operator-chart/templates/role.yaml b/deployment/sriov-network-operator-chart/templates/role.yaml index 29cf80cce..28c5ff175 100644 --- a/deployment/sriov-network-operator-chart/templates/role.yaml +++ b/deployment/sriov-network-operator-chart/templates/role.yaml @@ -32,6 +32,7 @@ rules: - monitoring.coreos.com resources: - servicemonitors + - prometheusrules verbs: - get - create diff --git a/deployment/sriov-network-operator-chart/values.yaml b/deployment/sriov-network-operator-chart/values.yaml index e1c31b82d..8c6fea3a1 100644 --- a/deployment/sriov-network-operator-chart/values.yaml +++ b/deployment/sriov-network-operator-chart/values.yaml @@ -35,6 +35,7 @@ operator: enabled: false serviceAccount: "prometheus-k8s" namespace: "monitoring" + deployRules: false admissionControllers: enabled: false certificates: diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index a61906fb2..0092fcdad 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -191,6 +191,7 @@ export DEV_MODE=TRUE export CLUSTER_HAS_EMULATED_PF=TRUE export OPERATOR_LEADER_ELECTION_ENABLE=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true +export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULE=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"} export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshfit-monitoring"} diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go index e81f63067..804432f04 100644 --- a/test/conformance/tests/test_exporter_metrics.go +++ b/test/conformance/tests/test_exporter_metrics.go @@ -2,9 +2,12 @@ package tests import ( "context" + "encoding/json" "fmt" + "net/url" "strings" + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" @@ -13,6 +16,7 @@ import ( dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -22,6 +26,8 @@ import ( ) var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { + var node string + var nic *sriovv1.InterfaceExt BeforeAll(func() { if 
cluster.VirtualCluster() { @@ -48,13 +54,11 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(err).ToNot(HaveOccurred()) WaitForSRIOVStable() - }) - It("collects metrics regarding receiving traffic via VF", func() { sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) Expect(err).ToNot(HaveOccurred()) - node, nic, err := sriovInfos.FindOneSriovNodeAndDevice() + node, nic, err = sriovInfos.FindOneSriovNodeAndDevice() Expect(err).ToNot(HaveOccurred()) By("Using device " + nic.Name + " on node " + node) @@ -65,7 +69,13 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(err).ToNot(HaveOccurred()) waitForNetAttachDef("test-me-network", namespaces.Test) + DeferCleanup(namespaces.Clean, operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + }) + + It("collects metrics regarding receiving traffic via VF", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) ips, err := network.GetSriovNicIPs(pod, "net1") Expect(err).ToNot(HaveOccurred()) @@ -88,6 +98,28 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets)) }) + It("PrometheusRule should provide namespaced metrics", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + namespacedMetricNames := []string{ + "network:sriov_vf_rx_bytes", + "network:sriov_vf_tx_bytes", + "network:sriov_vf_rx_packets", + "network:sriov_vf_tx_packets", + "network:sriov_vf_rx_dropped", + "network:sriov_vf_tx_dropped", + "network:sriov_vf_rx_broadcast", + "network:sriov_vf_rx_multicast", + } + + Eventually(func(g Gomega) { + for _, metricName := range namespacedMetricNames { + values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) + g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + } + }, "40s", "1s").Should(Succeed()) + }) }) func getMetricsForNode(nodeName string) map[string]*dto.MetricFamily { @@ -185,3 +217,33 @@ func areLabelsMatching(labels []*dto.LabelPair, labelsToMatch map[string]string) return true } + +func runPromQLQuery(query string) model.Vector { + prometheusPods, err := clients.Pods("").List(context.Background(), metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/component=prometheus", + }) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, prometheusPods.Items).ToNot(HaveLen(0), "At least one Prometheus operator pod expected") + + prometheusPod := prometheusPods.Items[0] + + url := fmt.Sprintf("localhost:9090/api/v1/query?%s", (url.Values{"query": []string{query}}).Encode()) + command := []string{"curl", url} + stdout, stderr, err := pod.ExecCommand(clients, &prometheusPod, command...) 
+ ExpectWithOffset(1, err).ToNot(HaveOccurred(), + "promQL query failed: [%s/%s] command: [%v]\nstdout: %s\nstderr: %s", prometheusPod.Namespace, prometheusPod.Name, command, stdout, stderr) + + result := struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result model.Vector `json:"result"` + } `json:"data"` + }{} + + json.Unmarshal([]byte(stdout), &result) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, result.Status).To(Equal("success"), "cURL for [%s] failed: %s", url, stdout) + + return result.Data.Result +} diff --git a/test/util/crds/monitoring.coreos.com_prometheusrules.yaml b/test/util/crds/monitoring.coreos.com_prometheusrules.yaml new file mode 100644 index 000000000..6c16e8396 --- /dev/null +++ b/test/util/crds/monitoring.coreos.com_prometheusrules.yaml @@ -0,0 +1,142 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + operator.prometheus.io/version: 0.75.1 + name: prometheusrules.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + categories: + - prometheus-operator + kind: PrometheusRule + listKind: PrometheusRuleList + plural: prometheusrules + shortNames: + - promrule + singular: prometheusrule + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: |- + The `PrometheusRule` custom resource definition (CRD) defines [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) and [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) rules to be evaluated by `Prometheus` or `ThanosRuler` objects. + + + `Prometheus` and `ThanosRuler` objects select `PrometheusRule` objects using label and namespace selectors. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Specification of desired alerting rule definitions for Prometheus. + properties: + groups: + description: Content of Prometheus rule file + items: + description: RuleGroup is a list of sequentially evaluated recording + and alerting rules. + properties: + interval: + description: Interval determines how often rules in the group + are evaluated. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + limit: + description: |- + Limit the number of alerts an alerting rule and series a recording + rule can produce. + Limit is supported starting with Prometheus >= 2.31 and Thanos Ruler >= 0.24. + type: integer + name: + description: Name of the rule group. + minLength: 1 + type: string + partial_response_strategy: + description: |- + PartialResponseStrategy is only used by ThanosRuler and will + be ignored by Prometheus instances. 
+ More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response + pattern: ^(?i)(abort|warn)?$ + type: string + rules: + description: List of alerting and recording rules. + items: + description: |- + Rule describes an alerting or recording rule + See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) rule + properties: + alert: + description: |- + Name of the alert. Must be a valid label value. + Only one of `record` and `alert` must be set. + type: string + annotations: + additionalProperties: + type: string + description: |- + Annotations to add to each alert. + Only valid for alerting rules. + type: object + expr: + anyOf: + - type: integer + - type: string + description: PromQL expression to evaluate. + x-kubernetes-int-or-string: true + for: + description: Alerts are considered firing once they have + been returned for this long. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + keep_firing_for: + description: KeepFiringFor defines how long an alert will + continue firing after the condition that triggered it + has cleared. + minLength: 1 + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + labels: + additionalProperties: + type: string + description: Labels to add or overwrite. + type: object + record: + description: |- + Name of the time series to output to. Must be a valid metric name. + Only one of `record` and `alert` must be set. + type: string + required: + - expr + type: object + type: array + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + required: + - spec + type: object + served: true + storage: true From b49cf15cb3718a5834dd26cbc6ea1ecfc6014383 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Wed, 28 Aug 2024 17:27:02 +0200 Subject: [PATCH 03/59] metrics: Add permissions to remove monitor objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the `metricsExporter` feature is turned off, deployed resources should be removed. 
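For context, the cleanup that needs the extra `update`/`delete` verbs is an ordinary `Delete` call on the unstructured ServiceMonitor object rendered by the operator. A minimal controller-runtime sketch (the function name and the `client.Client` parameter are illustrative; the object name and group/version/kind match the error quoted below):

```go
package example

import (
	"context"

	uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// deleteServiceMonitor removes the exporter's ServiceMonitor. Without the
// "delete" verb on servicemonitors.monitoring.coreos.com, the API server
// rejects this call with the Forbidden error shown below.
func deleteServiceMonitor(ctx context.Context, c client.Client, namespace string) error {
	obj := &uns.Unstructured{}
	obj.SetGroupVersionKind(schema.GroupVersionKind{
		Group:   "monitoring.coreos.com",
		Version: "v1",
		Kind:    "ServiceMonitor",
	})
	obj.SetNamespace(namespace)
	obj.SetName("sriov-network-metrics-exporter")
	return client.IgnoreNotFound(c.Delete(ctx, obj))
}
```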
These changes fix the error: ``` │ 2024-08-28T14:07:57.699760017Z ERROR controller/controller.go:266 Reconciler error {"controller": "sriovoperatorconfig", "controllerGroup": "sriovnetwork.openshift.io", "controllerKind": "SriovOperatorConfig", "SriovOperatorConfig": {"name":"default","namespace":"openshift-sriov-network-operator"}, │ │ "namespace": "openshift-sriov-network-operator", "name": "default", "reconcileID": "fa841c50-dbb8-4c4c-9ddd-b98624fd2a24", "error": "failed to delete object &{map[apiVersion:monitoring.coreos.com/v1 kind:ServiceMonitor metadata:map[name:sriov-network-metrics-exporter namespace:openshift-sriov-network-operator] │ │ spec:map[endpoints:[map[bearerTokenFile:/var/run/secrets/kubernetes.io/serviceaccount/token honorLabels:true interval:30s port:sriov-network-metrics scheme:https tlsConfig:map[caFile:/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt insecureSkipVerify:false serverName:sriov-network-metrics-expor │ │ ter-service.openshift-sriov-network-operator.svc]]] namespaceSelector:map[matchNames:[openshift-sriov-network-operator]] selector:map[matchLabels:map[name:sriov-network-metrics-exporter-service]]]]} with err: could not delete object (monitoring.coreos.com/v1, Kind=ServiceMonitor) openshift-sriov-network-operato │ │ r/sriov-network-metrics-exporter: servicemonitors.monitoring.coreos.com \"sriov-network-metrics-exporter\" is forbidden: User \"system:serviceaccount:openshift-sriov-network-operator:sriov-network-operator\" cannot delete resource \"servicemonitors\" in API group \"monitoring.coreos.com\" in the namespace \"ope │ │ nshift-sriov-network-operator\""} ``` Signed-off-by: Andrea Panattoni --- deploy/role.yaml | 2 ++ .../sriov-network-operator-chart/templates/role.yaml | 2 ++ test/conformance/tests/test_sriov_operator.go | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/deploy/role.yaml b/deploy/role.yaml index d03c47e21..0a6c27a21 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ -33,6 +33,8 @@ rules: verbs: - get - create + - update + - delete - apiGroups: - apps resourceNames: diff --git a/deployment/sriov-network-operator-chart/templates/role.yaml b/deployment/sriov-network-operator-chart/templates/role.yaml index 28c5ff175..6551b5775 100644 --- a/deployment/sriov-network-operator-chart/templates/role.yaml +++ b/deployment/sriov-network-operator-chart/templates/role.yaml @@ -36,6 +36,8 @@ rules: verbs: - get - create + - update + - delete - apiGroups: - apps resourceNames: diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index c1db065b2..729bf683b 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -305,6 +305,14 @@ var _ = Describe("[sriov] operator", func() { g.Expect(err).ToNot(HaveOccurred()) }).Should(Succeed()) }) + + It("should remove ServiceMonitor when the feature is turned off", func() { + setFeatureFlag("metricsExporter", false) + Eventually(func(g Gomega) { + _, err := clients.ServiceMonitors(operatorNamespace).Get(context.Background(), "sriov-network-metrics-exporter", metav1.GetOptions{}) + g.Expect(k8serrors.IsNotFound(err)).To(BeTrue()) + }).Should(Succeed()) + }) }) }) From 6aedb8c57270e641babae3faf4746dda876a1bbf Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Wed, 18 Sep 2024 19:30:06 +0300 Subject: [PATCH 04/59] Fix merge annotation function if the current obj as annotation and the updated doesn't we still want to add the ones from the current object Signed-off-by: 
Sebastian Sch --- pkg/apply/merge.go | 4 ++-- pkg/apply/merge_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/pkg/apply/merge.go b/pkg/apply/merge.go index d2ed6d4fb..9ee859f83 100644 --- a/pkg/apply/merge.go +++ b/pkg/apply/merge.go @@ -220,7 +220,7 @@ func mergeAnnotations(current, updated *uns.Unstructured) { for k, v := range updatedAnnotations { curAnnotations[k] = v } - if len(curAnnotations) > 1 { + if len(curAnnotations) > 0 { updated.SetAnnotations(curAnnotations) } } @@ -238,7 +238,7 @@ func mergeLabels(current, updated *uns.Unstructured) { for k, v := range updatedLabels { curLabels[k] = v } - if len(curLabels) > 1 { + if len(curLabels) > 0 { updated.SetLabels(curLabels) } } diff --git a/pkg/apply/merge_test.go b/pkg/apply/merge_test.go index f6ad89289..ecf2fd98d 100644 --- a/pkg/apply/merge_test.go +++ b/pkg/apply/merge_test.go @@ -107,6 +107,38 @@ metadata: })) } +func TestMergeOne(t *testing.T) { + g := NewGomegaWithT(t) + + cur := UnstructuredFromYaml(t, ` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: d1 + labels: + label-c: cur + annotations: + annotation-c: cur`) + + upd := UnstructuredFromYaml(t, ` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: d1`) + + // this mutates updated + err := MergeObjectForUpdate(cur, upd) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(upd.GetLabels()).To(Equal(map[string]string{ + "label-c": "cur", + })) + + g.Expect(upd.GetAnnotations()).To(Equal(map[string]string{ + "annotation-c": "cur", + })) +} + func TestMergeNilCur(t *testing.T) { g := NewGomegaWithT(t) From 644fcf2a4cb2194d1e3e8bc20be2f80690fd0693 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 19 Sep 2024 08:39:09 +0200 Subject: [PATCH 05/59] Delete webhooks when SriovOperatorConfig is deleted When a user deletes the default SriovOperatorConfig resource and tries to recreate it afterwards, the operator webhooks returns the error: ``` Error from server (InternalError): error when creating "/tmp/opconfig.yml": Internal error occurred: failed calling webhook "operator-webhook.sriovnetwork.openshift.io": failed to call webhook: Post "https://operator-webhook-service.openshift-sriov-network-operator.svc:443/validating-custom-resource?timeout=10s": service "operator-webhook-service" not found ``` as the webhook configuration is still present, while the Service and the DaemonSet has been deleted. Delete all the webhook configurations when the user deletes the default SriovOperatorConfig Signed-off-by: Andrea Panattoni --- controllers/sriovoperatorconfig_controller.go | 32 +++++++++- .../sriovoperatorconfig_controller_test.go | 61 ++++++++++++++++--- 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 1121b623f..377ebd2de 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -18,6 +18,7 @@ package controllers import ( "context" + "errors" "fmt" "os" "sort" @@ -28,6 +29,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" kscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" @@ -81,7 +83,9 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. 
if err != nil { if apierrors.IsNotFound(err) { logger.Info("default SriovOperatorConfig object not found. waiting for creation.") - return reconcile.Result{}, nil + + err := r.deleteAllWebhooks(ctx) + return reconcile.Result{}, err } // Error reading the object - requeue the request. logger.Error(err, "Failed to get default SriovOperatorConfig object") @@ -457,3 +461,29 @@ func (r SriovOperatorConfigReconciler) setLabelInsideObject(ctx context.Context, return nil } + +func (r SriovOperatorConfigReconciler) deleteAllWebhooks(ctx context.Context) error { + var err error + obj := &uns.Unstructured{} + obj.SetGroupVersionKind(schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}) + obj.SetName(consts.OperatorWebHookName) + err = errors.Join( + err, r.deleteWebhookObject(ctx, obj), + ) + + obj = &uns.Unstructured{} + obj.SetGroupVersionKind(schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "ValidatingWebhookConfiguration", Version: "v1"}) + obj.SetName(consts.OperatorWebHookName) + err = errors.Join( + err, r.deleteWebhookObject(ctx, obj), + ) + + obj = &uns.Unstructured{} + obj.SetGroupVersionKind(schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}) + obj.SetName(consts.InjectorWebHookName) + err = errors.Join( + err, r.deleteWebhookObject(ctx, obj), + ) + + return err +} diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 6a98925eb..7f6db3522 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -6,6 +6,7 @@ import ( "os" "strings" "sync" + "time" admv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" @@ -38,15 +39,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { BeforeAll(func() { By("Create SriovOperatorConfig controller k8s objs") - config := &sriovnetworkv1.SriovOperatorConfig{} - config.SetNamespace(testNamespace) - config.SetName(consts.DefaultConfigName) - config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{ - EnableInjector: true, - EnableOperatorWebhook: true, - ConfigDaemonNodeSelector: map[string]string{}, - LogLevel: 2, - } + config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) DeferCleanup(func() { err := k8sClient.Delete(context.Background(), config) @@ -224,6 +217,29 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Expect(err).NotTo(HaveOccurred()) }) + // Namespaced resources are deleted via the `.ObjectMeta.OwnerReference` field. That logic can't be tested here because testenv doesn't have built-in controllers + // (See https://book.kubebuilder.io/reference/envtest#testing-considerations). Since Service and DaemonSet are deleted when default/SriovOperatorConfig is no longer + // present, it's important that webhook configurations are deleted as well. 
+ It("should delete the webhooks when SriovOperatorConfig/default is deleted", func() { + DeferCleanup(k8sClient.Create, context.Background(), makeDefaultSriovOpConfig()) + + err := k8sClient.Delete(context.Background(), &sriovnetworkv1.SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: "default"}, + }) + Expect(err).NotTo(HaveOccurred()) + + assertResourceDoesNotExist( + schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}, + client.ObjectKey{Name: "sriov-operator-webhook-config"}) + assertResourceDoesNotExist( + schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "ValidatingWebhookConfiguration", Version: "v1"}, + client.ObjectKey{Name: "sriov-operator-webhook-config"}) + + assertResourceDoesNotExist( + schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}, + client.ObjectKey{Name: "network-resources-injector-config"}) + }) + It("should be able to update the node selector of sriov-network-config-daemon", func() { By("specify the configDaemonNodeSelector") nodeSelector := map[string]string{"node-role.kubernetes.io/worker": ""} @@ -517,6 +533,19 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { }) }) +func makeDefaultSriovOpConfig() *sriovnetworkv1.SriovOperatorConfig { + config := &sriovnetworkv1.SriovOperatorConfig{} + config.SetNamespace(testNamespace) + config.SetName(consts.DefaultConfigName) + config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{ + EnableInjector: true, + EnableOperatorWebhook: true, + ConfigDaemonNodeSelector: map[string]string{}, + LogLevel: 2, + } + return config +} + func assertResourceExists(gvk schema.GroupVersionKind, key client.ObjectKey) { u := &unstructured.Unstructured{} u.SetGroupVersionKind(gvk) @@ -524,6 +553,20 @@ func assertResourceExists(gvk schema.GroupVersionKind, key client.ObjectKey) { Expect(err).NotTo(HaveOccurred()) } +func assertResourceDoesNotExist(gvk schema.GroupVersionKind, key client.ObjectKey) { + Eventually(func(g Gomega) { + u := &unstructured.Unstructured{} + u.SetGroupVersionKind(gvk) + err := k8sClient.Get(context.Background(), key, u) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.IsNotFound(err)).To(BeTrue()) + }). + WithOffset(1). + WithPolling(100*time.Millisecond). + WithTimeout(2*time.Second). 
+ Should(Succeed(), "Resource type[%s] name[%s] still present in the cluster", gvk.String(), key.String()) +} + func updateConfigDaemonNodeSelector(newValue map[string]string) func() { config := &sriovnetworkv1.SriovOperatorConfig{} err := k8sClient.Get(context.Background(), types.NamespacedName{Namespace: testNamespace, Name: "default"}, config) From f17bb2a9cb77897e833e96ff35fec3e626c928b9 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 19 Sep 2024 17:16:02 +0200 Subject: [PATCH 06/59] metrics: Fix typo in `METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES` Signed-off-by: Andrea Panattoni --- hack/run-e2e-conformance-virtual-ocp.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index 0092fcdad..cb65aaf50 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -191,9 +191,9 @@ export DEV_MODE=TRUE export CLUSTER_HAS_EMULATED_PF=TRUE export OPERATOR_LEADER_ELECTION_ENABLE=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true -export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULE=true +export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"} -export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshfit-monitoring"} +export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshift-monitoring"} export SRIOV_NETWORK_OPERATOR_IMAGE="$registry/$NAMESPACE/sriov-network-operator:latest" export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$registry/$NAMESPACE/sriov-network-config-daemon:latest" From f94fa644ddee573d246f656e49e8f232273f3bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Fri, 20 Sep 2024 19:39:43 +0200 Subject: [PATCH 07/59] Fix syntax for RDMA_CNI_IMAGE var substitution The bash syntax was incorrect and yielded: hack/env.sh: line 35: ${$RDMA_CNI_IMAGE:-}: bad substitution --- hack/env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/env.sh b/hack/env.sh index 28e0007e7..c49c399d8 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ -16,7 +16,7 @@ else # ensure that OVS_CNI_IMAGE is set, empty string is a valid value OVS_CNI_IMAGE=${OVS_CNI_IMAGE:-} # ensure that RDMA_CNI_IMAGE is set, empty string is a valid value - RDMA_CNI_IMAGE=${$RDMA_CNI_IMAGE:-} + RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE:-} METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-} [ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 [ -z $SRIOV_INFINIBAND_CNI_IMAGE ] && echo "SRIOV_INFINIBAND_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 From 3ff1b85e8465a4382e3eb62aa247ebf3c81cd9ee Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 12 Sep 2024 12:20:45 +0200 Subject: [PATCH 08/59] metrics: Add `node` label to `sriov_*` metrics It might happen that two SR-IOV pods, deployed on different node, are using devices with the same PCI address. 
In such cases, the query suggested [1] by the sriov-network-metrics-exporter produces the error: ``` Error loading values found duplicate series for the match group {pciAddr="0000:3b:02.4"} on the right hand-side of the operation: [ { __name__="sriov_kubepoddevice", container="test", dev_type="openshift.io/intelnetdevice", endpoint="sriov-network-metrics", instance="10.1.98.60:9110", job="sriov-network-metrics-exporter-service", namespace="cnf-4916", pciAddr="0000:3b:02.4", pod="pod-cnfdr22.telco5g.eng.rdu2.redhat.com", prometheus="openshift-monitoring/k8s", service="sriov-network-metrics-exporter-service" }, { __name__="sriov_kubepoddevice", container="test", dev_type="openshift.io/intelnetdevice", endpoint="sriov-network-metrics", instance="10.1.98.230:9110", job="sriov-network-metrics-exporter-service", namespace="cnf-4916", pciAddr="0000:3b:02.4", pod="pod-dhcp-98-230.telco5g.eng.rdu2.redhat.com", prometheus="openshift-monitoring/k8s", service="sriov-network-metrics-exporter-service" } ];many-to-many matching not allowed: matching labels must be unique on one side ``` Configure the ServiceMonitor resource to add a `node` label to all metrics. The right query to get metrics, as updated in the PrometheusRule, will be: ``` sriov_vf_tx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice ``` Also remove `pod`, `namespace` and `container` label from the `sriov_vf_*` metrics, as they were wrongly set to `sriov-network-metrics-exporter-zj2n9`, `openshift-sriov-network-operator`, `kube-rbac-proxy` [1] https://github.com/k8snetworkplumbingwg/sriov-network-metrics-exporter/blob/0f6a784f377ede87b95f31e569116ceb9775b5b9/README.md?plain=1#L38 Signed-off-by: Andrea Panattoni --- .../metrics-prometheus-rule.yaml | 16 ++-- .../metrics-exporter/metrics-prometheus.yaml | 11 +++ .../tests/test_exporter_metrics.go | 95 ++++++++++++++----- test/util/k8sreporter/reporter.go | 20 ++++ 4 files changed, 111 insertions(+), 31 deletions(-) diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml index efd760113..a385fa677 100644 --- a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml +++ b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml @@ -11,28 +11,28 @@ spec: interval: 30s rules: - expr: | - sriov_vf_tx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_packets - expr: | - sriov_vf_rx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_packets - expr: | - sriov_vf_tx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_bytes * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_bytes - expr: | - sriov_vf_rx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_bytes * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_bytes - expr: | - sriov_vf_tx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_dropped - expr: | - sriov_vf_rx_dropped * on (pciAddr) 
group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_dropped - expr: | - sriov_vf_rx_broadcast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_broadcast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_broadcast - expr: | - sriov_vf_rx_multicast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_multicast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_multicast {{ end }} diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus.yaml index 45ae7adbf..d1772a554 100644 --- a/bindata/manifests/metrics-exporter/metrics-prometheus.yaml +++ b/bindata/manifests/metrics-exporter/metrics-prometheus.yaml @@ -12,6 +12,17 @@ spec: bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token" scheme: "https" honorLabels: true + relabelings: + - action: replace + sourceLabels: + - __meta_kubernetes_endpoint_node_name + targetLabel: node + - action: labeldrop + regex: pod + - action: labeldrop + regex: container + - action: labeldrop + regex: namespace tlsConfig: serverName: sriov-network-metrics-exporter-service.{{.Namespace}}.svc caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go index 804432f04..96bf792b5 100644 --- a/test/conformance/tests/test_exporter_metrics.go +++ b/test/conformance/tests/test_exporter_metrics.go @@ -19,21 +19,18 @@ import ( "github.com/prometheus/common/model" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) -var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { +var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() { var node string var nic *sriovv1.InterfaceExt BeforeAll(func() { - if cluster.VirtualCluster() { - Skip("IGB driver does not support VF statistics") - } - err := namespaces.Create(namespaces.Test, clients) Expect(err).ToNot(HaveOccurred()) @@ -73,6 +70,9 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { }) It("collects metrics regarding receiving traffic via VF", func() { + if cluster.VirtualCluster() { + Skip("IGB driver does not support VF statistics") + } pod := createTestPod(node, []string{"test-me-network"}) DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) @@ -98,27 +98,76 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets)) }) - It("PrometheusRule should provide namespaced metrics", func() { - pod := createTestPod(node, []string{"test-me-network"}) - DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + Context("When Prometheus operator is available", func() { + BeforeEach(func() { + _, err := clients.ServiceMonitors(operatorNamespace).List(context.Background(), metav1.ListOptions{}) + if k8serrors.IsNotFound(err) { + Skip("Prometheus operator not available in the cluster") + } + }) - namespacedMetricNames := []string{ - "network:sriov_vf_rx_bytes", - "network:sriov_vf_tx_bytes", - "network:sriov_vf_rx_packets", - "network:sriov_vf_tx_packets", - "network:sriov_vf_rx_dropped", - "network:sriov_vf_tx_dropped", - "network:sriov_vf_rx_broadcast", - "network:sriov_vf_rx_multicast", - } + It("PrometheusRule should provide namespaced metrics", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + namespacedMetricNames := []string{ + "network:sriov_vf_rx_bytes", + "network:sriov_vf_tx_bytes", + "network:sriov_vf_rx_packets", + "network:sriov_vf_tx_packets", + "network:sriov_vf_rx_dropped", + "network:sriov_vf_tx_dropped", + "network:sriov_vf_rx_broadcast", + "network:sriov_vf_rx_multicast", + } - Eventually(func(g Gomega) { - for _, metricName := range namespacedMetricNames { - values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) - g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + Eventually(func(g Gomega) { + for _, metricName := range namespacedMetricNames { + values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) + g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + } + }, "90s", "1s").Should(Succeed()) + }) + + It("Metrics should have the correct labels", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + metricsName := []string{ + "sriov_vf_rx_bytes", + "sriov_vf_tx_bytes", + "sriov_vf_rx_packets", + "sriov_vf_tx_packets", + "sriov_vf_rx_dropped", + "sriov_vf_tx_dropped", + "sriov_vf_rx_broadcast", + "sriov_vf_rx_multicast", } - }, "40s", "1s").Should(Succeed()) + + Eventually(func(g Gomega) { + for _, metricName := range metricsName { + samples := runPromQLQuery(metricName) + g.Expect(samples).ToNot(BeEmpty(), "no value for metric %s", metricName) + g.Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKey(model.LabelName("node")), + HaveKey(model.LabelName("pf")), 
+ HaveKey(model.LabelName("vf")), + )) + } + }, "90s", "1s").Should(Succeed()) + + // sriov_kubepoddevice has a different sets of label than statistics metrics + samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name)) + Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice") + Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)), + HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")), + HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)), + HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)), + )) + }) }) }) diff --git a/test/util/k8sreporter/reporter.go b/test/util/k8sreporter/reporter.go index 5a3405a91..13baac0aa 100644 --- a/test/util/k8sreporter/reporter.go +++ b/test/util/k8sreporter/reporter.go @@ -10,6 +10,9 @@ import ( sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + rbacv1 "k8s.io/api/rbac/v1" ) func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { @@ -18,6 +21,17 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { if err != nil { return err } + + err = monitoringv1.AddToScheme(s) + if err != nil { + return err + } + + err = rbacv1.AddToScheme(s) + if err != nil { + return err + } + return nil } @@ -38,6 +52,8 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { return true case multusNamespace != "" && ns == multusNamespace: return true + case ns == "openshift-monitoring": + return true } return false } @@ -47,6 +63,10 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { {Cr: &sriovv1.SriovNetworkNodePolicyList{}}, {Cr: &sriovv1.SriovNetworkList{}}, {Cr: &sriovv1.SriovOperatorConfigList{}}, + {Cr: &monitoringv1.ServiceMonitorList{}, Namespace: &operatorNamespace}, + {Cr: &monitoringv1.PrometheusRuleList{}, Namespace: &operatorNamespace}, + {Cr: &rbacv1.RoleList{}, Namespace: &operatorNamespace}, + {Cr: &rbacv1.RoleBindingList{}, Namespace: &operatorNamespace}, } err := os.Mkdir(reportPath, 0755) From 084810a1b5afa144f7f208e12bde2dbc58c72086 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Wed, 11 Sep 2024 15:30:26 -0400 Subject: [PATCH 09/59] openstack: dynamically mount the config-drive When we want to use config-drive in immutable systems, very often the config-drive is only used at boot and then umounted (e.g. ignition does this). Later when we want to fetch Metadata from the config drive, we actually have to mount it. In this PR, I'm adding similar code than coreos/ignition where we dynamically mount the config-drive is the device was found with the right label (config-2 or CONFIG-2 as documented in OpenStack). If the device is found, we mount it, fetch the data and umount it. 
--- pkg/platforms/openstack/openstack.go | 115 +++++++++++++++++++++------ 1 file changed, 92 insertions(+), 23 deletions(-) diff --git a/pkg/platforms/openstack/openstack.go b/pkg/platforms/openstack/openstack.go index 94a9ae433..8968c96be 100644 --- a/pkg/platforms/openstack/openstack.go +++ b/pkg/platforms/openstack/openstack.go @@ -5,6 +5,8 @@ import ( "fmt" "io" "os" + "os/exec" + "path/filepath" "strconv" "strings" @@ -21,15 +23,18 @@ import ( ) const ( - ospHostMetaDataDir = "/host/var/config/openstack/2018-08-27" - ospMetaDataDir = "/var/config/openstack/2018-08-27" - ospMetaDataBaseURL = "http://169.254.169.254/openstack/2018-08-27" - ospNetworkDataJSON = "network_data.json" - ospMetaDataJSON = "meta_data.json" - ospHostNetworkDataFile = ospHostMetaDataDir + "/" + ospNetworkDataJSON - ospHostMetaDataFile = ospHostMetaDataDir + "/" + ospMetaDataJSON - ospNetworkDataURL = ospMetaDataBaseURL + "/" + ospNetworkDataJSON - ospMetaDataURL = ospMetaDataBaseURL + "/" + ospMetaDataJSON + varConfigPath = "/var/config" + ospMetaDataBaseDir = "/openstack/2018-08-27" + ospMetaDataDir = varConfigPath + ospMetaDataBaseDir + ospMetaDataBaseURL = "http://169.254.169.254" + ospMetaDataBaseDir + ospNetworkDataJSON = "network_data.json" + ospMetaDataJSON = "meta_data.json" + ospNetworkDataURL = ospMetaDataBaseURL + "/" + ospNetworkDataJSON + ospMetaDataURL = ospMetaDataBaseURL + "/" + ospMetaDataJSON + // Config drive is defined as an iso9660 or vfat (deprecated) drive + // with the "config-2" label. + //https://docs.openstack.org/nova/latest/user/config-drive.html + configDriveLabel = "config-2" ) var ( @@ -109,9 +114,10 @@ func New(hostManager host.HostManagerInterface) OpenstackInterface { } // GetOpenstackData gets the metadata and network_data -func getOpenstackData(useHostPath bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { - metaData, networkData, err = getOpenstackDataFromConfigDrive(useHostPath) +func getOpenstackData(mountConfigDrive bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { + metaData, networkData, err = getOpenstackDataFromConfigDrive(mountConfigDrive) if err != nil { + log.Log.Error(err, "GetOpenStackData(): non-fatal error getting OpenStack data from config drive") metaData, networkData, err = getOpenstackDataFromMetadataService() if err != nil { return metaData, networkData, fmt.Errorf("GetOpenStackData(): error getting OpenStack data: %w", err) @@ -153,46 +159,109 @@ func getOpenstackData(useHostPath bool) (metaData *OSPMetaData, networkData *OSP return metaData, networkData, err } +// getConfigDriveDevice returns the config drive device which was found +func getConfigDriveDevice() (string, error) { + dev := "/dev/disk/by-label/" + configDriveLabel + if _, err := os.Stat(dev); os.IsNotExist(err) { + out, err := exec.Command( + "blkid", "-l", + "-t", "LABEL="+configDriveLabel, + "-o", "device", + ).CombinedOutput() + if err != nil { + return "", fmt.Errorf("unable to run blkid: %v", err) + } + dev = strings.TrimSpace(string(out)) + } + log.Log.Info("found config drive device", "device", dev) + return dev, nil +} + +// mountConfigDriveDevice mounts the config drive and return the path +func mountConfigDriveDevice(device string) (string, error) { + if device == "" { + return "", fmt.Errorf("device is empty") + } + tmpDir, err := os.MkdirTemp("", "sriov-configdrive") + if err != nil { + return "", fmt.Errorf("error creating temp directory: %w", err) + } + cmd := exec.Command("mount", "-o", "ro", "-t", "auto", device, tmpDir) + if err := 
cmd.Run(); err != nil { + return "", fmt.Errorf("error mounting config drive: %w", err) + } + log.Log.V(2).Info("mounted config drive device", "device", device, "path", tmpDir) + return tmpDir, nil +} + +// ummountConfigDriveDevice ummounts the config drive device +func ummountConfigDriveDevice(path string) error { + if path == "" { + return fmt.Errorf("path is empty") + } + cmd := exec.Command("umount", path) + if err := cmd.Run(); err != nil { + return fmt.Errorf("error umounting config drive: %w", err) + } + log.Log.V(2).Info("umounted config drive", "path", path) + return nil +} + // getOpenstackDataFromConfigDrive reads the meta_data and network_data files -func getOpenstackDataFromConfigDrive(useHostPath bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { +func getOpenstackDataFromConfigDrive(mountConfigDrive bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { metaData = &OSPMetaData{} networkData = &OSPNetworkData{} + var configDrivePath string log.Log.Info("reading OpenStack meta_data from config-drive") var metadataf *os.File ospMetaDataFilePath := ospMetaDataFile - if useHostPath { - ospMetaDataFilePath = ospHostMetaDataFile + if mountConfigDrive { + configDriveDevice, err := getConfigDriveDevice() + if err != nil { + return metaData, networkData, fmt.Errorf("error finding config drive device: %w", err) + } + configDrivePath, err = mountConfigDriveDevice(configDriveDevice) + if err != nil { + return metaData, networkData, fmt.Errorf("error mounting config drive device: %w", err) + } + defer func() { + if e := ummountConfigDriveDevice(configDrivePath); err == nil && e != nil { + err = fmt.Errorf("error umounting config drive device: %w", e) + } + if e := os.Remove(configDrivePath); err == nil && e != nil { + err = fmt.Errorf("error removing temp directory %s: %w", configDrivePath, e) + } + }() + ospMetaDataFilePath = filepath.Join(configDrivePath, ospMetaDataBaseDir, ospMetaDataJSON) + ospNetworkDataFile = filepath.Join(configDrivePath, ospMetaDataBaseDir, ospNetworkDataJSON) } metadataf, err = os.Open(ospMetaDataFilePath) if err != nil { - return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospHostMetaDataFile, err) + return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospMetaDataFilePath, err) } defer func() { if e := metadataf.Close(); err == nil && e != nil { - err = fmt.Errorf("error closing file %s: %w", ospHostMetaDataFile, e) + err = fmt.Errorf("error closing file %s: %w", ospMetaDataFilePath, e) } }() if err = json.NewDecoder(metadataf).Decode(&metaData); err != nil { - return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospHostMetaDataFile, err) + return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospMetaDataFilePath, err) } log.Log.Info("reading OpenStack network_data from config-drive") var networkDataf *os.File ospNetworkDataFilePath := ospNetworkDataFile - if useHostPath { - ospNetworkDataFilePath = ospHostNetworkDataFile - } networkDataf, err = os.Open(ospNetworkDataFilePath) if err != nil { - return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospHostNetworkDataFile, err) + return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospNetworkDataFilePath, err) } defer func() { if e := networkDataf.Close(); err == nil && e != nil { - err = fmt.Errorf("error closing file %s: %w", ospHostNetworkDataFile, e) + err = fmt.Errorf("error closing file %s: %w", ospNetworkDataFilePath, e) } }() if 
err = json.NewDecoder(networkDataf).Decode(&networkData); err != nil { - return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospHostNetworkDataFile, err) + return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospNetworkDataFilePath, err) } return metaData, networkData, err } From ba21df035b79c907dd1cbc4898e83a7557109553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 14:19:43 +0200 Subject: [PATCH 10/59] Enclose array expansions in double quote Fixes the following shellcheck error: SC2068 (error): Double quote array expansions to avoid re-splitting elements. https://www.shellcheck.net/wiki/SC2068 --- hack/deploy-setup.sh | 2 +- hack/vf-netns-switcher.sh | 8 ++++---- test/scripts/enable-kargs_test.sh | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hack/deploy-setup.sh b/hack/deploy-setup.sh index 2c2fc7d8d..807479c38 100755 --- a/hack/deploy-setup.sh +++ b/hack/deploy-setup.sh @@ -22,7 +22,7 @@ load_manifest() { fi files="service_account.yaml role.yaml role_binding.yaml clusterrole.yaml clusterrolebinding.yaml configmap.yaml sriovoperatorconfig.yaml operator.yaml" for m in ${files}; do - if [ "$(echo ${EXCLUSIONS[@]} | grep -o ${m} | wc -w | xargs)" == "0" ] ; then + if [ "$(echo "${EXCLUSIONS[@]}" | grep -o ${m} | wc -w | xargs)" == "0" ] ; then envsubst< ${m} | ${OPERATOR_EXEC} apply ${namespace:-} --validate=false -f - fi done diff --git a/hack/vf-netns-switcher.sh b/hack/vf-netns-switcher.sh index de4e8041a..e842a8dc8 100755 --- a/hack/vf-netns-switcher.sh +++ b/hack/vf-netns-switcher.sh @@ -95,7 +95,7 @@ It must be of the form :,. This flag can be repeated to specify done return_interfaces_to_default_namespace(){ - for netns in ${netnses[@]};do + for netns in "${netnses[@]}";do for pf in ${pfs[$netns]};do return_interface_to_default_namespace "${netns}" "${pf}" done @@ -360,7 +360,7 @@ main(){ trap return_interfaces_to_default_namespace INT EXIT TERM while true;do - for netns in ${netnses[@]};do + for netns in "${netnses[@]}";do switch_pfs "$netns" "${pfs[$netns]}" sleep 2 switch_netns_vfs "$netns" @@ -388,7 +388,7 @@ if [[ "$status" != "0" ]];then exit $status fi -for netns in ${netnses[@]};do +for netns in "${netnses[@]}";do netns_create "$netns" let status=$status+$? if [[ "$status" != "0" ]];then @@ -397,7 +397,7 @@ for netns in ${netnses[@]};do fi done -for netns in ${netnses[@]};do +for netns in "${netnses[@]}";do get_pcis_from_pfs "$netns" "${pfs[$netns]}" get_pf_switch_dev_info "$netns" "${pfs[$netns]}" done diff --git a/test/scripts/enable-kargs_test.sh b/test/scripts/enable-kargs_test.sh index 615f3d2b2..40c2764be 100755 --- a/test/scripts/enable-kargs_test.sh +++ b/test/scripts/enable-kargs_test.sh @@ -46,7 +46,7 @@ setUp() { # Mock chroot calls to the temporary test folder export real_chroot=$(which chroot) chroot() { - $real_chroot $FAKE_HOST ${@:2} + $real_chroot $FAKE_HOST "${@:2}" } export -f chroot From 3d553bfd6985fbd7225f615577148ed1e6a42963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 14:22:26 +0200 Subject: [PATCH 11/59] Add missing shebang Fixes the following shellcheck error: SC2148 (error): Tips depend on target shell and yours is unknown. Add a shebang or a 'shell' directive. 
https://www.shellcheck.net/wiki/SC2148 --- hack/env.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hack/env.sh b/hack/env.sh index c49c399d8..64f79212d 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ -1,3 +1,5 @@ +#!/bin/bash + if [ -z $SKIP_VAR_SET ]; then export SRIOV_CNI_IMAGE=${SRIOV_CNI_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-cni} export SRIOV_INFINIBAND_CNI_IMAGE=${SRIOV_INFINIBAND_CNI_IMAGE:-ghcr.io/k8snetworkplumbingwg/ib-sriov-cni} From 63246d6918a155fc9cbe2aed057274a5dcc9503d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 15:51:05 +0200 Subject: [PATCH 12/59] Explicitly expand array values Fixes the following shellcheck errors: SC2145 (error): Argument mixes string and array. Use * or separate argument. SC2199 (error): Arrays implicitly concatenate in [[ ]]. Use a loop (or explicit * instead of @). https://www.shellcheck.net/wiki/SC2145 https://www.shellcheck.net/wiki/SC2199 Also fixes a typo in SUPPORTED_INTERFACE_SWITCHER_MODES. --- hack/run-e2e-test-kind.sh | 6 +++--- hack/vf-netns-switcher.sh | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hack/run-e2e-test-kind.sh b/hack/run-e2e-test-kind.sh index 5cb7750c7..3cc080d9c 100755 --- a/hack/run-e2e-test-kind.sh +++ b/hack/run-e2e-test-kind.sh @@ -6,7 +6,7 @@ export SRIOV_NETWORK_OPERATOR_IMAGE="${SRIOV_NETWORK_OPERATOR_IMAGE:-sriov-netwo export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-origin-sriov-network-config-daemon:e2e-test}" export KUBECONFIG="${KUBECONFIG:-${HOME}/.kube/config}" INTERFACES_SWITCHER="${INTERFACES_SWITCHER:-"test-suite"}" -SUPPORTED_INTERFACE_SWTICHER_MODES=("test-suite", "system-service") +SUPPORTED_INTERFACE_SWITCHER_MODES=("test-suite", "system-service") RETRY_MAX=10 INTERVAL=10 TIMEOUT=300 @@ -16,9 +16,9 @@ while test $# -gt 0; do case "$1" in --device-netns-switcher) INTERFACES_SWITCHER="$2" - if [[ ! "${SUPPORTED_INTERFACE_SWTICHER_MODES[@]}" =~ "${INTERFACES_SWITCHER}" ]]; then + if [[ ! "${SUPPORTED_INTERFACE_SWITCHER_MODES[*]}" =~ "${INTERFACES_SWITCHER}" ]]; then echo "Error: unsupported interface switching mode: ${INTERFACES_SWITCHER}!" - echo "Supported modes are: ${SUPPORTED_INTERFACE_SWTICHER_MODES[@]}" + echo "Supported modes are: ${SUPPORTED_INTERFACE_SWITCHER_MODES[*]}" exit 1 fi shift diff --git a/hack/vf-netns-switcher.sh b/hack/vf-netns-switcher.sh index e842a8dc8..69881da7b 100755 --- a/hack/vf-netns-switcher.sh +++ b/hack/vf-netns-switcher.sh @@ -348,7 +348,7 @@ variables_check(){ check_empty_var(){ local var_name="$1" - if [[ -z "${!var_name[@]}" ]];then + if [[ -z "${!var_name[*]}" ]];then echo "Error: $var_name is empty..." return 1 fi @@ -403,7 +403,7 @@ for netns in "${netnses[@]}";do done if [[ "${#pcis[@]}" == "0" ]];then - echo "Error: could not get pci addresses of interfaces ${pfs[@]}!!" + echo "Error: could not get pci addresses of interfaces ${pfs[*]}!!" exit 1 fi From 3529811b1d3a0833dacc2e7fc27425749562f769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 17:43:01 +0200 Subject: [PATCH 13/59] Iterate over globs. Fixes the following shellcheck error: SC2045 (error): Iterating over ls output is fragile. Use globs. 
https://www.shellcheck.net/wiki/SC2045 --- hack/vf-netns-switcher.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hack/vf-netns-switcher.sh b/hack/vf-netns-switcher.sh index 69881da7b..c383b5d1e 100755 --- a/hack/vf-netns-switcher.sh +++ b/hack/vf-netns-switcher.sh @@ -277,19 +277,20 @@ switch_interface_vf_representors(){ return 0 fi - for interface in $(ls /sys/class/net);do - phys_switch_id=$(cat /sys/class/net/$interface/phys_switch_id) + for interface in /sys/class/net/*;do + phys_switch_id=$(cat $interface/phys_switch_id) if [[ "$phys_switch_id" != "${pf_switch_ids[$pf_name]}" ]]; then continue fi - phys_port_name=$(cat /sys/class/net/$interface/phys_port_name) + phys_port_name=$(cat $interface/phys_port_name) phys_port_name_pf_index=${phys_port_name%vf*} phys_port_name_pf_index=${phys_port_name_pf_index#pf} if [[ "$phys_port_name_pf_index" != "${pf_port_names[$pf_name]:1}" ]]; then continue fi - echo "Switching VF representor $interface of PF $pf_name to netns $worker_netns" - switch_vf $interface $worker_netns + interface_name=${interface##*/} + echo "Switching VF representor $interface_name of PF $pf_name to netns $worker_netns" + switch_vf $interface_name $worker_netns done } From 61aacb5bc7d51894346749fb6e838a320c0b7505 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Mon, 23 Sep 2024 19:26:03 +0300 Subject: [PATCH 14/59] Fix: GetDevlinkDeviceParam to handle edge-cases correctly On some kernels GetDevlinkDeviceParam may return empty values for some kernel parameters. The netlink library is able to handle this, but the code in GetDevlinkDeviceParam function may panic if unexpected value received. Add extra checks to avoid panics --- pkg/host/internal/network/network.go | 22 +++++++++++++++------- pkg/host/internal/sriov/sriov.go | 5 +++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index b3014f9e9..2eb40dd69 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -264,12 +264,12 @@ func (n *network) GetDevlinkDeviceParam(pciAddr, paramName string) (string, erro funcLog.Error(err, "GetDevlinkDeviceParam(): fail to get devlink device param") return "", err } - if len(param.Values) == 0 { - err = fmt.Errorf("param %s has no value", paramName) - funcLog.Error(err, "GetDevlinkDeviceParam(): error") - return "", err + if len(param.Values) == 0 || param.Values[0].Data == nil { + funcLog.Info("GetDevlinkDeviceParam(): WARNING: can't read devlink parameter from the device, an empty value received") + return "", nil } var value string + var ok bool switch param.Type { case nl.DEVLINK_PARAM_TYPE_U8, nl.DEVLINK_PARAM_TYPE_U16, nl.DEVLINK_PARAM_TYPE_U32: var valData uint64 @@ -281,14 +281,22 @@ func (n *network) GetDevlinkDeviceParam(pciAddr, paramName string) (string, erro case uint32: valData = uint64(v) default: - return "", fmt.Errorf("unexpected uint type type") + return "", fmt.Errorf("value is not uint") } value = strconv.FormatUint(valData, 10) case nl.DEVLINK_PARAM_TYPE_STRING: - value = param.Values[0].Data.(string) + value, ok = param.Values[0].Data.(string) + if !ok { + return "", fmt.Errorf("value is not a string") + } case nl.DEVLINK_PARAM_TYPE_BOOL: - value = strconv.FormatBool(param.Values[0].Data.(bool)) + var boolValue bool + boolValue, ok = param.Values[0].Data.(bool) + if !ok { + return "", fmt.Errorf("value is not a bool") + } + value = strconv.FormatBool(boolValue) default: return "", 
fmt.Errorf("unknown value type: %d", param.Type) } diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index 379cf6a70..bd453ae30 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -376,6 +376,11 @@ func (s *sriov) configureHWOptionsForSwitchdev(iface *sriovnetworkv1.Interface) log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to read current flow steering mode for the device", "device", iface.PciAddress) return err } + if currentFlowSteeringMode == "" { + log.Log.V(2).Info("configureHWOptionsForSwitchdev(): can't detect current flow_steering_mode mode for the device, skip", + "device", iface.PciAddress) + return nil + } if currentFlowSteeringMode == desiredFlowSteeringMode { return nil } From a01a1392f384df0653e4baa7cbdcacdc58953a38 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Mon, 7 Oct 2024 14:20:03 +0200 Subject: [PATCH 15/59] metrics: Fix `Metrics should have the correct labels` test `sriov_kubepoddevice` metric might end up in the Prometheus database after a while, as the default scrape interval is 30s. This leads to failures in the end-to-end lane like: ``` [sriov] Metrics Exporter When Prometheus operator is available [It] Metrics should have the correct labels /root/opr-ocp2-1/data/sriov-network-operator/sriov-network-operator/test/conformance/tests/test_exporter_metrics.go:132 [FAILED] no value for metric sriov_kubepoddevice ``` Put the metric assertion in an `Eventually` statement Signed-off-by: Andrea Panattoni --- .../tests/test_exporter_metrics.go | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go index 96bf792b5..f7bc82d3f 100644 --- a/test/conformance/tests/test_exporter_metrics.go +++ b/test/conformance/tests/test_exporter_metrics.go @@ -66,6 +66,8 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() Expect(err).ToNot(HaveOccurred()) waitForNetAttachDef("test-me-network", namespaces.Test) + WaitForSRIOVStable() + DeferCleanup(namespaces.Clean, operatorNamespace, namespaces.Test, clients, discovery.Enabled()) }) @@ -158,15 +160,17 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() }, "90s", "1s").Should(Succeed()) // sriov_kubepoddevice has a different sets of label than statistics metrics - samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name)) - Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice") - Expect(samples[0].Metric).To(And( - HaveKey(model.LabelName("pciAddr")), - HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)), - HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")), - HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)), - HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)), - )) + Eventually(func(g Gomega) { + samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name)) + g.Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice") + g.Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)), + HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")), + 
HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)), + HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)), + )) + }, "60s", "1s").Should(Succeed()) }) }) }) From 6abdfe6d188344ab6bd6ad0e64f0895ab7aa414f Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 8 Oct 2024 21:23:42 +0300 Subject: [PATCH 16/59] Fix NRI rbac Signed-off-by: Sebastian Sch --- bindata/manifests/webhook/002-rbac.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindata/manifests/webhook/002-rbac.yaml b/bindata/manifests/webhook/002-rbac.yaml index 77b2d95d7..32affca29 100644 --- a/bindata/manifests/webhook/002-rbac.yaml +++ b/bindata/manifests/webhook/002-rbac.yaml @@ -21,7 +21,7 @@ rules: - apiGroups: - "" resources: - - configmap + - configmaps verbs: - 'watch' - 'list' From fb193e80038325b4c9bc8d8012d809eca9bc46da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Thu, 26 Sep 2024 14:23:34 +0200 Subject: [PATCH 17/59] Use grep for matching args with sh Fixes the following shellcheck error: SC2081 (error): [ .. ] can't match globs. Use a case statement. https://www.shellcheck.net/wiki/SC2081 --- test/scripts/enable-kargs_test.sh | 1 + test/scripts/rpm-ostree_mock | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/scripts/enable-kargs_test.sh b/test/scripts/enable-kargs_test.sh index 40c2764be..93a985700 100755 --- a/test/scripts/enable-kargs_test.sh +++ b/test/scripts/enable-kargs_test.sh @@ -40,6 +40,7 @@ setUp() { cp $(which cat) ${FAKE_HOST}/usr/bin/ cp $(which test) ${FAKE_HOST}/usr/bin/ cp $(which sh) ${FAKE_HOST}/usr/bin/ + cp $(which grep) ${FAKE_HOST}/usr/bin/ cp "$SCRIPTPATH/rpm-ostree_mock" ${FAKE_HOST}/usr/bin/rpm-ostree } diff --git a/test/scripts/rpm-ostree_mock b/test/scripts/rpm-ostree_mock index 16e816cc9..db6f66040 100755 --- a/test/scripts/rpm-ostree_mock +++ b/test/scripts/rpm-ostree_mock @@ -5,7 +5,7 @@ # Write invocation with arguments to a file to allow making assertion. echo "$*" >> /rpm-ostree_calls -if [ "$*" != *"--append"* ] +if ! echo "$*" | grep -q "\--append" then # Caller is trying to read kernel arguments. cat /proc/cmdline From 5394d218f8c50ab7acf05558c1777491115fdbaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 17:58:54 +0200 Subject: [PATCH 18/59] CI: Add a bash linter to pre-submits Warns about shellcheck issues with severity `error`. --- .github/workflows/test.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2fbe84c81..d59e52e47 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,6 +100,16 @@ jobs: # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version. version: v1.55.2 + shellcheck: + name: Shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master + with: + severity: error + test-coverage: name: test-coverage runs-on: ubuntu-latest From f286a04ad7c47216fece213bca47fddcc774f4d2 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 4 Oct 2024 19:05:53 +0200 Subject: [PATCH 19/59] config-daemon: Restart all instances of device-plugin When the operator changes the device-plugin Spec (e.g. .Spec.NodeSelector), it may happen that there are two device plugin pods for a given node, one that is terminating, the other that is initializing. 
If the config-daemon executes `restartDevicePluginPod()` at the same time, it may kill the terminating pod, while the initializing one will run with the old dp configuration. This may cause one or more resources to not being advertised, until a manual device plugin restart occurs. Make the config-daemon restart all the device-plugin instances it founds for its own node. Signed-off-by: Andrea Panattoni --- pkg/daemon/daemon.go | 53 +++++++++++++++++----------------- pkg/daemon/daemon_test.go | 61 ++++++++++++++++++++++++++++++--------- 2 files changed, 74 insertions(+), 40 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 5ed31ff85..ff7f326dc 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -120,6 +120,7 @@ func New( eventRecorder: er, featureGate: featureGates, disabledPlugins: disabledPlugins, + mu: &sync.Mutex{}, } } @@ -159,7 +160,6 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { var timeout int64 = 5 var metadataKey = "metadata.name" - dn.mu = &sync.Mutex{} informerFactory := sninformer.NewFilteredSharedInformerFactory(dn.sriovClient, time.Second*15, vars.Namespace, @@ -683,7 +683,6 @@ func (dn *Daemon) restartDevicePluginPod() error { defer dn.mu.Unlock() log.Log.V(2).Info("restartDevicePluginPod(): try to restart device plugin pod") - var podToDelete string pods, err := dn.kubeClient.CoreV1().Pods(vars.Namespace).List(context.Background(), metav1.ListOptions{ LabelSelector: "app=sriov-device-plugin", FieldSelector: "spec.nodeName=" + vars.NodeName, @@ -702,35 +701,37 @@ func (dn *Daemon) restartDevicePluginPod() error { log.Log.Info("restartDevicePluginPod(): device plugin pod exited") return nil } - podToDelete = pods.Items[0].Name - log.Log.V(2).Info("restartDevicePluginPod(): Found device plugin pod, deleting it", "pod-name", podToDelete) - err = dn.kubeClient.CoreV1().Pods(vars.Namespace).Delete(context.Background(), podToDelete, metav1.DeleteOptions{}) - if errors.IsNotFound(err) { - log.Log.Info("restartDevicePluginPod(): pod to delete not found") - return nil - } - if err != nil { - log.Log.Error(err, "restartDevicePluginPod(): Failed to delete device plugin pod, retrying") - return err - } - - if err := wait.PollImmediateUntil(3*time.Second, func() (bool, error) { - _, err := dn.kubeClient.CoreV1().Pods(vars.Namespace).Get(context.Background(), podToDelete, metav1.GetOptions{}) + for _, pod := range pods.Items { + podToDelete := pod.Name + log.Log.V(2).Info("restartDevicePluginPod(): Found device plugin pod, deleting it", "pod-name", podToDelete) + err = dn.kubeClient.CoreV1().Pods(vars.Namespace).Delete(context.Background(), podToDelete, metav1.DeleteOptions{}) if errors.IsNotFound(err) { - log.Log.Info("restartDevicePluginPod(): device plugin pod exited") - return true, nil + log.Log.Info("restartDevicePluginPod(): pod to delete not found") + continue } - if err != nil { - log.Log.Error(err, "restartDevicePluginPod(): Failed to check for device plugin exit, retrying") - } else { - log.Log.Info("restartDevicePluginPod(): waiting for device plugin pod to exit", "pod-name", podToDelete) + log.Log.Error(err, "restartDevicePluginPod(): Failed to delete device plugin pod, retrying") + return err + } + + if err := wait.PollImmediateUntil(3*time.Second, func() (bool, error) { + _, err := dn.kubeClient.CoreV1().Pods(vars.Namespace).Get(context.Background(), podToDelete, metav1.GetOptions{}) + if errors.IsNotFound(err) { + log.Log.Info("restartDevicePluginPod(): device plugin pod exited") + return true, nil + } + 
+ if err != nil { + log.Log.Error(err, "restartDevicePluginPod(): Failed to check for device plugin exit, retrying") + } else { + log.Log.Info("restartDevicePluginPod(): waiting for device plugin pod to exit", "pod-name", podToDelete) + } + return false, nil + }, dn.stopCh); err != nil { + log.Log.Error(err, "restartDevicePluginPod(): failed to wait for checking pod deletion") + return err } - return false, nil - }, dn.stopCh); err != nil { - log.Log.Error(err, "restartDevicePluginPod(): failed to wait for checking pod deletion") - return err } return nil diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index f1111810a..67a56633f 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -32,6 +32,8 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem" ) +var SriovDevicePluginPod corev1.Pod + func TestConfigDaemon(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Config Daemon Suite") @@ -107,19 +109,6 @@ var _ = Describe("Config Daemon", func() { }, } - SriovDevicePluginPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "sriov-device-plugin-xxxx", - Namespace: vars.Namespace, - Labels: map[string]string{ - "app": "sriov-device-plugin", - }, - }, - Spec: corev1.PodSpec{ - NodeName: "test-node", - }, - } - err = sriovnetworkv1.AddToScheme(scheme.Scheme) Expect(err).ToNot(HaveOccurred()) kClient := kclient.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&corev1.Node{ @@ -130,7 +119,7 @@ var _ = Describe("Config Daemon", func() { Namespace: vars.Namespace, }}).Build() - kubeClient := fakek8s.NewSimpleClientset(&FakeSupportedNicIDs, &SriovDevicePluginPod) + kubeClient := fakek8s.NewSimpleClientset(&FakeSupportedNicIDs) snclient := snclientset.NewSimpleClientset() err = sriovnetworkv1.InitNicIDMapFromConfigMap(kubeClient, vars.Namespace) Expect(err).ToNot(HaveOccurred()) @@ -175,6 +164,22 @@ var _ = Describe("Config Daemon", func() { err := sut.Run(stopCh, exitCh) Expect(err).ToNot(HaveOccurred()) }() + + SriovDevicePluginPod = corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sriov-device-plugin-xxxx", + Namespace: vars.Namespace, + Labels: map[string]string{ + "app": "sriov-device-plugin", + }, + }, + Spec: corev1.PodSpec{ + NodeName: "test-node", + }, + } + _, err = sut.kubeClient.CoreV1().Pods(vars.Namespace).Create(context.Background(), &SriovDevicePluginPod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + }) AfterEach(func() { @@ -286,6 +291,34 @@ var _ = Describe("Config Daemon", func() { Expect(sut.desiredNodeState.GetGeneration()).To(BeNumerically("==", 777)) }) + + It("restart all the sriov-device-plugin pods present on the node", func() { + otherPod1 := SriovDevicePluginPod.DeepCopy() + otherPod1.Name = "sriov-device-plugin-xxxa" + _, err := sut.kubeClient.CoreV1().Pods(vars.Namespace).Create(context.Background(), otherPod1, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + otherPod2 := SriovDevicePluginPod.DeepCopy() + otherPod2.Name = "sriov-device-plugin-xxxz" + _, err = sut.kubeClient.CoreV1().Pods(vars.Namespace).Create(context.Background(), otherPod2, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + err = sut.restartDevicePluginPod() + Expect(err).ToNot(HaveOccurred()) + + Eventually(func() (int, error) { + podList, err := sut.kubeClient.CoreV1().Pods(vars.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=sriov-device-plugin", + FieldSelector: "spec.nodeName=test-node", + }) + + if err != nil { + 
return 0, err + } + + return len(podList.Items), nil + }, "1s").Should(BeZero()) + }) }) }) From 85063dc58f1c86d5d33c09e12a42504abd0217dd Mon Sep 17 00:00:00 2001 From: William Zhao Date: Thu, 10 Oct 2024 16:16:00 -0400 Subject: [PATCH 20/59] Add Intel Corporation Ethernet Controller E810-XXV for backplane, E823-L for SFP, E823-L for backplane for NetSec Accelerator Cards Fixes Issue #789 Signed-off-by: William Zhao --- deploy/configmap.yaml | 3 +++ .../sriov-network-operator-chart/templates/configmap.yaml | 3 +++ doc/supported-hardware.md | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml index b21b07ea6..8aa4cb970 100644 --- a/deploy/configmap.yaml +++ b/deploy/configmap.yaml @@ -16,8 +16,11 @@ data: Intel_ice_Columbiaville_E810-CQDA2_2CQDA2: "8086 1592 1889" Intel_ice_Columbiaville_E810-XXVDA4: "8086 1593 1889" Intel_ice_Columbiaville_E810-XXVDA2: "8086 159b 1889" + Intel_ice_Columbiaville_E810-XXV_BACKPLANE: "8086 1599 1889" Intel_ice_Columbiaville_E810: "8086 1591 1889" Intel_ice_Columbiapark_E823C: "8086 188a 1889" + Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889" + Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889" Nvidia_mlx5_ConnectX-4: "15b3 1013 1014" Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016" Nvidia_mlx5_ConnectX-5: "15b3 1017 1018" diff --git a/deployment/sriov-network-operator-chart/templates/configmap.yaml b/deployment/sriov-network-operator-chart/templates/configmap.yaml index 6f6ab3bcc..a4e467da8 100644 --- a/deployment/sriov-network-operator-chart/templates/configmap.yaml +++ b/deployment/sriov-network-operator-chart/templates/configmap.yaml @@ -16,8 +16,11 @@ data: Intel_ice_Columbiaville_E810-CQDA2_2CQDA2: "8086 1592 1889" Intel_ice_Columbiaville_E810-XXVDA4: "8086 1593 1889" Intel_ice_Columbiaville_E810-XXVDA2: "8086 159b 1889" + Intel_ice_Columbiaville_E810-XXV_BACKPLANE: "8086 1599 1889" Intel_ice_Columbiaville_E810: "8086 1591 1889" Intel_ice_Columbiapark_E823C: "8086 188a 1889" + Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889" + Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889" Nvidia_mlx5_ConnectX-4: "15b3 1013 1014" Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016" Nvidia_mlx5_ConnectX-5: "15b3 1017 1018" diff --git a/doc/supported-hardware.md b/doc/supported-hardware.md index 446190905..75b3fafcb 100644 --- a/doc/supported-hardware.md +++ b/doc/supported-hardware.md @@ -13,7 +13,10 @@ The following SR-IOV capable hardware is supported with sriov-network-operator: | Intel E810-CQDA2/2CQDA2 Family | 8086 | 1592 | | Intel E810-XXVDA4 Family | 8086 | 1593 | | Intel E810-XXVDA2 Family | 8086 | 159b | +| Intel E810-XXV Backplane Family | 8086 | 1599 | | Intel E823-C Family | 8086 | 188a | +| Intel E823-L SFP Family | 8086 | 124d | +| Intel E823-L Backplane Family | 8086 | 124c | | Mellanox MT27700 Family [ConnectX-4] | 15b3 | 1013 | | Mellanox MT27710 Family [ConnectX-4 Lx] | 15b3 | 1015 | | Mellanox MT27800 Family [ConnectX-5] | 15b3 | 1017 | @@ -53,7 +56,10 @@ The following table depicts the supported SR-IOV hardware features of each suppo | Intel E810-CQDA2/2CQDA2 Family | V | V | X | | Intel E810-XXVDA4 Family | V | V | X | | Intel E810-XXVDA2 Family | V | V | X | +| Intel E810-XXV Backplane Family | V | V | X | | Intel E823-C Family | V | V | X | +| Intel E823-L SFP Family | V | V | X | +| Intel E823-L Backplane Family | V | V | X | | Mellanox MT27700 Family [ConnectX-4] | V | V | V | | Mellanox MT27710 Family [ConnectX-4 Lx] | V | V | V | | Mellanox MT27800 Family [ConnectX-5] | V | V | V | From 
6556c92a3d47b1c1d87136eff437d10dbe4c2562 Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Thu, 19 Sep 2024 11:50:21 +0300 Subject: [PATCH 21/59] Add NVIDIA ConnectX-8 to supported NICs list Signed-off-by: Ivan Kolodiazhnyi --- deploy/configmap.yaml | 1 + .../sriov-network-operator-chart/templates/configmap.yaml | 1 + doc/supported-hardware.md | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml index b21b07ea6..98845d907 100644 --- a/deploy/configmap.yaml +++ b/deploy/configmap.yaml @@ -26,6 +26,7 @@ data: Nvidia_mlx5_ConnectX-6_Dx: "15b3 101d 101e" Nvidia_mlx5_ConnectX-6_Lx: "15b3 101f 101e" Nvidia_mlx5_ConnectX-7: "15b3 1021 101e" + Nvidia_mlx5_ConnectX-8: "15b3 1023 101e" Nvidia_mlx5_MT42822_BlueField-2_integrated_ConnectX-6_Dx: "15b3 a2d6 101e" Nvidia_mlx5_MT43244_BlueField-3_integrated_ConnectX-7_Dx: "15b3 a2dc 101e" Broadcom_bnxt_BCM57414_2x25G: "14e4 16d7 16dc" diff --git a/deployment/sriov-network-operator-chart/templates/configmap.yaml b/deployment/sriov-network-operator-chart/templates/configmap.yaml index 6f6ab3bcc..b250ddfe5 100644 --- a/deployment/sriov-network-operator-chart/templates/configmap.yaml +++ b/deployment/sriov-network-operator-chart/templates/configmap.yaml @@ -26,6 +26,7 @@ data: Nvidia_mlx5_ConnectX-6_Dx: "15b3 101d 101e" Nvidia_mlx5_ConnectX-6_Lx: "15b3 101f 101e" Nvidia_mlx5_ConnectX-7: "15b3 1021 101e" + Nvidia_mlx5_ConnectX-8: "15b3 1023 101e" Nvidia_mlx5_MT42822_BlueField-2_integrated_ConnectX-6_Dx: "15b3 a2d6 101e" Nvidia_mlx5_MT43244_BlueField-3_integrated_ConnectX-7_Dx: "15b3 a2dc 101e" Broadcom_bnxt_BCM57414_2x25G: "14e4 16d7 16dc" diff --git a/doc/supported-hardware.md b/doc/supported-hardware.md index 446190905..7e2c3002c 100644 --- a/doc/supported-hardware.md +++ b/doc/supported-hardware.md @@ -21,7 +21,8 @@ The following SR-IOV capable hardware is supported with sriov-network-operator: | Mellanox MT28908 Family [ConnectX-6] | 15b3 | 101b | | Mellanox MT28908 Family [ConnectX-6 Dx] | 15b3 | 101d | | Mellanox MT28908 Family [ConnectX-6 Lx] | 15b3 | 101f | -| Mellanox MT2910 Family [ConnectX-7 | 15b3 | 1021 | +| Mellanox MT2910 Family [ConnectX-7] | 15b3 | 1021 | +| Mellanox CX8 Family [ConnectX-8] | 15b3 | 1023 | | Mellanox MT42822 BlueField-2 integrated ConnectX-6 Dx | 15b3 | a2d6 | | Mellanox MT43244 BlueField-3 integrated ConnectX-7 Dx | 15b3 | a2dc | | Qlogic QL45000 Series 50GbE Controller | 1077 | 1654 | @@ -62,6 +63,7 @@ The following table depicts the supported SR-IOV hardware features of each suppo | Mellanox MT28908 Family [ConnectX-6 Dx] | V | V | V | | Mellanox MT28908 Family [ConnectX-6 Lx] | V | V | V | | Mellanox MT28908 Family [ConnectX-7] | V | V | V | +| Mellanox CX8 Family [ConnectX-8] | V | V | V | | Mellanox MT42822 BlueField-2 integrated ConnectX-6 Dx | V | V | V | | Mellanox MT43244 BlueField-3 integrated ConnectX-6 Dx | V | V | V | | Qlogic QL45000 Series 50GbE Controller | V | X | X | From 9782923ca92fc34454081ede724bbaed191da200 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 18 Oct 2024 13:11:18 +0200 Subject: [PATCH 22/59] logging: Reduce device discovering verbosity The `DiscoverSriovDevices` routine produces a huge amount of log entries, making debugging problems hard. Remove log entries that can produce a log line for each configured VF and which does not produce any change in the environment. 
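For illustration only (this snippet is not part of the patch, and the slice of PCI addresses is a placeholder): the kind of entry being removed is a verbosity-2 log call sitting inside the per-VF discovery loop, which emits one line per configured VF on every pass even when nothing on the host changes.

```
package main

import "sigs.k8s.io/controller-runtime/pkg/log"

// Illustrative only: once the daemon has configured its logger, a V(2) call inside
// the per-VF discovery loop produces one entry per configured VF on every
// discovery pass, even though nothing in the environment has changed.
func main() {
	vfPciAddresses := []string{"0000:3b:02.0", "0000:3b:02.1", "0000:3b:02.2"} // placeholder addresses
	for _, addr := range vfPciAddresses {
		log.Log.V(2).Info("discovering VF", "pciAddress", addr)
	}
}
```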
Signed-off-by: Andrea Panattoni --- pkg/host/internal/network/network.go | 2 +- pkg/host/internal/vdpa/vdpa.go | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index 2eb40dd69..ef85ad24a 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -75,7 +75,7 @@ func (n *network) TryToGetVirtualInterfaceName(pciAddr string) string { func (n *network) TryGetInterfaceName(pciAddr string) string { names, err := n.dputilsLib.GetNetNames(pciAddr) if err != nil || len(names) < 1 { - log.Log.Error(err, "TryGetInterfaceName(): failed to get interface name") + log.Log.Error(err, "TryGetInterfaceName(): failed to get interface name", "pciAddress", pciAddr) return "" } netDevName := names[0] diff --git a/pkg/host/internal/vdpa/vdpa.go b/pkg/host/internal/vdpa/vdpa.go index 4a41c63d1..e21d00cb6 100644 --- a/pkg/host/internal/vdpa/vdpa.go +++ b/pkg/host/internal/vdpa/vdpa.go @@ -94,11 +94,9 @@ func (v *vdpa) DeleteVDPADevice(pciAddr string) error { func (v *vdpa) DiscoverVDPAType(pciAddr string) string { expectedVDPAName := generateVDPADevName(pciAddr) funcLog := log.Log.WithValues("device", pciAddr, "name", expectedVDPAName) - funcLog.V(2).Info("DiscoverVDPAType() discover device type") _, err := v.netlinkLib.VDPAGetDevByName(expectedVDPAName) if err != nil { if errors.Is(err, syscall.ENODEV) { - funcLog.V(2).Info("DiscoverVDPAType(): VDPA device for VF not found") return "" } if errors.Is(err, syscall.ENOENT) { From b5b0d6b2177231d7faca9db52ced9c25f50cab0b Mon Sep 17 00:00:00 2001 From: Soule BA Date: Tue, 22 Oct 2024 14:28:56 +0200 Subject: [PATCH 23/59] Add a note in documentation regarding systemd mode Signed-off-by: Soule BA --- deployment/sriov-network-operator-chart/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deployment/sriov-network-operator-chart/README.md b/deployment/sriov-network-operator-chart/README.md index a867613b2..d5d529dc9 100644 --- a/deployment/sriov-network-operator-chart/README.md +++ b/deployment/sriov-network-operator-chart/README.md @@ -135,6 +135,11 @@ This section contains general parameters that apply to both the operator and dae | `sriovOperatorConfig.configurationMode` | string | `daemon` | sriov-network-config-daemon configuration mode. either `daemon` or `systemd` | | `sriovOperatorConfig.featureGates` | map[string]bool | `{}` | feature gates to enable/disable | +**Note** + +When `sriovOperatorConfig.configurationMode` is configured as `systemd`, configurations files and `systemd` service files are created on the node. +Upon chart deletion, those files are not cleaned up. For cases where this is not acceptable, users should rather configured the `daemon` mode. 
+ ### Images parameters | Name | description | From dc299c464d838a4d73dffe1978cc9edac0bc64fb Mon Sep 17 00:00:00 2001 From: Ido Heyvi Date: Sun, 27 Oct 2024 16:04:24 +0200 Subject: [PATCH 24/59] Fixing daemon sriov VFs config, where PF pci address got unbind instead of VF address, in case of using IB link type Signed-off-by: Ido Heyvi --- pkg/host/internal/sriov/sriov.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index bd453ae30..bf9919a7e 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -484,7 +484,7 @@ func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error { if err := s.infinibandHelper.ConfigureVfGUID(addr, iface.PciAddress, vfID, pfLink); err != nil { return err } - if err := s.kernelHelper.Unbind(iface.PciAddress); err != nil { + if err := s.kernelHelper.Unbind(addr); err != nil { return err } } else { From df1407d3a6af01f9d5e8bfe859ef39e07d29aaa3 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 29 Oct 2024 13:51:45 +0200 Subject: [PATCH 25/59] Fix k8s CI have a service that will load the br_netfilter driver after reboot Signed-off-by: Sebastian Sch --- hack/run-e2e-conformance-virtual-cluster.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh index 1a75a280d..d6fa44fd9 100755 --- a/hack/run-e2e-conformance-virtual-cluster.sh +++ b/hack/run-e2e-conformance-virtual-cluster.sh @@ -196,6 +196,22 @@ WantedBy=default.target' > /etc/systemd/system/disable-offload.service systemctl daemon-reload systemctl enable --now disable-offload +echo '[Unit] +Description=load br_netfilter +After=network.target + +[Service] +Type=oneshot +ExecStart=/usr/bin/bash -c "modprobe br_netfilter" +StandardOutput=journal+console +StandardError=journal+console + +[Install] +WantedBy=default.target' > /etc/systemd/system/load-br-netfilter.service + +systemctl daemon-reload +systemctl enable --now load-br-netfilter + systemctl restart NetworkManager EOF From 0d9a7070041b8256cd0ece643d0f91aa4cd1e5bc Mon Sep 17 00:00:00 2001 From: Ido Heyvi Date: Sun, 13 Oct 2024 10:52:43 +0300 Subject: [PATCH 26/59] adding sriov operator config finalizer, to control generated cluster level objects cleanup Signed-off-by: Ido Heyvi --- api/v1/helper.go | 11 ++--- controllers/sriovoperatorconfig_controller.go | 37 ++++++++++++++++- .../sriovoperatorconfig_controller_test.go | 40 ++++++++++++++++++- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/api/v1/helper.go b/api/v1/helper.go index bfdfbc473..62ea0d2a5 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -26,11 +26,12 @@ import ( ) const ( - LASTNETWORKNAMESPACE = "operator.sriovnetwork.openshift.io/last-network-namespace" - NETATTDEFFINALIZERNAME = "netattdef.finalizers.sriovnetwork.openshift.io" - POOLCONFIGFINALIZERNAME = "poolconfig.finalizers.sriovnetwork.openshift.io" - ESwithModeLegacy = "legacy" - ESwithModeSwitchDev = "switchdev" + LASTNETWORKNAMESPACE = "operator.sriovnetwork.openshift.io/last-network-namespace" + NETATTDEFFINALIZERNAME = "netattdef.finalizers.sriovnetwork.openshift.io" + POOLCONFIGFINALIZERNAME = "poolconfig.finalizers.sriovnetwork.openshift.io" + OPERATORCONFIGFINALIZERNAME = "operatorconfig.finalizers.sriovnetwork.openshift.io" + ESwithModeLegacy = "legacy" + ESwithModeSwitchDev = "switchdev" SriovCniStateEnable = "enable" SriovCniStateDisable = "disable" diff --git 
a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 377ebd2de..c9f21f428 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -40,6 +40,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/go-logr/logr" machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" @@ -83,8 +84,6 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. if err != nil { if apierrors.IsNotFound(err) { logger.Info("default SriovOperatorConfig object not found. waiting for creation.") - - err := r.deleteAllWebhooks(ctx) return reconcile.Result{}, err } // Error reading the object - requeue the request. @@ -94,6 +93,19 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. snolog.SetLogLevel(defaultConfig.Spec.LogLevel) + // examine DeletionTimestamp to determine if object is under deletion + if !defaultConfig.ObjectMeta.DeletionTimestamp.IsZero() { + // The object is being deleted + return r.handleSriovOperatorConfigDeletion(ctx, defaultConfig, logger) + } + // add finalizer if needed + if !sriovnetworkv1.StringInArray(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) { + defaultConfig.ObjectMeta.Finalizers = append(defaultConfig.ObjectMeta.Finalizers, sriovnetworkv1.OPERATORCONFIGFINALIZERNAME) + if err := r.Update(ctx, defaultConfig); err != nil { + return reconcile.Result{}, err + } + } + r.FeatureGate.Init(defaultConfig.Spec.FeatureGates) logger.Info("enabled featureGates", "featureGates", r.FeatureGate.String()) @@ -434,6 +446,27 @@ func (r *SriovOperatorConfigReconciler) syncOpenShiftSystemdService(ctx context. return r.setLabelInsideObject(ctx, cr, objs) } +func (r *SriovOperatorConfigReconciler) handleSriovOperatorConfigDeletion(ctx context.Context, + defaultConfig *sriovnetworkv1.SriovOperatorConfig, logger logr.Logger) (ctrl.Result, error) { + var err error + if sriovnetworkv1.StringInArray(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) { + // our finalizer is present, so lets handle any external dependency + logger.Info("delete SriovOperatorConfig CR", "Namespace", defaultConfig.Namespace, "Name", defaultConfig.Name) + // make sure webhooks objects are deleted prior of removing finalizer + err = r.deleteAllWebhooks(ctx) + if err != nil { + return reconcile.Result{}, err + } + // remove our finalizer from the list and update it. 
+ defaultConfig.ObjectMeta.Finalizers, _ = sriovnetworkv1.RemoveString(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) + if err := r.Update(ctx, defaultConfig); err != nil { + return reconcile.Result{}, err + } + } + + return reconcile.Result{}, err +} + func (r SriovOperatorConfigReconciler) setLabelInsideObject(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig, objs []*uns.Unstructured) error { logger := log.Log.WithName("setLabelInsideObject") for _, obj := range objs { diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 7f6db3522..47e4fc09d 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -102,9 +102,15 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Context("When is up", func() { BeforeEach(func() { + var err error config := &sriovnetworkv1.SriovOperatorConfig{} - err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) + err = util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) Expect(err).NotTo(HaveOccurred()) + // in case controller yet to add object's finalizer (e.g whenever test deferCleanup is creating new 'default' config object) + if len(config.Finalizers) == 0 { + err = util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) + Expect(err).NotTo(HaveOccurred()) + } config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{ EnableInjector: true, EnableOperatorWebhook: true, @@ -240,6 +246,38 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { client.ObjectKey{Name: "network-resources-injector-config"}) }) + It("should add/delete finalizer 'operatorconfig' when SriovOperatorConfig/default is added/deleted", func() { + DeferCleanup(k8sClient.Create, context.Background(), makeDefaultSriovOpConfig()) + + // verify that finalizer has been added upon object creation + config := &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + err := k8sClient.Delete(context.Background(), &sriovnetworkv1.SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: "default"}, + }) + Expect(err).NotTo(HaveOccurred()) + + // verify that finalizer has been removed + var empty []string + config = &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal(empty)) + }) + It("should be able to update the node selector of sriov-network-config-daemon", func() { By("specify the configDaemonNodeSelector") nodeSelector := map[string]string{"node-role.kubernetes.io/worker": ""} From b1bb0443823ed741d52f8fc55739d93dd6dc0ef6 Mon Sep 17 00:00:00 2001 From: Ido Heyvi Date: Mon, 28 Oct 2024 15:14:05 +0200 Subject: [PATCH 27/59] adding sriov 
operator config cleanup binary, to be used under helm uninstall pre-delete hook Signed-off-by: Ido Heyvi --- Dockerfile | 2 + Makefile | 2 +- .../cleanup.go | 83 ++++++++ .../cleanup_test.go | 177 ++++++++++++++++++ .../main.go | 38 ++++ .../suite_test.go | 121 ++++++++++++ .../templates/pre-delete-webooks.yaml | 27 +++ 7 files changed, 449 insertions(+), 1 deletion(-) create mode 100644 cmd/sriov-network-operator-config-cleanup/cleanup.go create mode 100644 cmd/sriov-network-operator-config-cleanup/cleanup_test.go create mode 100644 cmd/sriov-network-operator-config-cleanup/main.go create mode 100644 cmd/sriov-network-operator-config-cleanup/suite_test.go create mode 100644 deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml diff --git a/Dockerfile b/Dockerfile index 2b26247e8..7735bef7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,9 +2,11 @@ FROM golang:1.22 AS builder WORKDIR /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator COPY . . RUN make _build-manager BIN_PATH=build/_output/cmd +RUN make _build-sriov-network-operator-config-cleanup BIN_PATH=build/_output/cmd FROM quay.io/centos/centos:stream9 COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/manager /usr/bin/sriov-network-operator +COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/sriov-network-operator-config-cleanup /usr/bin/sriov-network-operator-config-cleanup COPY bindata /bindata ENV OPERATOR_NAME=sriov-network-operator CMD ["/usr/bin/sriov-network-operator"] diff --git a/Makefile b/Makefile index 3718b75bd..310f1dc52 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ GOLANGCI_LINT_VER = v1.55.2 all: generate lint build -build: manager _build-sriov-network-config-daemon _build-webhook +build: manager _build-sriov-network-config-daemon _build-webhook _build-sriov-network-operator-config-cleanup _build-%: WHAT=$* hack/build-go.sh diff --git a/cmd/sriov-network-operator-config-cleanup/cleanup.go b/cmd/sriov-network-operator-config-cleanup/cleanup.go new file mode 100644 index 000000000..e53deba34 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/cleanup.go @@ -0,0 +1,83 @@ +package main + +import ( + "context" + "time" + + "github.com/spf13/cobra" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/log" + + snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" + + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/watch" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/typed/sriovnetwork/v1" +) + +var ( + namespace string + watchTO int +) + +func init() { + rootCmd.Flags().StringVarP(&namespace, "namespace", "n", "", "designated SriovOperatorConfig namespace") + rootCmd.Flags().IntVarP(&watchTO, "watch-timeout", "w", 10, "sriov-operator config post-delete watch timeout ") +} + +func runCleanupCmd(cmd *cobra.Command, args []string) error { + // init logger + snolog.InitLog() + setupLog := log.Log.WithName("sriov-network-operator-config-cleanup") + setupLog.Info("Run sriov-network-operator-config-cleanup") + + // adding context timeout although client-go Delete should be non-blocking by default + ctx, timeoutFunc := context.WithTimeout(context.Background(), time.Second*time.Duration(watchTO)) + defer timeoutFunc() + + restConfig := ctrl.GetConfigOrDie() + sriovcs, err := sriovnetworkv1.NewForConfig(restConfig) + if err 
!= nil { + setupLog.Error(err, "failed to create 'sriovnetworkv1' clientset") + } + + err = sriovcs.SriovOperatorConfigs(namespace).Delete(context.Background(), "default", metav1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil + } + setupLog.Error(err, "failed to delete SriovOperatorConfig") + return err + } + + // watching 'default' config deletion with context timeout, in case sriov-operator fails to delete 'default' config + watcher, err := sriovcs.SriovOperatorConfigs(namespace).Watch(ctx, metav1.ListOptions{Watch: true}) + if err != nil { + setupLog.Error(err, "failed creating 'default' SriovOperatorConfig object watcher") + return err + } + defer watcher.Stop() + for { + select { + case event := <-watcher.ResultChan(): + if event.Type == watch.Deleted { + setupLog.Info("'default' SriovOperatorConfig is deleted") + return nil + } + + case <-ctx.Done(): + // check whether object might has been deleted before watch event triggered + _, err := sriovcs.SriovOperatorConfigs(namespace).Get(context.Background(), "default", metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil + } + } + err = ctx.Err() + setupLog.Error(err, "timeout has occurred for 'default' SriovOperatorConfig deletion") + return err + } + } +} diff --git a/cmd/sriov-network-operator-config-cleanup/cleanup_test.go b/cmd/sriov-network-operator-config-cleanup/cleanup_test.go new file mode 100644 index 000000000..f7926d834 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/cleanup_test.go @@ -0,0 +1,177 @@ +package main + +import ( + "context" + "sync" + + "github.com/golang/mock/gomock" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/manager" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/controllers" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" + mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" +) + +type configController struct { + k8sManager manager.Manager + ctx context.Context + cancel context.CancelFunc + wg *sync.WaitGroup +} + +var ( + controller *configController + testNamespace string = "sriov-network-operator" + defaultSriovOperatorSpec = sriovnetworkv1.SriovOperatorConfigSpec{ + EnableInjector: true, + EnableOperatorWebhook: true, + LogLevel: 2, + FeatureGates: nil, + } +) + +var _ = Describe("cleanup", Ordered, func() { + BeforeAll(func() { + By("Create SriovOperatorConfig controller k8s objs") + config := getDefaultSriovOperatorConfig() + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + + somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{} + somePolicy.SetNamespace(testNamespace) + somePolicy.SetName("some-policy") + somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{ + NumVfs: 5, + NodeSelector: map[string]string{"foo": "bar"}, + NicSelector: sriovnetworkv1.SriovNetworkNicSelector{}, + Priority: 20, + } + Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred()) + DeferCleanup(func() { + err := k8sClient.Delete(context.Background(), 
somePolicy) + Expect(err).ToNot(HaveOccurred()) + }) + + controller = newConfigController() + + }) + + It("test webhook cleanup flow", func() { + controller.start() + defer controller.stop() + + cmd := &cobra.Command{} + namespace = testNamespace + // verify that finalizer has been added, by controller, upon object creation + config := &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + Expect(runCleanupCmd(cmd, []string{})).Should(Succeed()) + config = &sriovnetworkv1.SriovOperatorConfig{} + err := util.WaitForNamespacedObjectDeleted(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) + Expect(err).NotTo(HaveOccurred()) + + }) + + It("test 'default' config cleanup timeout", func() { + // in this test case sriov-operator controller has been scaled down. + // we are testing returned ctx timeout error, for not being able to delete 'default' config object + config := getDefaultSriovOperatorConfig() + config.Finalizers = []string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME} + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + + cmd := &cobra.Command{} + namespace = testNamespace + // verify that finalizer has been added, by controller, upon object creation + config = &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + watchTO = 1 + err := runCleanupCmd(cmd, []string{}) + Expect(err.Error()).To(ContainSubstring("context deadline exceeded")) + }) +}) + +func getDefaultSriovOperatorConfig() *sriovnetworkv1.SriovOperatorConfig { + return &sriovnetworkv1.SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + Namespace: testNamespace, + }, + Spec: defaultSriovOperatorSpec, + } +} + +func newConfigController() *configController { + // setup controller manager + By("Setup controller manager") + k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + }) + Expect(err).ToNot(HaveOccurred()) + + t := GinkgoT() + mockCtrl := gomock.NewController(t) + platformHelper := mock_platforms.NewMockInterface(mockCtrl) + platformHelper.EXPECT().GetFlavor().Return(openshift.OpenshiftFlavorDefault).AnyTimes() + platformHelper.EXPECT().IsOpenshiftCluster().Return(false).AnyTimes() + platformHelper.EXPECT().IsHypershift().Return(false).AnyTimes() + + err = (&controllers.SriovOperatorConfigReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + PlatformHelper: platformHelper, + FeatureGate: featuregate.New(), + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + + ctx, cancel := context.WithCancel(context.Background()) + wg := sync.WaitGroup{} + controller = &configController{ + k8sManager: k8sManager, + ctx: ctx, + cancel: cancel, + wg: &wg, + } + + return controller +} + +func (c *configController) start() { + c.wg.Add(1) + go func() { + defer c.wg.Done() + 
defer GinkgoRecover() + By("Start controller manager") + err := c.k8sManager.Start(c.ctx) + Expect(err).ToNot(HaveOccurred()) + }() +} + +func (c *configController) stop() { + c.cancel() + c.wg.Wait() +} diff --git a/cmd/sriov-network-operator-config-cleanup/main.go b/cmd/sriov-network-operator-config-cleanup/main.go new file mode 100644 index 000000000..51874e54e --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/main.go @@ -0,0 +1,38 @@ +package main + +import ( + "flag" + "os" + + "github.com/spf13/cobra" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/log" + + snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" +) + +const ( + componentName = "sriov-network-operator-config-cleanup" +) + +var ( + rootCmd = &cobra.Command{ + Use: componentName, + Short: "Removes 'default' SriovOperatorConfig", + Long: `Removes 'default' SriovOperatorConfig in order to cleanup non-namespaced objects e.g clusterroles/clusterrolebinding/validating/mutating webhooks + +Example: sriov-network-operator-config-cleanup -n `, + RunE: runCleanupCmd, + } +) + +func main() { + klog.InitFlags(nil) + snolog.BindFlags(flag.CommandLine) + rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine) + + if err := rootCmd.Execute(); err != nil { + log.Log.Error(err, "Error executing sriov-network-operator-config-cleanup") + os.Exit(1) + } +} diff --git a/cmd/sriov-network-operator-config-cleanup/suite_test.go b/cmd/sriov-network-operator-config-cleanup/suite_test.go new file mode 100644 index 000000000..ee1815ff7 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/suite_test.go @@ -0,0 +1,121 @@ +package main + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "go.uber.org/zap/zapcore" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + + //+kubebuilder:scaffold:imports + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +var ( + k8sClient client.Client + testEnv *envtest.Environment + cfg *rest.Config + kubecfgPath string +) + +var _ = BeforeSuite(func() { + + logf.SetLogger(zap.New( + zap.WriteTo(GinkgoWriter), + zap.UseDevMode(true), + func(o *zap.Options) { + o.TimeEncoder = zapcore.RFC3339NanoTimeEncoder + })) + + // Go to project root directory + err := os.Chdir("../..") + Expect(err).NotTo(HaveOccurred()) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("config", "crd", "bases"), filepath.Join("test", "util", "crds")}, + ErrorIfCRDPathMissing: true, + } + + testEnv.ControlPlane.GetAPIServer().Configure().Set("disable-admission-plugins", "MutatingAdmissionWebhook", "ValidatingAdmissionWebhook") + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + apiserverDir := testEnv.ControlPlane.GetAPIServer().CertDir + kubecfgPath = findKubecfg(apiserverDir, ".kubecfg") + err = os.Setenv("KUBECONFIG", kubecfgPath) + Expect(err).NotTo(HaveOccurred()) + + By("registering schemes") + err = sriovnetworkv1.AddToScheme(scheme.Scheme) + 
Expect(err).NotTo(HaveOccurred()) + vars.Config = cfg + vars.Scheme = scheme.Scheme + vars.Namespace = testNamespace + + By("creating K8s client") + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + By("creating default/common k8s objects for tests") + // Create test namespace + ns := &corev1.Namespace{ + TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: testNamespace, + }, + Spec: corev1.NamespaceSpec{}, + Status: corev1.NamespaceStatus{}, + } + ctx := context.Background() + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + if testEnv != nil { + Eventually(func() error { + return testEnv.Stop() + }, util.APITimeout, time.Second).ShouldNot(HaveOccurred()) + } +}) + +func findKubecfg(path, ext string) string { + var cfg string + filepath.WalkDir(path, func(s string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if filepath.Ext(d.Name()) == ext { + cfg = s + } + return nil + }) + return cfg +} + +func TestAPIs(t *testing.T) { + _, reporterConfig := GinkgoConfiguration() + + RegisterFailHandler(Fail) + + RunSpecs(t, "operator-webhook Suite", reporterConfig) +} diff --git a/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml new file mode 100644 index 000000000..8fc7fa06b --- /dev/null +++ b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml @@ -0,0 +1,27 @@ +# The following job will be used as Helm pre-delete hook. It executes a small go-client binary +# which intent to delete 'default' SriovOperatorConfig, that triggers operator removal of generated cluster objects +# e.g. mutating/validating webhooks, within operator's recoinciling loop and +# preventing operator cluster object remainings while using helm uninstall +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "sriov-network-operator.fullname" . }}-pre-delete-hook + namespace: {{ .Release.Namespace }} + annotations: + "helm.sh/hook": pre-delete + "helm.sh/hook-delete-policy": hook-succeeded,hook-failed +spec: + template: + spec: + serviceAccountName: {{ include "sriov-network-operator.fullname" . }} + containers: + - name: cleanup + image: {{ .Values.images.operator }} + command: + - sriov-network-operator-config-cleanup + args: + - --namespace + - {{ .Release.Namespace }} + restartPolicy: Never + backoffLimit: 2 + From 6d32ec0745d31821eddfcf77a2a314ddb146c0e8 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 25 Oct 2024 09:14:08 +0200 Subject: [PATCH 28/59] kernel: Set arguments based on CPU architecture Kernel arguments like `intel_iommu=on` does not have sense on AMD or ARM systems and some user might complain about their presence, though they are likely to be harmless. Also, on ARM systems the `iommu.passthrough` parameter is the one to use [1]. Improve `GHWLib` to bridge CPU information from the library. Add `CpuInfoProviderInterface` and inject it into the GenericPlugin to implement the per CPU vendor logic. 
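As a rough sketch of how the new vendor information could drive the argument selection (this is not the generic_plugin.go change from this patch; the per-vendor mapping below is an assumption for illustration, while the constant and type names are the ones added by the diff):

```
package main

import (
	"fmt"

	"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
	"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types"
)

// iommuKernelArgs is a hypothetical helper: it picks IOMMU-related kernel arguments
// based on the CPU vendor reported by GetCPUVendor(). The exact mapping used by the
// generic plugin may differ; this only illustrates the intent of the patch.
func iommuKernelArgs(vendor types.CPUVendor) []string {
	switch vendor {
	case types.CPUVendorIntel:
		// intel_iommu=on is meaningful only on Intel platforms.
		return []string{consts.KernelArgIntelIommu, consts.KernelArgIommuPt}
	case types.CPUVendorAMD:
		// Assumption: AMD systems still get pass-through mode, without the Intel-only flag.
		return []string{consts.KernelArgIommuPt}
	case types.CPUVendorARM:
		// On ARM, iommu.passthrough=1 is the relevant parameter.
		return []string{consts.KernelArgIommuPassthrough}
	}
	return nil
}

func main() {
	fmt.Println(iommuKernelArgs(types.CPUVendorIntel))
}
```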
[1] https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt#L2343 Signed-off-by: Andrea Panattoni --- pkg/consts/constants.go | 7 ++-- pkg/helper/mock/mock_helper.go | 15 +++++++ pkg/host/internal/cpu/cpu.go | 40 ++++++++++++++++++ pkg/host/internal/lib/ghw/ghw.go | 8 ++++ pkg/host/internal/lib/ghw/mock/mock_ghw.go | 16 ++++++++ pkg/host/manager.go | 5 +++ pkg/host/mock/mock_host.go | 15 +++++++ pkg/host/types/interfaces.go | 13 ++++++ pkg/plugins/generic/generic_plugin.go | 27 ++++++++++++- pkg/plugins/generic/generic_plugin_test.go | 47 +++++++++++++++++----- 10 files changed, 178 insertions(+), 15 deletions(-) create mode 100644 pkg/host/internal/cpu/cpu.go diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f3c076111..f7025c90d 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -121,9 +121,10 @@ const ( `IMPORT{program}="/etc/udev/switchdev-vf-link-name.sh $attr{phys_port_name}", ` + `NAME="%s_$env{NUMBER}"` - KernelArgPciRealloc = "pci=realloc" - KernelArgIntelIommu = "intel_iommu=on" - KernelArgIommuPt = "iommu=pt" + KernelArgPciRealloc = "pci=realloc" + KernelArgIntelIommu = "intel_iommu=on" + KernelArgIommuPt = "iommu=pt" + KernelArgIommuPassthrough = "iommu.passthrough=1" // Feature gates // ParallelNicConfigFeatureGate: allow to configure nics in parallel diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index cfca2a768..432d741be 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -351,6 +351,21 @@ func (mr *MockHostHelpersInterfaceMockRecorder) EnableService(service interface{ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnableService", reflect.TypeOf((*MockHostHelpersInterface)(nil).EnableService), service) } +// GetCPUVendor mocks base method. +func (m *MockHostHelpersInterface) GetCPUVendor() (types.CPUVendor, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetCPUVendor") + ret0, _ := ret[0].(types.CPUVendor) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetCPUVendor indicates an expected call of GetCPUVendor. +func (mr *MockHostHelpersInterfaceMockRecorder) GetCPUVendor() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCPUVendor", reflect.TypeOf((*MockHostHelpersInterface)(nil).GetCPUVendor)) +} + // GetCheckPointNodeState mocks base method. 
func (m *MockHostHelpersInterface) GetCheckPointNodeState() (*v1.SriovNetworkNodeState, error) { m.ctrl.T.Helper() diff --git a/pkg/host/internal/cpu/cpu.go b/pkg/host/internal/cpu/cpu.go new file mode 100644 index 000000000..fd02157e6 --- /dev/null +++ b/pkg/host/internal/cpu/cpu.go @@ -0,0 +1,40 @@ +package cpu + +import ( + "fmt" + + ghwPkg "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/ghw" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" +) + +type cpuInfoProvider struct { + ghwLib ghwPkg.GHWLib +} + +func New(ghwLib ghwPkg.GHWLib) *cpuInfoProvider { + return &cpuInfoProvider{ + ghwLib: ghwLib, + } +} + +func (c *cpuInfoProvider) GetCPUVendor() (types.CPUVendor, error) { + cpuInfo, err := c.ghwLib.CPU() + if err != nil { + return -1, fmt.Errorf("can't retrieve the CPU vendor: %w", err) + } + + if len(cpuInfo.Processors) == 0 { + return -1, fmt.Errorf("wrong CPU information retrieved: %v", cpuInfo) + } + + switch cpuInfo.Processors[0].Vendor { + case "GenuineIntel": + return types.CPUVendorIntel, nil + case "AuthenticAMD": + return types.CPUVendorAMD, nil + case "ARM": + return types.CPUVendorARM, nil + } + + return -1, fmt.Errorf("unknown CPU vendor: %s", cpuInfo.Processors[0].Vendor) +} diff --git a/pkg/host/internal/lib/ghw/ghw.go b/pkg/host/internal/lib/ghw/ghw.go index 6a6829604..d518977e4 100644 --- a/pkg/host/internal/lib/ghw/ghw.go +++ b/pkg/host/internal/lib/ghw/ghw.go @@ -2,6 +2,7 @@ package ghw import ( "github.com/jaypipes/ghw" + "github.com/jaypipes/ghw/pkg/cpu" ) func New() GHWLib { @@ -12,6 +13,9 @@ func New() GHWLib { type GHWLib interface { // PCI returns a pointer to an Info that provide methods to access info about devices PCI() (Info, error) + + // CPU returns a pointer to an Info that provide methods to access info about devices + CPU() (*cpu.Info, error) } // Info interface provide methods to access info about devices @@ -27,3 +31,7 @@ type libWrapper struct{} func (w *libWrapper) PCI() (Info, error) { return ghw.PCI() } + +func (w *libWrapper) CPU() (*cpu.Info, error) { + return ghw.CPU() +} diff --git a/pkg/host/internal/lib/ghw/mock/mock_ghw.go b/pkg/host/internal/lib/ghw/mock/mock_ghw.go index 2e2b4b5c5..9d6092362 100644 --- a/pkg/host/internal/lib/ghw/mock/mock_ghw.go +++ b/pkg/host/internal/lib/ghw/mock/mock_ghw.go @@ -9,6 +9,7 @@ import ( gomock "github.com/golang/mock/gomock" ghw "github.com/jaypipes/ghw" + cpu "github.com/jaypipes/ghw/pkg/cpu" ghw0 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/ghw" ) @@ -35,6 +36,21 @@ func (m *MockGHWLib) EXPECT() *MockGHWLibMockRecorder { return m.recorder } +// CPU mocks base method. +func (m *MockGHWLib) CPU() (*cpu.Info, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CPU") + ret0, _ := ret[0].(*cpu.Info) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CPU indicates an expected call of CPU. +func (mr *MockGHWLibMockRecorder) CPU() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CPU", reflect.TypeOf((*MockGHWLib)(nil).CPU)) +} + // PCI mocks base method. 
func (m *MockGHWLib) PCI() (ghw0.Info, error) { m.ctrl.T.Helper() diff --git a/pkg/host/manager.go b/pkg/host/manager.go index 02a77a659..44bd45807 100644 --- a/pkg/host/manager.go +++ b/pkg/host/manager.go @@ -2,6 +2,7 @@ package host import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/bridge" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/cpu" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/infiniband" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/kernel" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/dputils" @@ -30,6 +31,7 @@ type HostManagerInterface interface { types.VdpaInterface types.InfinibandInterface types.BridgeInterface + types.CPUInfoProviderInterface } type hostManager struct { @@ -42,6 +44,7 @@ type hostManager struct { types.VdpaInterface types.InfinibandInterface types.BridgeInterface + types.CPUInfoProviderInterface } func NewHostManager(utilsInterface utils.CmdInterface) (HostManagerInterface, error) { @@ -61,6 +64,7 @@ func NewHostManager(utilsInterface utils.CmdInterface) (HostManagerInterface, er } br := bridge.New() sr := sriov.New(utilsInterface, k, n, u, v, ib, netlinkLib, dpUtils, sriovnetLib, ghwLib, br) + cpuInfoProvider := cpu.New(ghwLib) return &hostManager{ utilsInterface, k, @@ -71,5 +75,6 @@ func NewHostManager(utilsInterface utils.CmdInterface) (HostManagerInterface, er v, ib, br, + cpuInfoProvider, }, nil } diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index cb4d1480a..5ebed46aa 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -321,6 +321,21 @@ func (mr *MockHostManagerInterfaceMockRecorder) EnableService(service interface{ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnableService", reflect.TypeOf((*MockHostManagerInterface)(nil).EnableService), service) } +// GetCPUVendor mocks base method. +func (m *MockHostManagerInterface) GetCPUVendor() (types.CPUVendor, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetCPUVendor") + ret0, _ := ret[0].(types.CPUVendor) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetCPUVendor indicates an expected call of GetCPUVendor. +func (mr *MockHostManagerInterfaceMockRecorder) GetCPUVendor() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCPUVendor", reflect.TypeOf((*MockHostManagerInterface)(nil).GetCPUVendor)) +} + // GetCurrentKernelArgs mocks base method. 
func (m *MockHostManagerInterface) GetCurrentKernelArgs() (string, error) { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index 5918dca34..c6e0c8faf 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -187,3 +187,16 @@ type InfinibandInterface interface { // ConfigureVfGUID configures and sets a GUID for an IB VF device ConfigureVfGUID(vfAddr string, pfAddr string, vfID int, pfLink netlink.Link) error } + +type CPUVendor int + +const ( + CPUVendorIntel CPUVendor = iota + CPUVendorAMD + CPUVendorARM +) + +type CPUInfoProviderInterface interface { + // Retrieve the CPU vendor of the current system + GetCPUVendor() (CPUVendor, error) +} diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 14b1903e5..552f8142a 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -13,6 +13,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" + hostTypes "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" @@ -419,9 +420,31 @@ func (p *GenericPlugin) shouldConfigureBridges() bool { func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetworkNodeState) { driverState := p.DriverStateMap[Vfio] + + kernelArgFnByCPUVendor := map[hostTypes.CPUVendor]func(){ + hostTypes.CPUVendorIntel: func() { + p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) + p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + }, + hostTypes.CPUVendorAMD: func() { + p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + }, + hostTypes.CPUVendorARM: func() { + p.addToDesiredKernelArgs(consts.KernelArgIommuPassthrough) + }, + } + if !driverState.DriverLoaded && driverState.NeedDriverFunc(state, driverState) { - p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) - p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + cpuVendor, err := p.helpers.GetCPUVendor() + if err != nil { + log.Log.Error(err, "can't get CPU vendor, falling back to Intel") + cpuVendor = hostTypes.CPUVendorIntel + } + + addKernelArgFn := kernelArgFnByCPUVendor[cpuVendor] + if addKernelArgFn != nil { + addKernelArgFn() + } } } diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 0d6701a64..0a6674712 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -10,6 +10,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" + hostTypes "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -850,8 +851,9 @@ var _ = Describe("Generic plugin", func() { Expect(changed).To(BeTrue()) }) - It("should detect changes on status due to missing kernel args", func() { - networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Context("Kernel Args", func() { + + 
vfioNetworkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ Interfaces: sriovnetworkv1.Interfaces{{ PciAddress: "0000:00:00.0", @@ -896,16 +898,41 @@ var _ = Describe("Generic plugin", func() { }, } - // Load required kernel args. - genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(networkNodeState) + It("should detect changes on status due to missing kernel args", func() { + hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorIntel, nil) - hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + // Load required kernel args. + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - changed, err := genericPlugin.CheckStatusChanges(networkNodeState) - Expect(err).ToNot(HaveOccurred()) - Expect(changed).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ + consts.KernelArgIntelIommu: false, + consts.KernelArgIommuPt: false, + })) + + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + + changed, err := genericPlugin.CheckStatusChanges(vfioNetworkNodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + + It("should set the correct kernel args on AMD CPUs", func() { + hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorAMD, nil) + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ + consts.KernelArgIommuPt: false, + })) + }) + + It("should set the correct kernel args on ARM CPUs", func() { + hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorARM, nil) + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ + consts.KernelArgIommuPassthrough: false, + })) + }) }) It("should load vfio_pci driver", func() { From 5522c96101c673ad15efbc5a7acd1596283bc19c Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 25 Oct 2024 09:38:44 +0200 Subject: [PATCH 29/59] Update `github.com/jaypipes/ghw` To include - https://github.com/jaypipes/ghw/pull/387 Signed-off-by: Andrea Panattoni --- go.mod | 21 +-- go.sum | 50 +++--- pkg/host/internal/lib/ghw/ghw.go | 12 +- pkg/host/internal/lib/ghw/mock/mock_ghw.go | 44 +----- pkg/host/internal/sriov/sriov.go | 2 +- pkg/host/internal/sriov/sriov_test.go | 176 ++++++++++----------- pkg/platforms/openstack/openstack.go | 4 +- 7 files changed, 130 insertions(+), 179 deletions(-) diff --git a/go.mod b/go.mod index 0353c7ec1..350dbb82d 100644 --- a/go.mod +++ b/go.mod @@ -15,8 +15,8 @@ require ( github.com/google/renameio/v2 v2.0.0 github.com/google/uuid v1.3.1 github.com/hashicorp/go-retryablehttp v0.7.7 - github.com/jaypipes/ghw v0.9.0 - github.com/jaypipes/pcidb v1.0.0 + github.com/jaypipes/ghw v0.13.1-0.20241024164530-c1bfc6e6cd6a + github.com/jaypipes/pcidb v1.0.1 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 github.com/k8snetworkplumbingwg/sriov-network-device-plugin v0.0.0-20221127172732-a5a7395122e3 github.com/k8snetworkplumbingwg/sriovnet v1.2.0 @@ -33,7 +33,7 @@ require ( 
github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 github.com/safchain/ethtool v0.3.0 - github.com/spf13/cobra v1.7.0 + github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.8.4 github.com/vishvananda/netlink v1.2.1-beta.2.0.20240221172127-ec7bcb248e94 github.com/vishvananda/netns v0.0.4 @@ -131,6 +131,7 @@ require ( github.com/robfig/cron v1.2.0 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/samber/lo v1.47.0 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/spf13/afero v1.9.4 // indirect github.com/spf13/cast v1.5.0 // indirect @@ -141,16 +142,16 @@ require ( go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect go.uber.org/multierr v1.11.0 // indirect go4.org v0.0.0-20200104003542-c7e774b10ea0 // indirect - golang.org/x/crypto v0.21.0 // indirect + golang.org/x/crypto v0.23.0 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect - golang.org/x/mod v0.13.0 // indirect - golang.org/x/net v0.23.0 // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.13.0 // indirect - golang.org/x/sync v0.4.0 // indirect + golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect - golang.org/x/term v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.14.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect diff --git a/go.sum b/go.sum index 6f90a1a94..4d8d4c171 100644 --- a/go.sum +++ b/go.sum @@ -119,7 +119,7 @@ github.com/coreos/vcontext v0.0.0-20190529201340-22b159166068/go.mod h1:E+6hug9b github.com/coreos/vcontext v0.0.0-20191017033345-260217907eb5/go.mod h1:E+6hug9bFSe0KZ2ZAzr8M9F5JlArJjv5D1JS7KSkPKE= github.com/coreos/vcontext v0.0.0-20230201181013-d72178a18687 h1:uSmlDgJGbUB0bwQBcZomBTottKwEDF5fF8UjSwKSzWM= github.com/coreos/vcontext v0.0.0-20230201181013-d72178a18687/go.mod h1:Salmysdw7DAVuobBW/LwsKKgpyCPHUhjyJoMJD+ZJiI= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= @@ -147,7 +147,6 @@ github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0X github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 h1:Mn26/9ZMNWSw9C9ERFA1PUxfmGpolnw2v0bKOREu5ew= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= @@ -285,13 +284,12 @@ 
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1: github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jaypipes/ghw v0.9.0 h1:TWF4wNIGtZcgDJaiNcFgby5BR8s2ixcUe0ydxNO2McY= -github.com/jaypipes/ghw v0.9.0/go.mod h1:dXMo19735vXOjpIBDyDYSp31sB2u4hrtRCMxInqQ64k= -github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8= -github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk= +github.com/jaypipes/ghw v0.13.1-0.20241024164530-c1bfc6e6cd6a h1:orxBMCkYww7RFCk3iCDP9DC3l+yKtp4VdWtctCTyjPQ= +github.com/jaypipes/ghw v0.13.1-0.20241024164530-c1bfc6e6cd6a/go.mod h1:F4UM7Ix55ONYwD3Lck2S4BI+hKezOwtizuJxXDFsioo= +github.com/jaypipes/pcidb v1.0.1 h1:WB2zh27T3nwg8AE8ei81sNRb9yWBii3JGNJtT7K9Oic= +github.com/jaypipes/pcidb v1.0.1/go.mod h1:6xYUz/yYEyOkIkUt2t2J2folIuZ4Yg6uByCGFXMCeE4= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= @@ -336,7 +334,6 @@ github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQth github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= @@ -411,6 +408,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/safchain/ethtool v0.3.0 h1:gimQJpsI6sc1yIqP/y8GYgiXn/NjgvpM0RNoWLVVmP0= github.com/safchain/ethtool v0.3.0/go.mod h1:SA9BwrgyAqNo7M+uaL6IYbxpm5wk3L7Mm6ocLW+CJUs= +github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc= +github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= @@ -423,10 +422,8 @@ github.com/spf13/afero v1.9.4/go.mod h1:iUV7ddyEEZPO5gA3zD4fJt6iStLlL+Lg4m2cihcD github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod 
h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= -github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.6-0.20210604193023-d5e0c0615ace h1:9PNP1jnUjRhfmGMlkXHjYPishpcw4jpSt/V/xYY3FMA= github.com/spf13/pflag v1.0.6-0.20210604193023-d5e0c0615ace/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -494,8 +491,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -533,8 +530,8 @@ golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= -golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -570,8 +567,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.25.0 
h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -595,8 +592,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= -golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -638,7 +635,6 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -651,8 +647,8 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -662,8 +658,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -721,8 +717,8 @@ golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/host/internal/lib/ghw/ghw.go b/pkg/host/internal/lib/ghw/ghw.go index d518977e4..2a4ba609d 100644 --- a/pkg/host/internal/lib/ghw/ghw.go +++ b/pkg/host/internal/lib/ghw/ghw.go @@ -3,6 +3,7 @@ package ghw import ( "github.com/jaypipes/ghw" "github.com/jaypipes/ghw/pkg/cpu" + "github.com/jaypipes/ghw/pkg/pci" ) func New() GHWLib { @@ -12,23 +13,16 @@ func New() GHWLib { //go:generate ../../../../../bin/mockgen -destination mock/mock_ghw.go -source ghw.go type GHWLib interface { // PCI returns a pointer to an Info that provide methods to access info about devices - PCI() (Info, error) + PCI() (*pci.Info, error) // CPU returns a pointer to an Info that provide methods to access info about devices CPU() (*cpu.Info, error) } -// Info interface provide methods to access info about devices -type Info interface { - // ListDevices returns a list of pointers to Device structs present on the - // host system - ListDevices() []*ghw.PCIDevice -} - type libWrapper struct{} // PCI returns a pointer to an Info that provide methods to access info about devices -func (w *libWrapper) PCI() (Info, error) { +func (w *libWrapper) PCI() (*pci.Info, error) { return ghw.PCI() } diff --git a/pkg/host/internal/lib/ghw/mock/mock_ghw.go b/pkg/host/internal/lib/ghw/mock/mock_ghw.go index 9d6092362..ded8784bf 100644 --- a/pkg/host/internal/lib/ghw/mock/mock_ghw.go +++ b/pkg/host/internal/lib/ghw/mock/mock_ghw.go @@ -8,9 +8,8 @@ import ( reflect "reflect" gomock "github.com/golang/mock/gomock" - ghw "github.com/jaypipes/ghw" cpu "github.com/jaypipes/ghw/pkg/cpu" - ghw0 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/ghw" + pci 
"github.com/jaypipes/ghw/pkg/pci" ) // MockGHWLib is a mock of GHWLib interface. @@ -52,10 +51,10 @@ func (mr *MockGHWLibMockRecorder) CPU() *gomock.Call { } // PCI mocks base method. -func (m *MockGHWLib) PCI() (ghw0.Info, error) { +func (m *MockGHWLib) PCI() (*pci.Info, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "PCI") - ret0, _ := ret[0].(ghw0.Info) + ret0, _ := ret[0].(*pci.Info) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -65,40 +64,3 @@ func (mr *MockGHWLibMockRecorder) PCI() *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PCI", reflect.TypeOf((*MockGHWLib)(nil).PCI)) } - -// MockInfo is a mock of Info interface. -type MockInfo struct { - ctrl *gomock.Controller - recorder *MockInfoMockRecorder -} - -// MockInfoMockRecorder is the mock recorder for MockInfo. -type MockInfoMockRecorder struct { - mock *MockInfo -} - -// NewMockInfo creates a new mock instance. -func NewMockInfo(ctrl *gomock.Controller) *MockInfo { - mock := &MockInfo{ctrl: ctrl} - mock.recorder = &MockInfoMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MockInfo) EXPECT() *MockInfoMockRecorder { - return m.recorder -} - -// ListDevices mocks base method. -func (m *MockInfo) ListDevices() []*ghw.PCIDevice { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ListDevices") - ret0, _ := ret[0].([]*ghw.PCIDevice) - return ret0 -} - -// ListDevices indicates an expected call of ListDevices. -func (mr *MockInfoMockRecorder) ListDevices() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListDevices", reflect.TypeOf((*MockInfo)(nil).ListDevices)) -} diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index bf9919a7e..3e5989bae 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -217,7 +217,7 @@ func (s *sriov) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]sri return nil, fmt.Errorf("DiscoverSriovDevices(): error getting PCI info: %v", err) } - devices := pci.ListDevices() + devices := pci.Devices if len(devices) == 0 { return nil, fmt.Errorf("DiscoverSriovDevices(): could not retrieve PCI devices") } diff --git a/pkg/host/internal/sriov/sriov_test.go b/pkg/host/internal/sriov/sriov_test.go index f30e93773..319bacf54 100644 --- a/pkg/host/internal/sriov/sriov_test.go +++ b/pkg/host/internal/sriov/sriov_test.go @@ -7,7 +7,7 @@ import ( "syscall" "github.com/golang/mock/gomock" - "github.com/jaypipes/ghw" + "github.com/jaypipes/ghw/pkg/pci" "github.com/jaypipes/pcidb" "github.com/vishvananda/netlink" @@ -57,12 +57,7 @@ var _ = Describe("SRIOV", func() { }) Context("DiscoverSriovDevices", func() { - var ( - ghwInfoMock *ghwMockPkg.MockInfo - ) BeforeEach(func() { - ghwInfoMock = ghwMockPkg.NewMockInfo(testCtrl) - ghwLibMock.EXPECT().PCI().Return(ghwInfoMock, nil) origNicMap := sriovnetworkv1.NicIDMap sriovnetworkv1.InitNicIDMapFromList([]string{ "15b3 101d 101e", @@ -73,7 +68,7 @@ var _ = Describe("SRIOV", func() { }) It("discovered", func() { - ghwInfoMock.EXPECT().ListDevices().Return(getTestPCIDevices()) + ghwLibMock.EXPECT().PCI().Return(getTestPCIDevices(), nil) dputilsLibMock.EXPECT().IsSriovVF("0000:d8:00.0").Return(false) dputilsLibMock.EXPECT().IsSriovVF("0000:d8:00.2").Return(true) dputilsLibMock.EXPECT().IsSriovVF("0000:3b:00.0").Return(false) @@ -628,91 +623,94 @@ var _ = Describe("SRIOV", func() { }) }) -func getTestPCIDevices() []*ghw.PCIDevice { - return 
[]*ghw.PCIDevice{{ - Driver: "mlx5_core", - Address: "0000:d8:00.0", - Vendor: &pcidb.Vendor{ - ID: "15b3", - Name: "Mellanox Technologies", - }, - Product: &pcidb.Product{ - ID: "101d", - Name: "MT2892 Family [ConnectX-6 Dx]", - }, - Revision: "0x00", - Subsystem: &pcidb.Product{ - ID: "0083", - Name: "unknown", - }, - Class: &pcidb.Class{ - ID: "02", - Name: "Network controller", - }, - Subclass: &pcidb.Subclass{ - ID: "00", - Name: "Ethernet controller", - }, - ProgrammingInterface: &pcidb.ProgrammingInterface{ - ID: "00", - Name: "unknonw", - }, - }, - { - Driver: "mlx5_core", - Address: "0000:d8:00.2", - Vendor: &pcidb.Vendor{ - ID: "15b3", - Name: "Mellanox Technologies", - }, - Product: &pcidb.Product{ - ID: "101e", - Name: "ConnectX Family mlx5Gen Virtual Function", - }, - Revision: "0x00", - Subsystem: &pcidb.Product{ - ID: "0083", - Name: "unknown", +func getTestPCIDevices() *pci.Info { + return &pci.Info{ + Devices: []*pci.Device{ + { + Driver: "mlx5_core", + Address: "0000:d8:00.0", + Vendor: &pcidb.Vendor{ + ID: "15b3", + Name: "Mellanox Technologies", + }, + Product: &pcidb.Product{ + ID: "101d", + Name: "MT2892 Family [ConnectX-6 Dx]", + }, + Revision: "0x00", + Subsystem: &pcidb.Product{ + ID: "0083", + Name: "unknown", + }, + Class: &pcidb.Class{ + ID: "02", + Name: "Network controller", + }, + Subclass: &pcidb.Subclass{ + ID: "00", + Name: "Ethernet controller", + }, + ProgrammingInterface: &pcidb.ProgrammingInterface{ + ID: "00", + Name: "unknonw", + }, }, - Class: &pcidb.Class{ - ID: "02", - Name: "Network controller", + { + Driver: "mlx5_core", + Address: "0000:d8:00.2", + Vendor: &pcidb.Vendor{ + ID: "15b3", + Name: "Mellanox Technologies", + }, + Product: &pcidb.Product{ + ID: "101e", + Name: "ConnectX Family mlx5Gen Virtual Function", + }, + Revision: "0x00", + Subsystem: &pcidb.Product{ + ID: "0083", + Name: "unknown", + }, + Class: &pcidb.Class{ + ID: "02", + Name: "Network controller", + }, + Subclass: &pcidb.Subclass{ + ID: "00", + Name: "Ethernet controller", + }, + ProgrammingInterface: &pcidb.ProgrammingInterface{ + ID: "00", + Name: "unknonw", + }, }, - Subclass: &pcidb.Subclass{ - ID: "00", - Name: "Ethernet controller", - }, - ProgrammingInterface: &pcidb.ProgrammingInterface{ - ID: "00", - Name: "unknonw", - }, - }, - { - Driver: "mlx5_core", - Address: "0000:3b:00.0", - Vendor: &pcidb.Vendor{ - ID: "15b3", - Name: "Mellanox Technologies", - }, - Product: &pcidb.Product{ - ID: "aaaa", // not supported - Name: "not supported", - }, - Class: &pcidb.Class{ - ID: "02", - Name: "Network controller", - }, - }, - { - Driver: "test", - Address: "0000:d7:16.5", - Vendor: &pcidb.Vendor{ - ID: "8086", - Name: "Intel Corporation", + { + Driver: "mlx5_core", + Address: "0000:3b:00.0", + Vendor: &pcidb.Vendor{ + ID: "15b3", + Name: "Mellanox Technologies", + }, + Product: &pcidb.Product{ + ID: "aaaa", // not supported + Name: "not supported", + }, + Class: &pcidb.Class{ + ID: "02", + Name: "Network controller", + }, }, - Class: &pcidb.Class{ - ID: "11", // not network device - Name: "Signal processing controller", + { + Driver: "test", + Address: "0000:d7:16.5", + Vendor: &pcidb.Vendor{ + ID: "8086", + Name: "Intel Corporation", + }, + Class: &pcidb.Class{ + ID: "11", // not network device + Name: "Signal processing controller", + }, }, }, } diff --git a/pkg/platforms/openstack/openstack.go b/pkg/platforms/openstack/openstack.go index 8968c96be..608ba6f87 100644 --- a/pkg/platforms/openstack/openstack.go +++ b/pkg/platforms/openstack/openstack.go @@ -362,7 +362,7 @@ 
func (o *openstackContext) CreateOpenstackDevicesInfo() error { return fmt.Errorf("CreateOpenstackDevicesInfo(): error getting PCI info: %v", err) } - devices := pci.ListDevices() + devices := pci.Devices if len(devices) == 0 { return fmt.Errorf("CreateOpenstackDevicesInfo(): could not retrieve PCI devices") } @@ -421,7 +421,7 @@ func (o *openstackContext) DiscoverSriovDevicesVirtual() ([]sriovnetworkv1.Inter return nil, fmt.Errorf("DiscoverSriovDevicesVirtual(): error getting PCI info: %v", err) } - devices := pci.ListDevices() + devices := pci.Devices if len(devices) == 0 { return nil, fmt.Errorf("DiscoverSriovDevicesVirtual(): could not retrieve PCI devices") } From 73c1f81fa81c790246d111b1da99de4c7b17106d Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Sat, 23 Mar 2024 18:34:28 +0200 Subject: [PATCH 30/59] RDMA subsystem is implemented via ib_core module config. --- api/v1/sriovnetworknodestate_types.go | 8 ++ api/v1/sriovnetworkpoolconfig_types.go | 4 + api/v1/zz_generated.deepcopy.go | 17 +++ ...k.openshift.io_sriovnetworknodestates.yaml | 18 +++ ....openshift.io_sriovnetworkpoolconfigs.yaml | 6 + controllers/drain_controller.go | 99 +--------------- controllers/helper.go | 109 +++++++++++++++++- .../sriovnetworknodepolicy_controller.go | 7 ++ ...k.openshift.io_sriovnetworknodestates.yaml | 18 +++ ....openshift.io_sriovnetworkpoolconfigs.yaml | 6 + pkg/consts/constants.go | 3 + pkg/daemon/daemon.go | 10 ++ pkg/daemon/writer.go | 7 ++ pkg/helper/mock/mock_helper.go | 29 +++++ .../internal/lib/netlink/mock/mock_netlink.go | 15 +++ pkg/host/internal/lib/netlink/netlink.go | 7 ++ pkg/host/internal/network/network.go | 31 +++++ pkg/host/internal/network/network_test.go | 31 +++++ pkg/host/mock/mock_host.go | 29 +++++ pkg/host/types/interfaces.go | 4 + pkg/utils/cluster.go | 3 +- 21 files changed, 357 insertions(+), 104 deletions(-) diff --git a/api/v1/sriovnetworknodestate_types.go b/api/v1/sriovnetworknodestate_types.go index 4b90d61d2..e5f59d71c 100644 --- a/api/v1/sriovnetworknodestate_types.go +++ b/api/v1/sriovnetworknodestate_types.go @@ -27,6 +27,7 @@ import ( type SriovNetworkNodeStateSpec struct { Interfaces Interfaces `json:"interfaces,omitempty"` Bridges Bridges `json:"bridges,omitempty"` + System System `json:"system,omitempty"` } type Interfaces []Interface @@ -114,10 +115,17 @@ type OVSUplinkConfigExt struct { Interface OVSInterfaceConfig `json:"interface,omitempty"` } +type System struct { + // +kubebuilder:validation:Enum=shared;exclusive + //RDMA subsystem. Allowed value "shared", "exclusive". + RdmaMode string `json:"rdmaMode,omitempty"` +} + // SriovNetworkNodeStateStatus defines the observed state of SriovNetworkNodeState type SriovNetworkNodeStateStatus struct { Interfaces InterfaceExts `json:"interfaces,omitempty"` Bridges Bridges `json:"bridges,omitempty"` + System System `json:"system,omitempty"` SyncStatus string `json:"syncStatus,omitempty"` LastSyncError string `json:"lastSyncError,omitempty"` } diff --git a/api/v1/sriovnetworkpoolconfig_types.go b/api/v1/sriovnetworkpoolconfig_types.go index c6e710a99..011ffc7d9 100644 --- a/api/v1/sriovnetworkpoolconfig_types.go +++ b/api/v1/sriovnetworkpoolconfig_types.go @@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct { // Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards, // even if maxUnavailable is greater than one. MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` + + // +kubebuilder:validation:Enum=shared;exclusive + // RDMA subsystem. 
Allowed value "shared", "exclusive". + RdmaMode string `json:"rdmaMode,omitempty"` } type OvsHardwareOffloadConfig struct { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index fc9477593..0209c0573 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -783,6 +783,7 @@ func (in *SriovNetworkNodeStateSpec) DeepCopyInto(out *SriovNetworkNodeStateSpec } } in.Bridges.DeepCopyInto(&out.Bridges) + out.System = in.System } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateSpec. @@ -806,6 +807,7 @@ func (in *SriovNetworkNodeStateStatus) DeepCopyInto(out *SriovNetworkNodeStateSt } } in.Bridges.DeepCopyInto(&out.Bridges) + out.System = in.System } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateStatus. @@ -1066,6 +1068,21 @@ func (in *SriovOperatorConfigStatus) DeepCopy() *SriovOperatorConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *System) DeepCopyInto(out *System) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new System. +func (in *System) DeepCopy() *System { + if in == nil { + return nil + } + out := new(System) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TrunkConfig) DeepCopyInto(out *TrunkConfig) { *out = *in diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index c5bf230c3..31ddf3bf1 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". 
+ enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/controllers/drain_controller.go b/controllers/drain_controller.go index 86da909d8..b96458fa7 100644 --- a/controllers/drain_controller.go +++ b/controllers/drain_controller.go @@ -24,11 +24,8 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" @@ -48,13 +45,6 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var ( - oneNode = intstr.FromInt32(1) - defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ - MaxUnavailable: &oneNode, - NodeSelector: &metav1.LabelSelector{}}} -) - type DrainReconcile struct { client.Client Scheme *runtime.Scheme @@ -346,94 +336,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) ( } func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { - logger := log.FromContext(ctx) - logger.Info("findNodePoolConfig():") - // get all the sriov network pool configs - npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} - err := dr.List(ctx, npcl) - if err != nil { - logger.Error(err, "failed to list sriovNetworkPoolConfig") - return nil, nil, err - } - - selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} - nodesInPools := map[string]interface{}{} - - for _, npc := range npcl.Items { - // we skip hw offload objects - if npc.Spec.OvsHardwareOffloadConfig.Name != "" { - continue - } - - if npc.Spec.NodeSelector == nil { - npc.Spec.NodeSelector = &metav1.LabelSelector{} - } - - selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - if selector.Matches(labels.Set(node.Labels)) { - selectedNpcl = append(selectedNpcl, npc.DeepCopy()) - } - - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", - "machineConfigPoolName", npc, - "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - for _, nodeName := range nodeList.Items { - nodesInPools[nodeName.Name] = nil - } - } - - if len(selectedNpcl) > 1 { - // don't allow the node to be part of multiple pools - err = fmt.Errorf("node is part of more then one pool") - logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) - return nil, nil, err - } else if len(selectedNpcl) == 1 { - // found one pool for our node - logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) - selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) - return nil, nil, err - } - - // list all the nodes that are also part of this pool and return them - nodeList := &corev1.NodeList{} - 
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) - return nil, nil, err - } - - return selectedNpcl[0], nodeList.Items, nil - } else { - // in this case we get all the nodes and remove the ones that already part of any pool - logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl) - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList) - if err != nil { - logger.Error(err, "failed to list all the nodes") - return nil, nil, err - } - - defaultNodeLists := []corev1.Node{} - for _, nodeObj := range nodeList.Items { - if _, exist := nodesInPools[nodeObj.Name]; !exist { - defaultNodeLists = append(defaultNodeLists, nodeObj) - } - } - return defaultNpcl, defaultNodeLists, nil - } + return findNodePoolConfig(ctx, node, dr.Client) } // SetupWithManager sets up the controller with the Manager. diff --git a/controllers/helper.go b/controllers/helper.go index 9ff735473..b90ad44f8 100644 --- a/controllers/helper.go +++ b/controllers/helper.go @@ -30,9 +30,12 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" kscheme "k8s.io/client-go/kubernetes/scheme" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -47,10 +50,17 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var webhooks = map[string](string){ - constants.InjectorWebHookName: constants.InjectorWebHookPath, - constants.OperatorWebHookName: constants.OperatorWebHookPath, -} +var ( + webhooks = map[string](string){ + constants.InjectorWebHookName: constants.InjectorWebHookPath, + constants.OperatorWebHookName: constants.OperatorWebHookPath, + } + oneNode = intstr.FromInt32(1) + defaultPoolConfig = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ + MaxUnavailable: &oneNode, + NodeSelector: &metav1.LabelSelector{}, + RdmaMode: ""}} +) const ( clusterRoleResourceName = "ClusterRole" @@ -397,3 +407,94 @@ func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string] } return nil } + +func findNodePoolConfig(ctx context.Context, node *corev1.Node, c k8sclient.Client) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { + logger := log.FromContext(ctx) + logger.Info("FindNodePoolConfig():") + // get all the sriov network pool configs + npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} + err := c.List(ctx, npcl) + if err != nil { + logger.Error(err, "failed to list sriovNetworkPoolConfig") + return nil, nil, err + } + + selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} + nodesInPools := map[string]interface{}{} + + for _, npc := range npcl.Items { + // we skip hw offload objects + if npc.Spec.OvsHardwareOffloadConfig.Name != "" { + continue + } + + if npc.Spec.NodeSelector == nil { + npc.Spec.NodeSelector = &metav1.LabelSelector{} + } + + selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) + return 
nil, nil, err + } + + if selector.Matches(labels.Set(node.Labels)) { + selectedNpcl = append(selectedNpcl, npc.DeepCopy()) + } + + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", + "machineConfigPoolName", npc, + "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + for _, nodeName := range nodeList.Items { + nodesInPools[nodeName.Name] = nil + } + } + + if len(selectedNpcl) > 1 { + // don't allow the node to be part of multiple pools + err = fmt.Errorf("node is part of more then one pool") + logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) + return nil, nil, err + } else if len(selectedNpcl) == 1 { + // found one pool for our node + logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) + selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) + return nil, nil, err + } + + // list all the nodes that are also part of this pool and return them + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) + return nil, nil, err + } + + return selectedNpcl[0], nodeList.Items, nil + } else { + // in this case we get all the nodes and remove the ones that already part of any pool + logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig) + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList) + if err != nil { + logger.Error(err, "failed to list all the nodes") + return nil, nil, err + } + + defaultNodeLists := []corev1.Node{} + for _, nodeObj := range nodeList.Items { + if _, exist := nodesInPools[nodeObj.Name]; !exist { + defaultNodeLists = append(defaultNodeLists, nodeObj) + } + } + return defaultPoolConfig, defaultNodeLists, nil + } +} diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index be46880b7..1d2811fac 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -272,6 +272,13 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con ns.Name = node.Name ns.Namespace = vars.Namespace j, _ := json.Marshal(ns) + netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node") + } + if netPoolConfig != nil { + ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode + } logger.V(2).Info("SriovNetworkNodeState CR", "content", j) if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil { logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name) diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index c5bf230c3..31ddf3bf1 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml 
+++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f7025c90d..66a5ad2b5 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -54,6 +54,9 @@ const ( VdpaTypeVirtio = "virtio" VdpaTypeVhost = "vhost" + RdmaSubsystemModeShared = "shared" + RdmaSubsystemModeExclusive = "exclusive" + ClusterTypeOpenshift = "openshift" ClusterTypeKubernetes = "kubernetes" diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index ff7f326dc..0867685dc 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -429,6 +429,16 @@ func (dn *Daemon) nodeStateSyncHandler() error { reqReboot = reqReboot || r } + if dn.currentNodeState.Status.System.RdmaMode != dn.desiredNodeState.Spec.System.RdmaMode { + err = dn.HostHelpers.SetRDMASubsystem(dn.desiredNodeState.Spec.System.RdmaMode) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to set RDMA subsystem") + return err + } + reqReboot = true + reqDrain = true + } + // When running using systemd check if the applied configuration is the latest one // or there is a new config we need to apply // When using systemd configuration we write the file diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 09d06d8f9..60d4e8d91 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -118,6 +118,7 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { log.Log.V(2).Info("pollNicStatus()") var iface []sriovnetworkv1.InterfaceExt var bridges sriovnetworkv1.Bridges + var rdmaMode string var err error if vars.PlatformType == consts.VirtualOpenStack { @@ -138,8 +139,14 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { } } + rdmaMode, err = w.hostHelper.DiscoverRDMASubsystem() + if err != nil { + return err + } + w.status.Interfaces = iface w.status.Bridges = bridges + w.status.System.RdmaMode = rdmaMode return nil } diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index 432d741be..b413ecdee 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -294,6 
+294,21 @@ func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostHelpersInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -1044,6 +1059,20 @@ func (mr *MockHostHelpersInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. +func (mr *MockHostHelpersInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. func (m *MockHostHelpersInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/mock/mock_netlink.go b/pkg/host/internal/lib/netlink/mock/mock_netlink.go index 5b3bcc790..758346a3f 100644 --- a/pkg/host/internal/lib/netlink/mock/mock_netlink.go +++ b/pkg/host/internal/lib/netlink/mock/mock_netlink.go @@ -145,6 +145,21 @@ func (mr *MockNetlinkLibMockRecorder) DevlinkSetDeviceParam(bus, device, param, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DevlinkSetDeviceParam", reflect.TypeOf((*MockNetlinkLib)(nil).DevlinkSetDeviceParam), bus, device, param, cmode, value) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockNetlinkLib) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockNetlinkLibMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockNetlinkLib)(nil).DiscoverRDMASubsystem)) +} + // IsLinkAdminStateUp mocks base method. 
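// A minimal usage sketch (illustrative, not generated code): unit tests can
// stub this netlink-level call the same way network_test.go does later in
// this patch. It assumes the generated NewMockNetlinkLib constructor plus the
// testing and gomock imports; note that a follow-up patch in this series
// renames the call to RdmaSystemGetNetnsMode.
func exampleStubRdmaDiscoverySketch(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	netlinkLibMock := NewMockNetlinkLib(ctrl)
	// the RDMA subsystem netns mode is reported as "shared" or "exclusive"
	netlinkLibMock.EXPECT().DiscoverRDMASubsystem().Return("shared", nil)

	mode, err := netlinkLibMock.DiscoverRDMASubsystem()
	if err != nil || mode != "shared" {
		t.Fatalf("unexpected RDMA mode: %q, %v", mode, err)
	}
}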
func (m *MockNetlinkLib) IsLinkAdminStateUp(link netlink.Link) bool { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/netlink.go b/pkg/host/internal/lib/netlink/netlink.go index ed063834e..7d857921d 100644 --- a/pkg/host/internal/lib/netlink/netlink.go +++ b/pkg/host/internal/lib/netlink/netlink.go @@ -68,6 +68,8 @@ type NetlinkLib interface { RdmaLinkByName(name string) (*netlink.RdmaLink, error) // IsLinkAdminStateUp checks if the admin state of a link is up IsLinkAdminStateUp(link Link) bool + // DiscoverRDMASubsystem returns RDMA subsystem mode + DiscoverRDMASubsystem() (string, error) } type libWrapper struct{} @@ -185,3 +187,8 @@ func (w *libWrapper) RdmaLinkByName(name string) (*netlink.RdmaLink, error) { func (w *libWrapper) IsLinkAdminStateUp(link Link) bool { return link.Attrs().Flags&net.FlagUp == 1 } + +// DiscoverRDMASubsystem returns RDMA subsystem mode +func (w *libWrapper) DiscoverRDMASubsystem() (string, error) { + return netlink.RdmaSystemGetNetnsMode() +} diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index ef85ad24a..940c4b248 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -429,3 +429,34 @@ func (n *network) GetPciAddressFromInterfaceName(interfaceName string) (string, log.Log.V(2).Info("GetPciAddressFromInterfaceName(): result", "interface", interfaceName, "pci address", pciAddress) return pciAddress, nil } + +func (n *network) DiscoverRDMASubsystem() (string, error) { + log.Log.Info("DiscoverRDMASubsystem(): retrieving RDMA subsystem mode") + subsystem, err := n.netlinkLib.DiscoverRDMASubsystem() + + if err != nil { + log.Log.Error(err, "DiscoverRDMASubsystem(): failed to get RDMA subsystem mode") + return "", err + } + + return subsystem, nil +} + +func (n *network) SetRDMASubsystem(mode string) error { + log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode") + + modeValue := 1 + if mode == "exclusive" { + modeValue = 0 + } + config := fmt.Sprintf("options ib_core netns_mode=%d\n", modeValue) + path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "ib_core.conf") + err := os.WriteFile(path, []byte(config), 0644) + + if err != nil { + log.Log.Error(err, "SetRDMASubsystem(): failed to write ib_core config") + return fmt.Errorf("failed to write ib_core config: %v", err) + } + + return nil +} diff --git a/pkg/host/internal/network/network_test.go b/pkg/host/internal/network/network_test.go index 19eb3f438..51c56b875 100644 --- a/pkg/host/internal/network/network_test.go +++ b/pkg/host/internal/network/network_test.go @@ -283,4 +283,35 @@ var _ = Describe("Network", func() { Expect(pci).To(Equal("0000:3b:00.0")) }) }) + Context("DiscoverRDMASubsystem", func() { + It("Should get RDMA Subsystem using netlink", func() { + netlinkLibMock.EXPECT().DiscoverRDMASubsystem().Return("shared", nil) + + pci, err := n.DiscoverRDMASubsystem() + Expect(err).NotTo(HaveOccurred()) + Expect(pci).To(Equal("shared")) + }) + }) + Context("SetRDMASubsystem", func() { + It("Should set RDMA Subsystem shared mode", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/ib_core.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("shared")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=1\n") + }) + It("Should set RDMA Subsystem exclusive mode", func() { + 
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/ib_core.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("exclusive")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=0\n") + }) + }) }) diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index 5ebed46aa..095d270a9 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -264,6 +264,21 @@ func (mr *MockHostManagerInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostManagerInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -859,6 +874,20 @@ func (mr *MockHostManagerInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostManagerInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. 
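// For orientation, a condensed sketch (illustrative names, not part of the
// generated mock or of HostManagerInterface) of how the daemon wires the two
// new helpers together at this point in the series, per the pkg/daemon/daemon.go
// hunk above: the discovered mode is compared with the desired one, and a
// drain plus reboot is requested when they differ.
type rdmaConfigurerSketch interface {
	DiscoverRDMASubsystem() (string, error)
	SetRDMASubsystem(mode string) error
}

func reconcileRdmaModeSketch(h rdmaConfigurerSketch, desiredMode string) (needReboot bool, err error) {
	currentMode, err := h.DiscoverRDMASubsystem()
	if err != nil {
		return false, err
	}
	if currentMode == desiredMode {
		// nothing to do, the host already runs in the requested netns mode
		return false, nil
	}
	if err := h.SetRDMASubsystem(desiredMode); err != nil {
		return false, err
	}
	// the modprobe.d change only applies once ib_core is reloaded, which is
	// why the daemon requests a drain and a reboot of the node
	return true, nil
}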
func (m *MockHostManagerInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index c6e0c8faf..6844ee5ae 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -90,6 +90,10 @@ type NetworkInterface interface { GetNetDevLinkAdminState(ifaceName string) string // GetPciAddressFromInterfaceName parses sysfs to get pci address of an interface by name GetPciAddressFromInterfaceName(interfaceName string) (string, error) + // DiscoverRDMASubsystem returns RDMA subsystem mode + DiscoverRDMASubsystem() (string, error) + // SetRDMASubsystem changes RDMA subsystem mode + SetRDMASubsystem(mode string) error } type ServiceInterface interface { diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index 6f8d72e07..c5f1f333a 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -5,13 +5,12 @@ import ( "fmt" "os" - "sigs.k8s.io/controller-runtime/pkg/log" - configv1 "github.com/openshift/api/config/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" ) From 02c6b009c3c4b0bf0c1345ebc2a16bb490e68000 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Mon, 28 Oct 2024 15:28:41 +0200 Subject: [PATCH 31/59] Add kernel args for rdma mode to complement the modprobe file Signed-off-by: Sebastian Sch --- Makefile | 2 +- bindata/scripts/enable-kargs.sh | 33 -- bindata/scripts/kargs.sh | 55 +++ .../sriovnetworknodepolicy_controller.go | 13 +- go.mod | 2 +- pkg/consts/constants.go | 2 + pkg/daemon/daemon.go | 17 +- pkg/daemon/plugin_test.go | 8 + pkg/daemon/writer.go | 1 + .../internal/lib/netlink/mock/mock_netlink.go | 30 +- pkg/host/internal/lib/netlink/netlink.go | 8 +- pkg/host/internal/network/network.go | 24 +- pkg/host/internal/network/network_test.go | 10 +- pkg/plugins/generic/generic_plugin.go | 200 +++++----- pkg/plugins/generic/generic_plugin_test.go | 74 +++- test/conformance/tests/test_networkpool.go | 345 ++++++++++++++++++ .../{enable-kargs_test.sh => kargs_test.sh} | 29 +- test/scripts/rpm-ostree_mock | 6 + 18 files changed, 667 insertions(+), 192 deletions(-) delete mode 100755 bindata/scripts/enable-kargs.sh create mode 100755 bindata/scripts/kargs.sh create mode 100644 test/conformance/tests/test_networkpool.go rename test/scripts/{enable-kargs_test.sh => kargs_test.sh} (61%) diff --git a/Makefile b/Makefile index 310f1dc52..f5ca7edc8 100644 --- a/Makefile +++ b/Makefile @@ -226,7 +226,7 @@ test-e2e-k8s: export NAMESPACE=sriov-network-operator test-e2e-k8s: test-e2e test-bindata-scripts: fakechroot - fakechroot ./test/scripts/enable-kargs_test.sh + fakechroot ./test/scripts/kargs_test.sh test-%: generate manifests envtest KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir=/tmp -p path)" HOME="$(shell pwd)" go test ./$*/... -coverprofile cover-$*.out -coverpkg ./... 
-v diff --git a/bindata/scripts/enable-kargs.sh b/bindata/scripts/enable-kargs.sh deleted file mode 100755 index 0dc18c784..000000000 --- a/bindata/scripts/enable-kargs.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -set -x - -declare -a kargs=( "$@" ) -ret=0 -args=$(chroot /host/ cat /proc/cmdline) - -if chroot /host/ test -f /run/ostree-booted ; then - for t in "${kargs[@]}";do - if [[ $args != *${t}* ]];then - if chroot /host/ rpm-ostree kargs | grep -vq ${t}; then - chroot /host/ rpm-ostree kargs --append ${t} > /dev/null 2>&1 - fi - let ret++ - fi - done -else - chroot /host/ which grubby > /dev/null 2>&1 - # if grubby is not there, let's tell it - if [ $? -ne 0 ]; then - exit 127 - fi - for t in "${kargs[@]}";do - if [[ $args != *${t}* ]];then - if chroot /host/ grubby --info=DEFAULT | grep args | grep -vq ${t}; then - chroot /host/ grubby --update-kernel=DEFAULT --args=${t} > /dev/null 2>&1 - fi - let ret++ - fi - done -fi - -echo $ret diff --git a/bindata/scripts/kargs.sh b/bindata/scripts/kargs.sh new file mode 100755 index 000000000..8d118456e --- /dev/null +++ b/bindata/scripts/kargs.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -x + +command=$1 +shift +declare -a kargs=( "$@" ) +ret=0 +args=$(chroot /host/ cat /proc/cmdline) + +if chroot /host/ test -f /run/ostree-booted ; then + for t in "${kargs[@]}";do + if [[ $command == "add" ]];then + if [[ $args != *${t}* ]];then + if chroot /host/ rpm-ostree kargs | grep -vq ${t}; then + chroot /host/ rpm-ostree kargs --append ${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + if [[ $command == "remove" ]];then + if [[ $args == *${t}* ]];then + if chroot /host/ rpm-ostree kargs | grep -q ${t}; then + chroot /host/ rpm-ostree kargs --delete ${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + done +else + chroot /host/ which grubby > /dev/null 2>&1 + # if grubby is not there, let's tell it + if [ $? -ne 0 ]; then + exit 127 + fi + for t in "${kargs[@]}";do + if [[ $command == "add" ]];then + if [[ $args != *${t}* ]];then + if chroot /host/ grubby --info=DEFAULT | grep args | grep -vq ${t}; then + chroot /host/ grubby --update-kernel=DEFAULT --args=${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + if [[ $command == "remove" ]];then + if [[ $args == *${t}* ]];then + if chroot /host/ grubby --info=DEFAULT | grep args | grep -q ${t}; then + chroot /host/ grubby --update-kernel=DEFAULT --remove-args=${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + done +fi + +echo $ret diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 1d2811fac..62218436f 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -155,22 +155,22 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er delayedEventHandler := handler.Funcs{ CreateFunc: func(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for create event", "resource", e.Object.GetName()) + Info("Enqueuing sync for create event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). 
- Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName()) + Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName(), "type", e.ObjectNew.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, DeleteFunc: func(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for delete event", "resource", e.Object.GetName()) + Info("Enqueuing sync for delete event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, GenericFunc: func(ctx context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for generic event", "resource", e.Object.GetName()) + Info("Enqueuing sync for generic event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, } @@ -199,6 +199,7 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er For(&sriovnetworkv1.SriovNetworkNodePolicy{}). Watches(&corev1.Node{}, nodeEvenHandler). Watches(&sriovnetworkv1.SriovNetworkNodePolicy{}, delayedEventHandler). + Watches(&sriovnetworkv1.SriovNetworkPoolConfig{}, delayedEventHandler). WatchesRawSource(&source.Channel{Source: eventChan}, delayedEventHandler). Complete(r) } @@ -271,14 +272,14 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con ns := &sriovnetworkv1.SriovNetworkNodeState{} ns.Name = node.Name ns.Namespace = vars.Namespace - j, _ := json.Marshal(ns) netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client) if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node") + logger.Error(err, "failed to get SriovNetworkPoolConfig for the current node") } if netPoolConfig != nil { ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode } + j, _ := json.Marshal(ns) logger.V(2).Info("SriovNetworkNodeState CR", "content", j) if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil { logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name) diff --git a/go.mod b/go.mod index 350dbb82d..31d70d572 100644 --- a/go.mod +++ b/go.mod @@ -38,6 +38,7 @@ require ( github.com/vishvananda/netlink v1.2.1-beta.2.0.20240221172127-ec7bcb248e94 github.com/vishvananda/netns v0.0.4 go.uber.org/zap v1.25.0 + golang.org/x/net v0.25.0 golang.org/x/time v0.3.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/yaml.v3 v3.0.1 @@ -145,7 +146,6 @@ require ( golang.org/x/crypto v0.23.0 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.13.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 66a5ad2b5..ba1830f5b 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -128,6 +128,8 @@ const ( KernelArgIntelIommu = "intel_iommu=on" KernelArgIommuPt = "iommu=pt" KernelArgIommuPassthrough = "iommu.passthrough=1" + KernelArgRdmaShared = "ib_core.netns_mode=1" + KernelArgRdmaExclusive = "ib_core.netns_mode=0" // Feature gates // ParallelNicConfigFeatureGate: allow to configure nics in parallel diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 0867685dc..53fe82b8b 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -4,7 +4,6 
@@ import ( "context" "fmt" "math/rand" - "os/exec" "reflect" "sync" "time" @@ -429,16 +428,6 @@ func (dn *Daemon) nodeStateSyncHandler() error { reqReboot = reqReboot || r } - if dn.currentNodeState.Status.System.RdmaMode != dn.desiredNodeState.Spec.System.RdmaMode { - err = dn.HostHelpers.SetRDMASubsystem(dn.desiredNodeState.Spec.System.RdmaMode) - if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to set RDMA subsystem") - return err - } - reqReboot = true - reqDrain = true - } - // When running using systemd check if the applied configuration is the latest one // or there is a new config we need to apply // When using systemd configuration we write the file @@ -761,11 +750,11 @@ func (dn *Daemon) rebootNode() { // However note we use `;` instead of `&&` so we keep rebooting even // if kubelet failed to shutdown - that way the machine will still eventually reboot // as systemd will time out the stop invocation. - cmd := exec.Command("systemd-run", "--unit", "sriov-network-config-daemon-reboot", + stdOut, StdErr, err := dn.HostHelpers.RunCommand("systemd-run", "--unit", "sriov-network-config-daemon-reboot", "--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl stop kubelet.service; reboot") - if err := cmd.Run(); err != nil { - log.Log.Error(err, "failed to reboot node") + if err != nil { + log.Log.Error(err, "failed to reboot node", "stdOut", stdOut, "StdErr", StdErr) } } diff --git a/pkg/daemon/plugin_test.go b/pkg/daemon/plugin_test.go index a13fc1f8b..7b14a4504 100644 --- a/pkg/daemon/plugin_test.go +++ b/pkg/daemon/plugin_test.go @@ -41,6 +41,14 @@ var _ = Describe("config daemon plugin loading tests", func() { vars.ClusterType = consts.ClusterTypeKubernetes gmockController = gomock.NewController(GinkgoT()) helperMock = helperMocks.NewMockHostHelpersInterface(gmockController) + helperMock.EXPECT().GetCurrentKernelArgs().Return("", nil).AnyTimes() + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPassthrough).Return(false) + // k8s plugin is ATM the only plugin which require mocking/faking, as its New method performs additional logic // other than simple plugin struct initialization K8sPlugin = func(_ helper.HostHelpersInterface) (plugin.VendorPlugin, error) { diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 60d4e8d91..42eeb2928 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -189,6 +189,7 @@ func (w *NodeStateStatusWriter) setNodeStateStatus(msg Message) (*sriovnetworkv1 nodeState, err := w.updateNodeStateStatusRetry(func(nodeState *sriovnetworkv1.SriovNetworkNodeState) { nodeState.Status.Interfaces = w.status.Interfaces nodeState.Status.Bridges = w.status.Bridges + nodeState.Status.System = w.status.System if msg.lastSyncError != "" || msg.syncStatus == consts.SyncStatusSucceeded { // clear lastSyncError when sync Succeeded nodeState.Status.LastSyncError = msg.lastSyncError diff --git a/pkg/host/internal/lib/netlink/mock/mock_netlink.go b/pkg/host/internal/lib/netlink/mock/mock_netlink.go index 758346a3f..ec136bf29 100644 --- 
a/pkg/host/internal/lib/netlink/mock/mock_netlink.go +++ b/pkg/host/internal/lib/netlink/mock/mock_netlink.go @@ -145,21 +145,6 @@ func (mr *MockNetlinkLibMockRecorder) DevlinkSetDeviceParam(bus, device, param, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DevlinkSetDeviceParam", reflect.TypeOf((*MockNetlinkLib)(nil).DevlinkSetDeviceParam), bus, device, param, cmode, value) } -// DiscoverRDMASubsystem mocks base method. -func (m *MockNetlinkLib) DiscoverRDMASubsystem() (string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") - ret0, _ := ret[0].(string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. -func (mr *MockNetlinkLibMockRecorder) DiscoverRDMASubsystem() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockNetlinkLib)(nil).DiscoverRDMASubsystem)) -} - // IsLinkAdminStateUp mocks base method. func (m *MockNetlinkLib) IsLinkAdminStateUp(link netlink.Link) bool { m.ctrl.T.Helper() @@ -304,6 +289,21 @@ func (mr *MockNetlinkLibMockRecorder) RdmaLinkByName(name interface{}) *gomock.C return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RdmaLinkByName", reflect.TypeOf((*MockNetlinkLib)(nil).RdmaLinkByName), name) } +// RdmaSystemGetNetnsMode mocks base method. +func (m *MockNetlinkLib) RdmaSystemGetNetnsMode() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RdmaSystemGetNetnsMode") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// RdmaSystemGetNetnsMode indicates an expected call of RdmaSystemGetNetnsMode. +func (mr *MockNetlinkLibMockRecorder) RdmaSystemGetNetnsMode() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RdmaSystemGetNetnsMode", reflect.TypeOf((*MockNetlinkLib)(nil).RdmaSystemGetNetnsMode)) +} + // VDPADelDev mocks base method. 
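// For reference, the numeric convention used consistently in this series for
// the mode string returned here: the "shared" RDMA netns mode corresponds to
// ib_core netns_mode=1 and "exclusive" to netns_mode=0, both in the generated
// modprobe.d file and in the kernel arguments added earlier in this patch.
// A small illustrative helper capturing that mapping (assumes fmt is imported;
// not part of the generated mock):
func rdmaNetnsModeValueSketch(mode string) (int, error) {
	switch mode {
	case "shared":
		return 1, nil
	case "exclusive":
		return 0, nil
	default:
		return 0, fmt.Errorf("unexpected rdma mode: %q", mode)
	}
}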
func (m *MockNetlinkLib) VDPADelDev(name string) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/netlink.go b/pkg/host/internal/lib/netlink/netlink.go index 7d857921d..ad6056710 100644 --- a/pkg/host/internal/lib/netlink/netlink.go +++ b/pkg/host/internal/lib/netlink/netlink.go @@ -68,8 +68,8 @@ type NetlinkLib interface { RdmaLinkByName(name string) (*netlink.RdmaLink, error) // IsLinkAdminStateUp checks if the admin state of a link is up IsLinkAdminStateUp(link Link) bool - // DiscoverRDMASubsystem returns RDMA subsystem mode - DiscoverRDMASubsystem() (string, error) + // RdmaSystemGetNetnsMode returns RDMA subsystem mode + RdmaSystemGetNetnsMode() (string, error) } type libWrapper struct{} @@ -188,7 +188,7 @@ func (w *libWrapper) IsLinkAdminStateUp(link Link) bool { return link.Attrs().Flags&net.FlagUp == 1 } -// DiscoverRDMASubsystem returns RDMA subsystem mode -func (w *libWrapper) DiscoverRDMASubsystem() (string, error) { +// RdmaSystemGetNetnsMode returns RDMA subsystem mode +func (w *libWrapper) RdmaSystemGetNetnsMode() (string, error) { return netlink.RdmaSystemGetNetnsMode() } diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index 940c4b248..3ac17cf8f 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -431,8 +431,7 @@ func (n *network) GetPciAddressFromInterfaceName(interfaceName string) (string, } func (n *network) DiscoverRDMASubsystem() (string, error) { - log.Log.Info("DiscoverRDMASubsystem(): retrieving RDMA subsystem mode") - subsystem, err := n.netlinkLib.DiscoverRDMASubsystem() + subsystem, err := n.netlinkLib.RdmaSystemGetNetnsMode() if err != nil { log.Log.Error(err, "DiscoverRDMASubsystem(): failed to get RDMA subsystem mode") @@ -443,19 +442,28 @@ func (n *network) DiscoverRDMASubsystem() (string, error) { } func (n *network) SetRDMASubsystem(mode string) error { - log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode") + log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode", "mode", mode) + path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "sriov_network_operator_modules_config.conf") + + if mode == "" { + err := os.Remove(path) + if err != nil && !errors.Is(err, os.ErrNotExist) { + log.Log.Error(err, "failed to remove ib_core config file") + return err + } + return nil + } modeValue := 1 if mode == "exclusive" { modeValue = 0 } - config := fmt.Sprintf("options ib_core netns_mode=%d\n", modeValue) - path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "ib_core.conf") - err := os.WriteFile(path, []byte(config), 0644) + config := fmt.Sprintf("# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=%d\n", modeValue) + err := os.WriteFile(path, []byte(config), 0644) if err != nil { - log.Log.Error(err, "SetRDMASubsystem(): failed to write ib_core config") - return fmt.Errorf("failed to write ib_core config: %v", err) + log.Log.Error(err, "SetRDMASubsystem(): failed to write sriov_network_operator_modules_config.conf") + return fmt.Errorf("failed to write sriov_network_operator_modules_config.conf: %v", err) } return nil diff --git a/pkg/host/internal/network/network_test.go b/pkg/host/internal/network/network_test.go index 51c56b875..3e197c3f8 100644 --- a/pkg/host/internal/network/network_test.go +++ b/pkg/host/internal/network/network_test.go @@ -285,7 +285,7 @@ var _ = Describe("Network", func() { }) Context("DiscoverRDMASubsystem", func() { It("Should get RDMA 
Subsystem using netlink", func() { - netlinkLibMock.EXPECT().DiscoverRDMASubsystem().Return("shared", nil) + netlinkLibMock.EXPECT().RdmaSystemGetNetnsMode().Return("shared", nil) pci, err := n.DiscoverRDMASubsystem() Expect(err).NotTo(HaveOccurred()) @@ -297,21 +297,21 @@ var _ = Describe("Network", func() { helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ Dirs: []string{"/host/etc/modprobe.d"}, Files: map[string][]byte{ - "/host/etc/modprobe.d/ib_core.conf": {}, + "/host/etc/modprobe.d/sriov_network_operator_modules_config.conf": {}, }, }) Expect(n.SetRDMASubsystem("shared")).NotTo(HaveOccurred()) - helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=1\n") + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/sriov_network_operator_modules_config.conf", "# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=1\n") }) It("Should set RDMA Subsystem exclusive mode", func() { helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ Dirs: []string{"/host/etc/modprobe.d"}, Files: map[string][]byte{ - "/host/etc/modprobe.d/ib_core.conf": {}, + "/host/etc/modprobe.d/sriov_network_operator_modules_config.conf": {}, }, }) Expect(n.SetRDMASubsystem("exclusive")).NotTo(HaveOccurred()) - helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=0\n") + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/sriov_network_operator_modules_config.conf", "# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=0\n") }) }) }) diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 552f8142a..948459a7f 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -1,11 +1,8 @@ package generic import ( - "bytes" "errors" - "os/exec" - "strconv" - "strings" + "fmt" "syscall" "sigs.k8s.io/controller-runtime/pkg/log" @@ -48,12 +45,14 @@ type DriverState struct { type DriverStateMapType map[uint]*DriverState +type KargStateMapType map[string]bool + type GenericPlugin struct { PluginName string SpecVersion string DesireState *sriovnetworkv1.SriovNetworkNodeState DriverStateMap DriverStateMapType - DesiredKernelArgs map[string]bool + DesiredKernelArgs KargStateMapType helpers helper.HostHelpersInterface skipVFConfiguration bool skipBridgeConfiguration bool @@ -82,7 +81,7 @@ type genericPluginOptions struct { skipBridgeConfiguration bool } -const scriptsPath = "bindata/scripts/enable-kargs.sh" +const scriptsPath = "bindata/scripts/kargs.sh" // Initialize our plugin and set up initial values func NewGenericPlugin(helpers helper.HostHelpersInterface, options ...Option) (plugin.VendorPlugin, error) { @@ -112,11 +111,27 @@ func NewGenericPlugin(helpers helper.HostHelpersInterface, options ...Option) (p NeedDriverFunc: needDriverCheckVdpaType, DriverLoaded: false, } + + // To maintain backward compatibility we don't remove the intel_iommu, iommu and pcirealloc + // kernel args if they are configured + kargs, err := helpers.GetCurrentKernelArgs() + if err != nil { + return nil, err + } + desiredKernelArgs := KargStateMapType{ + consts.KernelArgPciRealloc: helpers.IsKernelArgsSet(kargs, consts.KernelArgPciRealloc), + consts.KernelArgIntelIommu: helpers.IsKernelArgsSet(kargs, consts.KernelArgIntelIommu), + consts.KernelArgIommuPt: helpers.IsKernelArgsSet(kargs, consts.KernelArgIommuPt), + consts.KernelArgIommuPassthrough: helpers.IsKernelArgsSet(kargs, 
consts.KernelArgIommuPassthrough), + consts.KernelArgRdmaShared: false, + consts.KernelArgRdmaExclusive: false, + } + return &GenericPlugin{ PluginName: PluginName, SpecVersion: "1.0", DriverStateMap: driverStateMap, - DesiredKernelArgs: make(map[string]bool), + DesiredKernelArgs: desiredKernelArgs, helpers: helpers, skipVFConfiguration: cfg.skipVFConfiguration, skipBridgeConfiguration: cfg.skipBridgeConfiguration, @@ -179,18 +194,13 @@ func (p *GenericPlugin) CheckStatusChanges(current *sriovnetworkv1.SriovNetworkN } } - missingKernelArgs, err := p.getMissingKernelArgs() + shouldUpdate, err := p.shouldUpdateKernelArgs() if err != nil { log.Log.Error(err, "generic-plugin CheckStatusChanges(): failed to verify missing kernel arguments") return false, err } - if len(missingKernelArgs) != 0 { - log.Log.V(0).Info("generic-plugin CheckStatusChanges(): kernel args missing", - "kernelArgs", missingKernelArgs) - } - - return len(missingKernelArgs) != 0, nil + return shouldUpdate, nil } func (p *GenericPlugin) syncDriverState() error { @@ -228,7 +238,7 @@ func (p *GenericPlugin) Apply() error { p.DesireState.Status.Interfaces, p.skipVFConfiguration); err != nil { // Catch the "cannot allocate memory" error and try to use PCI realloc if errors.Is(err, syscall.ENOMEM) { - p.addToDesiredKernelArgs(consts.KernelArgPciRealloc) + p.enableDesiredKernelArgs(consts.KernelArgPciRealloc) } return err } @@ -264,85 +274,84 @@ func needDriverCheckVdpaType(state *sriovnetworkv1.SriovNetworkNodeState, driver return false } -// setKernelArg Tries to add the kernel args via ostree or grubby. -func setKernelArg(karg string) (bool, error) { - log.Log.Info("generic plugin setKernelArg()") - var stdout, stderr bytes.Buffer - cmd := exec.Command("/bin/sh", scriptsPath, karg) - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { +// editKernelArg Tries to add the kernel args via ostree or grubby. +func editKernelArg(helper helper.HostHelpersInterface, mode, karg string) error { + log.Log.Info("generic plugin editKernelArg()", "mode", mode, "karg", karg) + _, _, err := helper.RunCommand("/bin/sh", scriptsPath, mode, karg) + if err != nil { // if grubby is not there log and assume kernel args are set correctly. if utils.IsCommandNotFound(err) { - log.Log.Error(err, "generic plugin setKernelArg(): grubby or ostree command not found. Please ensure that kernel arg are set", + log.Log.Error(err, "generic plugin editKernelArg(): grubby or ostree command not found. Please ensure that kernel arg are correct", "kargs", karg) - return false, nil - } - log.Log.Error(err, "generic plugin setKernelArg(): fail to enable kernel arg", "karg", karg) - return false, err - } - - i, err := strconv.Atoi(strings.TrimSpace(stdout.String())) - if err == nil { - if i > 0 { - log.Log.Info("generic plugin setKernelArg(): need to reboot node for kernel arg", "karg", karg) - return true, nil + return nil } + log.Log.Error(err, "generic plugin editKernelArg(): fail to edit kernel arg", "karg", karg) + return err } - return false, err + return nil } -// addToDesiredKernelArgs Should be called to queue a kernel arg to be added to the node. -func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { - if _, ok := p.DesiredKernelArgs[karg]; !ok { - log.Log.Info("generic plugin addToDesiredKernelArgs(): Adding to desired kernel arg", "karg", karg) - p.DesiredKernelArgs[karg] = false - } +// enableDesiredKernelArgs Should be called to mark a kernel arg as enabled. 
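// The kargs.sh script introduced earlier in this patch takes the operation as
// its first argument followed by the kernel argument to edit, so the call
// above expands to, for example:
//   /bin/sh bindata/scripts/kargs.sh add ib_core.netns_mode=0
//   /bin/sh bindata/scripts/kargs.sh remove ib_core.netns_mode=1
// A minimal sketch of driving editKernelArg for the two RDMA kernel arguments
// (illustrative only; the real flow goes through configRdmaKernelArg and
// syncDesiredKernelArgs below, which also cover the case where no RDMA mode
// is requested):
func applyRdmaKargsSketch(helpers helper.HostHelpersInterface, desiredMode string) error {
	toAdd, toRemove := consts.KernelArgRdmaShared, consts.KernelArgRdmaExclusive
	if desiredMode == consts.RdmaSubsystemModeExclusive {
		toAdd, toRemove = consts.KernelArgRdmaExclusive, consts.KernelArgRdmaShared
	}
	if err := editKernelArg(helpers, "add", toAdd); err != nil {
		return err
	}
	return editKernelArg(helpers, "remove", toRemove)
}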
+func (p *GenericPlugin) enableDesiredKernelArgs(karg string) { + log.Log.Info("generic plugin enableDesiredKernelArgs(): enable kernel arg", "karg", karg) + p.DesiredKernelArgs[karg] = true } -// getMissingKernelArgs gets Kernel arguments that have not been set. -func (p *GenericPlugin) getMissingKernelArgs() ([]string, error) { - missingArgs := make([]string, 0, len(p.DesiredKernelArgs)) - if len(p.DesiredKernelArgs) == 0 { - return nil, nil - } +// disableDesiredKernelArgs Should be called to mark a kernel arg as disabled. +func (p *GenericPlugin) disableDesiredKernelArgs(karg string) { + log.Log.Info("generic plugin disableDesiredKernelArgs(): disable kernel arg", "karg", karg) + p.DesiredKernelArgs[karg] = false +} +// shouldUpdateKernelArgs returns true if the DesiredKernelArgs state is not equal to the running kernel args in the system +func (p *GenericPlugin) shouldUpdateKernelArgs() (bool, error) { kargs, err := p.helpers.GetCurrentKernelArgs() if err != nil { - return nil, err + return false, err } - for desiredKarg := range p.DesiredKernelArgs { - if !p.helpers.IsKernelArgsSet(kargs, desiredKarg) { - missingArgs = append(missingArgs, desiredKarg) + for karg, kargState := range p.DesiredKernelArgs { + if kargState && !p.helpers.IsKernelArgsSet(kargs, karg) { + return true, nil + } + + if !kargState && p.helpers.IsKernelArgsSet(kargs, karg) { + return true, nil } } - return missingArgs, nil + return false, nil } // syncDesiredKernelArgs should be called to set all the kernel arguments. Returns bool if node update is needed. -func (p *GenericPlugin) syncDesiredKernelArgs(kargs []string) (bool, error) { +func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { + kargs, err := p.helpers.GetCurrentKernelArgs() + if err != nil { + return false, err + } + needReboot := false + for karg, kargState := range p.DesiredKernelArgs { + if kargState { + err = editKernelArg(p.helpers, "add", karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) + return false, err + } - for _, karg := range kargs { - if p.DesiredKernelArgs[karg] { - log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", - "karg", karg) - } - // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because - // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel - // argument is set once the daemon goes through node state sync again. 
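// To summarize the reconciliation above: shouldUpdateKernelArgs reports drift
// in either direction, i.e. a desired kernel argument that is missing from the
// running cmdline or a no-longer-desired one that is still present. A one-line
// illustrative reduction of that check:
func kargNeedsUpdateSketch(desiredState, presentOnCmdline bool) bool {
	// any mismatch between the desired state and the running kernel
	// arguments means the node needs a kernel-argument sync
	return desiredState != presentOnCmdline
}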
- update, err := setKernelArg(karg) - if err != nil { - log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) - return false, err - } - if update { - needReboot = true - log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", karg) + if !p.helpers.IsKernelArgsSet(kargs, karg) { + needReboot = true + } + } else { + err = editKernelArg(p.helpers, "remove", karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to remove kernel arg", "karg", karg) + return false, err + } + + if p.helpers.IsKernelArgsSet(kargs, karg) { + needReboot = true + } } - p.DesiredKernelArgs[karg] = true } return needReboot, nil } @@ -423,14 +432,14 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo kernelArgFnByCPUVendor := map[hostTypes.CPUVendor]func(){ hostTypes.CPUVendorIntel: func() { - p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) - p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + p.enableDesiredKernelArgs(consts.KernelArgIntelIommu) + p.enableDesiredKernelArgs(consts.KernelArgIommuPt) }, hostTypes.CPUVendorAMD: func() { - p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + p.enableDesiredKernelArgs(consts.KernelArgIommuPt) }, hostTypes.CPUVendorARM: func() { - p.addToDesiredKernelArgs(consts.KernelArgIommuPassthrough) + p.enableDesiredKernelArgs(consts.KernelArgIommuPassthrough) }, } @@ -448,26 +457,41 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo } } +func (p *GenericPlugin) configRdmaKernelArg(state *sriovnetworkv1.SriovNetworkNodeState) error { + if state.Spec.System.RdmaMode == "" { + p.disableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + p.disableDesiredKernelArgs(consts.KernelArgRdmaShared) + } else if state.Spec.System.RdmaMode == "shared" { + p.enableDesiredKernelArgs(consts.KernelArgRdmaShared) + p.disableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + } else if state.Spec.System.RdmaMode == "exclusive" { + p.enableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + p.disableDesiredKernelArgs(consts.KernelArgRdmaShared) + } else { + err := fmt.Errorf("unexpected rdma mode: %s", state.Spec.System.RdmaMode) + log.Log.Error(err, "generic-plugin configRdmaKernelArg(): failed to configure kernel arguments for rdma") + return err + } + + return p.helpers.SetRDMASubsystem(state.Spec.System.RdmaMode) +} + func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { needReboot := false p.addVfioDesiredKernelArg(state) - - missingKernelArgs, err := p.getMissingKernelArgs() + err := p.configRdmaKernelArg(state) if err != nil { - log.Log.Error(err, "generic-plugin needRebootNode(): failed to verify missing kernel arguments") return false, err } - if len(missingKernelArgs) != 0 { - needReboot, err = p.syncDesiredKernelArgs(missingKernelArgs) - if err != nil { - log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") - return false, err - } - if needReboot { - log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") - } + needReboot, err = p.syncDesiredKernelArgs() + if err != nil { + log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") + return false, err + } + if needReboot { + log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") } return needReboot, nil diff --git 
a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 0a6674712..2e2aed326 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -34,6 +34,16 @@ var _ = Describe("Generic plugin", func() { ctrl = gomock.NewController(t) hostHelper = mock_helper.NewMockHostHelpersInterface(ctrl) + hostHelper.EXPECT().SetRDMASubsystem("").Return(nil).AnyTimes() + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPassthrough).Return(false).AnyTimes() + + hostHelper.EXPECT().RunCommand(gomock.Any(), gomock.Any()).Return("", "", nil).AnyTimes() genericPlugin, err = NewGenericPlugin(hostHelper) Expect(err).ToNot(HaveOccurred()) @@ -898,20 +908,21 @@ var _ = Describe("Generic plugin", func() { }, } + rdmaState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{System: sriovnetworkv1.System{ + RdmaMode: consts.RdmaSubsystemModeShared, + }}, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{}, + } + It("should detect changes on status due to missing kernel args", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorIntel, nil) // Load required kernel args. genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIntelIommu: false, - consts.KernelArgIommuPt: false, - })) - - hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIntelIommu]).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPt]).To(BeTrue()) changed, err := genericPlugin.CheckStatusChanges(vfioNetworkNodeState) Expect(err).ToNot(HaveOccurred()) @@ -921,17 +932,52 @@ var _ = Describe("Generic plugin", func() { It("should set the correct kernel args on AMD CPUs", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorAMD, nil) genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIommuPt: false, - })) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPt]).To(BeTrue()) }) It("should set the correct kernel args on ARM CPUs", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorARM, nil) genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIommuPassthrough: false, - })) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPassthrough]).To(BeTrue()) + }) + + It("should enable rdma shared mode", func() { + 
hostHelper.EXPECT().SetRDMASubsystem(consts.RdmaSubsystemModeShared).Return(nil) + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeFalse()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + It("should enable rdma exclusive mode", func() { + hostHelper.EXPECT().SetRDMASubsystem(consts.RdmaSubsystemModeExclusive).Return(nil) + rdmaState.Spec.System.RdmaMode = consts.RdmaSubsystemModeExclusive + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeFalse()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeTrue()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + It("should not configure RDMA kernel args", func() { + hostHelper.EXPECT().SetRDMASubsystem("").Return(nil) + rdmaState.Spec.System = sriovnetworkv1.System{} + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeFalse()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeFalse()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeFalse()) }) }) diff --git a/test/conformance/tests/test_networkpool.go b/test/conformance/tests/test_networkpool.go new file mode 100644 index 000000000..47d929013 --- /dev/null +++ b/test/conformance/tests/test_networkpool.go @@ -0,0 +1,345 @@ +package tests + +import ( + "fmt" + "strconv" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "golang.org/x/net/context" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/pod" +) + +var _ = Describe("[sriov] NetworkPool", Ordered, func() { + var testNode string + var interfaces []*sriovv1.InterfaceExt + + BeforeAll(func() { + err := namespaces.Create(namespaces.Test, clients) + Expect(err).ToNot(HaveOccurred()) + err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + Expect(err).ToNot(HaveOccurred()) + + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + testNode, interfaces, err = sriovInfos.FindSriovDevicesAndNode() + Expect(err).ToNot(HaveOccurred()) + + By(fmt.Sprintf("Testing on node %s, %d devices found", testNode, len(interfaces))) + WaitForSRIOVStable() + }) + + AfterEach(func() { + err := namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + Expect(err).ToNot(HaveOccurred()) + + err = clients.DeleteAllOf(context.Background(), &sriovv1.SriovNetworkPoolConfig{}, client.InNamespace(operatorNamespace)) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + }) + + Context("Configure rdma namespace mode", func() { + It("should switch rdma mode", func() { + By("create a pool with only that node") + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + By("configure rdma mode to exclusive") + err := clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + nodeState := &sriovv1.SriovNetworkNodeState{} + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + + By("Checking rdma mode and kernel args") + output, _, err := runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=0 | wc 
-l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + By("configure rdma mode to shared") + networkPool.Spec.RdmaMode = consts.RdmaSubsystemModeShared + err = clients.Update(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + + By("Checking rdma mode and kernel args") + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + By("removing rdma mode configuration") + err = clients.Delete(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal("")) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + + By("Checking rdma mode and kernel args") + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "ls /host/etc/modprobe.d | grep sriov_network_operator_modules_config.conf | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + }) + }) + + Context("Check rdma metrics inside a pod in exclusive mode", func() { + var iface *sriovv1.InterfaceExt + + BeforeAll(func() { + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + for _, node := range sriovInfos.Nodes { + iface, err = sriovInfos.FindOneMellanoxSriovDevice(node) + if err == nil { + testNode = node + break + } + } + + if iface == nil { + Skip("no mellanox card available to test rdma") + } + + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + err = clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting 
for operator to finish the configuration") + WaitForSRIOVStable() + }) + + It("should run pod with RDMA cni and expose nic metrics and another one without rdma info", func() { + By("creating a policy") + resourceName := "testrdma" + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", + func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + By("Creating sriov network to use the rdma device") + sriovNetwork := &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rdmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + MetaPluginsConfig: `{"type": "rdma"}`, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-rdmanetwork", namespaces.Test) + + sriovNetwork = &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nordmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-nordmanetwork", namespaces.Test) + + podDefinition := pod.DefineWithNetworks([]string{"test-rdmanetwork"}) + firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + podDefinition = pod.DefineWithNetworks([]string{"test-nordmanetwork"}) + secondPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + firstPod = waitForPodRunning(firstPod) + secondPod = waitForPodRunning(secondPod) + + testedNode := &corev1.Node{} + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode}, testedNode) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + Expect(allocatable).ToNot(Equal(5)) + + By("restart device plugin") + pods, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=sriov-device-plugin", + FieldSelector: "spec.nodeName=" + testNode, + }) + Expect(err).ToNot(HaveOccurred()) + + for _, podObj := range pods.Items { + err = clients.Delete(context.Background(), &podObj) + Expect(err).ToNot(HaveOccurred()) + Eventually(func() bool { + searchPod := &corev1.Pod{} + err = clients.Get(context.Background(), client.ObjectKey{Name: podObj.Name, Namespace: podObj.Namespace}, searchPod) + if err != nil && errors.IsNotFound(err) { + return true + } + return false + }, 2*time.Minute, time.Second).Should(BeTrue()) + } + + By("checking the amount of allocatable devices remains after device plugin reset") + Consistently(func() int64 { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode}, testedNode) + Expect(err).ToNot(HaveOccurred()) + resNum := 
testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + newAllocatable, _ := resNum.AsInt64() + return newAllocatable + }, 1*time.Minute, 5*time.Second).Should(Equal(allocatable)) + + By("checking counters inside the pods") + strOut, _, err := pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ip link show net1 | grep net1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(strOut, "1")).To(BeTrue()) + strOut, _, err = pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/*/ports/*/hw_counters | wc -l") + strOut = strings.TrimSpace(strOut) + Expect(err).ToNot(HaveOccurred()) + num, err := strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically(">", 0)) + + strOut, _, err = pod.ExecCommand(clients, secondPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/ | wc -l") + Expect(err).ToNot(HaveOccurred()) + strOut = strings.TrimSpace(strOut) + num, err = strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically("==", 0)) + }) + }) + + Context("Check rdma metrics inside a pod in shared mode not exist", func() { + var iface *sriovv1.InterfaceExt + BeforeAll(func() { + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + for _, node := range sriovInfos.Nodes { + iface, err = sriovInfos.FindOneMellanoxSriovDevice(node) + if err == nil { + testNode = node + break + } + } + + if iface == nil { + Skip("no mellanox card available to test rdma") + } + + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeShared, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + err = clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + }) + + It("should run pod without RDMA cni and not expose nic metrics", func() { + By("creating a policy") + resourceName := "testrdma" + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", + func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + By("Creating sriov network to use the rdma device") + sriovNetwork := &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rdmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-rdmanetwork", namespaces.Test) + + podDefinition := pod.DefineWithNetworks([]string{"test-rdmanetwork"}) + firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + firstPod = waitForPodRunning(firstPod) + + strOut, _, err := pod.ExecCommand(clients, firstPod, 
"/bin/bash", "-c", "ip link show net1 | grep net1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(strOut, "1")).To(BeTrue()) + strOut, _, err = pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/*/ports/* | grep hw_counters | wc -l") + strOut = strings.TrimSpace(strOut) + Expect(err).ToNot(HaveOccurred()) + num, err := strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically("==", 0)) + }) + }) +}) diff --git a/test/scripts/enable-kargs_test.sh b/test/scripts/kargs_test.sh similarity index 61% rename from test/scripts/enable-kargs_test.sh rename to test/scripts/kargs_test.sh index 93a985700..053bd5200 100755 --- a/test/scripts/enable-kargs_test.sh +++ b/test/scripts/kargs_test.sh @@ -2,14 +2,14 @@ SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/enable-kargs.sh" +SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/kargs.sh" test_RpmOstree_Add_All_Arguments() { echo "a b c=d eee=fff" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted - output=`$SUT_SCRIPT X=Y W=Z` + output=`$SUT_SCRIPT add X=Y W=Z` assertEquals 0 $? assertEquals "2" $output @@ -22,7 +22,7 @@ test_RpmOstree_Add_Only_Missing_Arguments() { echo "a b c=d eee=fff K=L" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted - output=`$SUT_SCRIPT K=L X=Y` + output=`$SUT_SCRIPT add K=L X=Y` assertEquals 0 $? assertEquals "1" $output @@ -30,6 +30,29 @@ test_RpmOstree_Add_Only_Missing_Arguments() { assertNotContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--append K=L" } +test_RpmOstree_Delete_All_Arguments() { + echo "a b c=d eee=fff X=Y W=Z" > ${FAKE_HOST}/proc/cmdline + touch ${FAKE_HOST}/run/ostree-booted + + output=`$SUT_SCRIPT remove X=Y W=Z` + assertEquals 0 $? + assertEquals "2" $output + + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete X=Y" + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete W=Z" +} + +test_RpmOstree_Delete_Only_Exist_Arguments() { + echo "a b c=d eee=fff X=Y" > ${FAKE_HOST}/proc/cmdline + touch ${FAKE_HOST}/run/ostree-booted + + output=`$SUT_SCRIPT remove X=Y W=Z` + assertEquals 0 $? + assertEquals "1" $output + + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete X=Y" + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete W=Z" +} ###### Mock /host directory ###### export FAKE_HOST="$(mktemp -d)" diff --git a/test/scripts/rpm-ostree_mock b/test/scripts/rpm-ostree_mock index db6f66040..06e6b1905 100755 --- a/test/scripts/rpm-ostree_mock +++ b/test/scripts/rpm-ostree_mock @@ -10,3 +10,9 @@ then # Caller is trying to read kernel arguments. cat /proc/cmdline fi + +if ! echo "$*" | grep -q "\--delete" +then + # Caller is trying to read kernel arguments. 
+ cat /proc/cmdline +fi From baa41c97adeb9249f30c5707f4bc8deee5e30c31 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 7 Nov 2024 11:55:28 +0200 Subject: [PATCH 32/59] redesign device plugin always deploy sriov network device plugin and use a label to enable or disable it on the nodes Signed-off-by: Sebastian Sch --- .../plugins/sriov-device-plugin.yaml | 2 +- controllers/helper.go | 130 ++----- controllers/helper_test.go | 330 ------------------ .../sriovnetworknodepolicy_controller.go | 50 ++- .../sriovnetworknodepolicy_controller_test.go | 137 +++++++- controllers/sriovoperatorconfig_controller.go | 8 +- .../sriovoperatorconfig_controller_test.go | 110 +++--- controllers/suite_test.go | 7 + deploy/clusterrole.yaml | 6 - deploy/role.yaml | 12 +- .../templates/clusterrole.yaml | 6 - .../templates/role.yaml | 11 +- pkg/consts/constants.go | 4 + pkg/utils/cluster.go | 82 ++++- 14 files changed, 353 insertions(+), 542 deletions(-) delete mode 100644 controllers/helper_test.go diff --git a/bindata/manifests/plugins/sriov-device-plugin.yaml b/bindata/manifests/plugins/sriov-device-plugin.yaml index a0f433a06..3660ebf79 100644 --- a/bindata/manifests/plugins/sriov-device-plugin.yaml +++ b/bindata/manifests/plugins/sriov-device-plugin.yaml @@ -27,7 +27,7 @@ spec: hostNetwork: true nodeSelector: {{- range $key, $value := .NodeSelectorField }} - {{ $key }}: {{ $value }} + {{ $key }}: "{{ $value }}" {{- end }} tolerations: - operator: Exists diff --git a/controllers/helper.go b/controllers/helper.go index b90ad44f8..58c3ae697 100644 --- a/controllers/helper.go +++ b/controllers/helper.go @@ -22,7 +22,6 @@ import ( "encoding/json" "fmt" "os" - "sort" "strings" errs "github.com/pkg/errors" @@ -51,7 +50,7 @@ import ( ) var ( - webhooks = map[string](string){ + webhooks = map[string]string{ constants.InjectorWebHookName: constants.InjectorWebHookPath, constants.OperatorWebHookName: constants.OperatorWebHookPath, } @@ -162,29 +161,33 @@ func formatJSON(str string) (string, error) { return prettyJSON.String(), nil } +// GetDefaultNodeSelector return a nodeSelector with worker and linux os func GetDefaultNodeSelector() map[string]string { - return map[string]string{"node-role.kubernetes.io/worker": "", - "kubernetes.io/os": "linux"} + return map[string]string{ + "node-role.kubernetes.io/worker": "", + "kubernetes.io/os": "linux", + } } -// hasNoValidPolicy returns true if no SriovNetworkNodePolicy -// or only the (deprecated) "default" policy is present -func hasNoValidPolicy(pl []sriovnetworkv1.SriovNetworkNodePolicy) bool { - switch len(pl) { - case 0: - return true - case 1: - return pl[0].Name == constants.DefaultPolicyName - default: - return false +// GetDefaultNodeSelectorForDevicePlugin return a nodeSelector with worker linux os +// and the enabled sriov device plugin +func GetNodeSelectorForDevicePlugin(dc *sriovnetworkv1.SriovOperatorConfig) map[string]string { + if len(dc.Spec.ConfigDaemonNodeSelector) == 0 { + return map[string]string{ + "kubernetes.io/os": "linux", + constants.SriovDevicePluginLabel: constants.SriovDevicePluginLabelEnabled, + } } + + tmp := dc.Spec.DeepCopy() + tmp.ConfigDaemonNodeSelector[constants.SriovDevicePluginLabel] = constants.SriovDevicePluginLabelEnabled + return tmp.ConfigDaemonNodeSelector } func syncPluginDaemonObjs(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, - dc *sriovnetworkv1.SriovOperatorConfig, - pl *sriovnetworkv1.SriovNetworkNodePolicyList) error { + dc *sriovnetworkv1.SriovOperatorConfig) error { logger := 
log.Log.WithName("syncPluginDaemonObjs") logger.V(1).Info("Start to sync sriov daemons objects") @@ -195,7 +198,7 @@ func syncPluginDaemonObjs(ctx context.Context, data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") data.Data["ResourcePrefix"] = vars.ResourcePrefix data.Data["ImagePullSecrets"] = GetImagePullSecrets() - data.Data["NodeSelectorField"] = GetDefaultNodeSelector() + data.Data["NodeSelectorField"] = GetNodeSelectorForDevicePlugin(dc) data.Data["UseCDI"] = dc.Spec.UseCDI objs, err := renderDsForCR(constants.PluginPath, &data) if err != nil { @@ -203,34 +206,9 @@ func syncPluginDaemonObjs(ctx context.Context, return err } - if hasNoValidPolicy(pl.Items) { - for _, obj := range objs { - err := deleteK8sResource(ctx, client, obj) - if err != nil { - return err - } - } - return nil - } - // Sync DaemonSets for _, obj := range objs { - if obj.GetKind() == constants.DaemonSet && len(dc.Spec.ConfigDaemonNodeSelector) > 0 { - scheme := kscheme.Scheme - ds := &appsv1.DaemonSet{} - err = scheme.Convert(obj, ds, nil) - if err != nil { - logger.Error(err, "Fail to convert to DaemonSet") - return err - } - ds.Spec.Template.Spec.NodeSelector = dc.Spec.ConfigDaemonNodeSelector - err = scheme.Convert(ds, obj, nil) - if err != nil { - logger.Error(err, "Fail to convert to Unstructured") - return err - } - } - err = syncDsObject(ctx, client, scheme, dc, pl, obj) + err = syncDsObject(ctx, client, scheme, dc, obj) if err != nil { logger.Error(err, "Couldn't sync SR-IoV daemons objects") return err @@ -240,14 +218,7 @@ func syncPluginDaemonObjs(ctx context.Context, return nil } -func deleteK8sResource(ctx context.Context, client k8sclient.Client, in *uns.Unstructured) error { - if err := apply.DeleteObject(ctx, client, in); err != nil { - return fmt.Errorf("failed to delete object %v with err: %v", in, err) - } - return nil -} - -func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, pl *sriovnetworkv1.SriovNetworkNodePolicyList, obj *uns.Unstructured) error { +func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, obj *uns.Unstructured) error { logger := log.Log.WithName("syncDsObject") kind := obj.GetKind() logger.V(1).Info("Start to sync Objects", "Kind", kind) @@ -267,7 +238,7 @@ func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime. logger.Error(err, "Fail to convert to DaemonSet") return err } - err = syncDaemonSet(ctx, client, scheme, dc, pl, ds) + err = syncDaemonSet(ctx, client, scheme, dc, ds) if err != nil { logger.Error(err, "Fail to sync DaemonSet", "Namespace", ds.Namespace, "Name", ds.Name) return err @@ -276,54 +247,6 @@ func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime. return nil } -func setDsNodeAffinity(pl *sriovnetworkv1.SriovNetworkNodePolicyList, ds *appsv1.DaemonSet) error { - terms := nodeSelectorTermsForPolicyList(pl.Items) - if len(terms) > 0 { - ds.Spec.Template.Spec.Affinity = &corev1.Affinity{ - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: terms, - }, - }, - } - } - return nil -} - -func nodeSelectorTermsForPolicyList(policies []sriovnetworkv1.SriovNetworkNodePolicy) []corev1.NodeSelectorTerm { - terms := []corev1.NodeSelectorTerm{} - for _, p := range policies { - // Note(adrianc): default policy is deprecated and ignored. 
- if p.Name == constants.DefaultPolicyName { - continue - } - - if len(p.Spec.NodeSelector) == 0 { - continue - } - expressions := []corev1.NodeSelectorRequirement{} - for k, v := range p.Spec.NodeSelector { - exp := corev1.NodeSelectorRequirement{ - Operator: corev1.NodeSelectorOpIn, - Key: k, - Values: []string{v}, - } - expressions = append(expressions, exp) - } - // sorting is needed to keep the daemon spec stable. - // the items are popped in a random order from the map - sort.Slice(expressions, func(i, j int) bool { - return expressions[i].Key < expressions[j].Key - }) - nodeSelector := corev1.NodeSelectorTerm{ - MatchExpressions: expressions, - } - terms = append(terms, nodeSelector) - } - - return terms -} - // renderDsForCR returns a busybox pod with the same name/namespace as the cr func renderDsForCR(path string, data *render.RenderData) ([]*uns.Unstructured, error) { logger := log.Log.WithName("renderDsForCR") @@ -336,16 +259,11 @@ func renderDsForCR(path string, data *render.RenderData) ([]*uns.Unstructured, e return objs, nil } -func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, pl *sriovnetworkv1.SriovNetworkNodePolicyList, in *appsv1.DaemonSet) error { +func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, in *appsv1.DaemonSet) error { logger := log.Log.WithName("syncDaemonSet") logger.V(1).Info("Start to sync DaemonSet", "Namespace", in.Namespace, "Name", in.Name) var err error - if pl != nil { - if err = setDsNodeAffinity(pl, in); err != nil { - return err - } - } if err = controllerutil.SetControllerReference(dc, in, scheme); err != nil { return err } diff --git a/controllers/helper_test.go b/controllers/helper_test.go deleted file mode 100644 index d998cf0da..000000000 --- a/controllers/helper_test.go +++ /dev/null @@ -1,330 +0,0 @@ -/* - - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package controllers - -import ( - "context" - "sync" - "testing" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - - "github.com/google/go-cmp/cmp" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - controllerruntime "sigs.k8s.io/controller-runtime" - - sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" -) - -func TestNodeSelectorMerge(t *testing.T) { - table := []struct { - tname string - policies []sriovnetworkv1.SriovNetworkNodePolicy - expected []corev1.NodeSelectorTerm - }{ - { - tname: "testoneselector", - policies: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "foo": "bar", - }, - }, - }, - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "bb": "cc", - }, - }, - }, - }, - expected: []corev1.NodeSelectorTerm{ - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: corev1.NodeSelectorOpIn, - Key: "foo", - Values: []string{"bar"}, - }, - }, - }, - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb", - Values: []string{"cc"}, - }, - }, - }, - }, - }, - { - tname: "testtwoselectors", - policies: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "foo": "bar", - "foo1": "bar1", - }, - }, - }, - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "bb": "cc", - "bb1": "cc1", - "bb2": "cc2", - }, - }, - }, - }, - expected: []corev1.NodeSelectorTerm{ - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: corev1.NodeSelectorOpIn, - Key: "foo", - Values: []string{"bar"}, - }, - { - Operator: corev1.NodeSelectorOpIn, - Key: "foo1", - Values: []string{"bar1"}, - }, - }, - }, - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb", - Values: []string{"cc"}, - }, - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb1", - Values: []string{"cc1"}, - }, - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb2", - Values: []string{"cc2"}, - }, - }, - }, - }, - }, - { - tname: "testemptyselector", - policies: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{}, - }, - }, - }, - expected: []corev1.NodeSelectorTerm{}, - }, - } - - for _, tc := range table { - t.Run(tc.tname, func(t *testing.T) { - selectors := nodeSelectorTermsForPolicyList(tc.policies) - if !cmp.Equal(selectors, tc.expected) { - t.Error(tc.tname, "Selectors not as expected", cmp.Diff(selectors, tc.expected)) - } - }) - } -} - -var _ = Describe("Helper Validation", Ordered, func() { - - var cancel context.CancelFunc - var ctx context.Context - var dc *sriovnetworkv1.SriovOperatorConfig - var in *appsv1.DaemonSet - - BeforeAll(func() { - By("Setup controller manager") - k8sManager, err := setupK8sManagerForTest() - Expect(err).ToNot(HaveOccurred()) - - ctx, cancel = context.WithCancel(context.Background()) - - wg := sync.WaitGroup{} - wg.Add(1) - go func() { - defer wg.Done() - defer GinkgoRecover() - By("Start controller manager") - err := k8sManager.Start(ctx) - Expect(err).ToNot(HaveOccurred()) - }() - - DeferCleanup(func() { - By("Shutdown controller manager") - cancel() - wg.Wait() - }) - }) - - BeforeEach(func() { - dc = &sriovnetworkv1.SriovOperatorConfig{ - 
ObjectMeta: controllerruntime.ObjectMeta{ - Name: "default", - Namespace: vars.Namespace, - UID: "12312312"}} - in = &appsv1.DaemonSet{ - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "sriov-device-plugin", - Namespace: vars.Namespace}, - Spec: appsv1.DaemonSetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "sriov-device-plugin"}}, - Template: corev1.PodTemplateSpec{ - ObjectMeta: controllerruntime.ObjectMeta{ - Labels: map[string]string{"app": "sriov-device-plugin"}}, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test:latest", - Name: "test", - }, - }, - }, - }}} - - err := k8sClient.Delete(ctx, in) - if err != nil { - Expect(errors.IsNotFound(err)).To(BeTrue()) - } - }) - - Context("syncDaemonSet", func() { - It("should create a new daemon", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: []sriovnetworkv1.SriovNetworkNodePolicy{ - {ObjectMeta: controllerruntime.ObjectMeta{Name: "test", Namespace: vars.Namespace}}, - }} - err := syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).To(BeNil()) - }) - It("should update affinity", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test": "test"}, - }, - }, - }} - - err := k8sClient.Create(ctx, in) - Expect(err).ToNot(HaveOccurred()) - - err = syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution).ToNot(BeNil()) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)).To(Equal(1)) - }) - It("should update affinity with multiple", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test": "test"}, - }, - }, - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test1", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test1": "test"}, - }, - }, - }} - - err := k8sClient.Create(ctx, in) - Expect(err).ToNot(HaveOccurred()) - - err = syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution).ToNot(BeNil()) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)).To(Equal(2)) - }) - It("should switch affinity", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test1", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test1": 
"test"}, - }, - }, - }} - - in.Spec.Template.Spec.Affinity = &corev1.Affinity{ - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: []corev1.NodeSelectorTerm{{ - MatchExpressions: []corev1.NodeSelectorRequirement{{ - Operator: corev1.NodeSelectorOpIn, - Key: "test", - Values: []string{"test"}, - }}, - }}, - }, - }, - } - - err := k8sClient.Create(ctx, in) - Expect(err).ToNot(HaveOccurred()) - - err = syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution).ToNot(BeNil()) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)).To(Equal(1)) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions)).To(Equal(1)) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions[0].Key).To(Equal("test1")) - }) - }) -}) diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 62218436f..29438b176 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -46,6 +46,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -133,10 +134,6 @@ func (r *SriovNetworkNodePolicyReconciler) Reconcile(ctx context.Context, req ct if err = r.syncDevicePluginConfigMap(ctx, defaultOpConf, policyList, nodeList); err != nil { return reconcile.Result{}, err } - // Render and sync Daemon objects - if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultOpConf, policyList); err != nil { - return reconcile.Result{}, err - } // All was successful. Request that this be re-triggered after ResyncPeriod, // so we can reconcile state again. @@ -182,6 +179,12 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er Info("Enqueuing sync for create event", "resource", e.Object.GetName()) qHandler(q) }, + UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { + reflect.DeepEqual(e.ObjectOld.GetLabels(), e.ObjectNew.GetLabels()) + log.Log.WithName("SriovNetworkNodePolicy"). + Info("Enqueuing sync for create event", "resource", e.ObjectNew.GetName()) + qHandler(q) + }, DeleteFunc: func(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). 
Info("Enqueuing sync for delete event", "resource", e.Object.GetName()) @@ -220,6 +223,30 @@ func (r *SriovNetworkNodePolicyReconciler) syncDevicePluginConfigMap(ctx context return err } configData[node.Name] = string(config) + + if data.ResourceList == nil || len(data.ResourceList) == 0 { + // if we don't have policies we should add the disabled label for the device plugin + err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelDisabled, r.Client) + if err != nil { + logger.Error(err, "failed to label node for device plugin label", + "labelKey", + constants.SriovDevicePluginLabel, + "labelValue", + constants.SriovDevicePluginLabelDisabled) + return err + } + } else { + // if we have policies we should add the enabled label for the device plugin + err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelEnabled, r.Client) + if err != nil { + logger.Error(err, "failed to label node for device plugin label", + "labelKey", + constants.SriovDevicePluginLabel, + "labelValue", + constants.SriovDevicePluginLabelEnabled) + return err + } + } } cm := &corev1.ConfigMap{ @@ -304,8 +331,15 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con } } if !found { + // remove device plugin labels + logger.Info("removing device plugin label from node as SriovNetworkNodeState doesn't exist", "nodeStateName", ns.Name) + err = utils.RemoveLabelFromNode(ctx, ns.Name, constants.SriovDevicePluginLabel, r.Client) + if err != nil { + logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name) + return err + } logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) - err := r.Delete(ctx, &ns, &client.DeleteOptions{}) + err = r.Delete(ctx, &ns, &client.DeleteOptions{}) if err != nil { logger.Error(err, "Fail to Delete", "SriovNetworkNodeState CR:", ns.GetName()) return err @@ -423,13 +457,13 @@ func (r *SriovNetworkNodePolicyReconciler) renderDevicePluginConfigData(ctx cont found, i := resourceNameInList(p.Spec.ResourceName, &rcl) if found { - err := updateDevicePluginResource(ctx, &rcl.ResourceList[i], &p, nodeState) + err := updateDevicePluginResource(&rcl.ResourceList[i], &p, nodeState) if err != nil { return rcl, err } logger.V(1).Info("Update resource", "Resource", rcl.ResourceList[i]) } else { - rc, err := createDevicePluginResource(ctx, &p, nodeState) + rc, err := createDevicePluginResource(&p, nodeState) if err != nil { return rcl, err } @@ -450,7 +484,6 @@ func resourceNameInList(name string, rcl *dptypes.ResourceConfList) (bool, int) } func createDevicePluginResource( - ctx context.Context, p *sriovnetworkv1.SriovNetworkNodePolicy, nodeState *sriovnetworkv1.SriovNetworkNodeState) (*dptypes.ResourceConfig, error) { netDeviceSelectors := dptypes.NetDeviceSelectors{} @@ -524,7 +557,6 @@ func createDevicePluginResource( } func updateDevicePluginResource( - ctx context.Context, rc *dptypes.ResourceConfig, p *sriovnetworkv1.SriovNetworkNodePolicy, nodeState *sriovnetworkv1.SriovNetworkNodeState) error { diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index a116efe87..abdddbc91 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ b/controllers/sriovnetworknodepolicy_controller_test.go @@ -3,14 +3,20 @@ package controllers import ( "context" "encoding/json" + "sync" "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" "github.com/google/go-cmp/cmp" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" - + k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" @@ -126,3 +132,132 @@ func TestRenderDevicePluginConfigData(t *testing.T) { }) } } + +var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { + var cancel context.CancelFunc + var ctx context.Context + + BeforeAll(func() { + By("Create SriovOperatorConfig controller k8s objs") + config := makeDefaultSriovOpConfig() + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + DeferCleanup(func() { + err := k8sClient.Delete(context.Background(), config) + Expect(err).ToNot(HaveOccurred()) + }) + + // setup controller manager + By("Setup controller manager") + k8sManager, err := setupK8sManagerForTest() + Expect(err).ToNot(HaveOccurred()) + + err = (&SriovNetworkNodePolicyReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + FeatureGate: featuregate.New(), + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + + ctx, cancel = context.WithCancel(context.Background()) + + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + defer GinkgoRecover() + By("Start controller manager") + err := k8sManager.Start(ctx) + Expect(err).ToNot(HaveOccurred()) + }() + + DeferCleanup(func() { + By("Shut down manager") + cancel() + wg.Wait() + }) + }) + AfterEach(func() { + err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{}) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, k8sclient.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, k8sclient.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + }) + Context("device plugin labels", func() { + It("Should add the right labels to the nodes", func() { + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{"kubernetes.io/os": "linux", + "node-role.kubernetes.io/worker": ""}, + }} + Expect(k8sClient.Create(ctx, node)).To(Succeed()) + + nodeState := &sriovnetworkv1.SriovNetworkNodeState{} + Eventually(func(g Gomega) { + err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: "node0", Namespace: testNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + }, time.Minute, time.Second).Should(Succeed()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name}, node) + g.Expect(err).ToNot(HaveOccurred()) + value, exist := node.Labels[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + g.Expect(value).To(Equal(consts.SriovDevicePluginLabelDisabled)) + }, time.Minute, time.Second).Should(Succeed()) + + nodeState.Status.Interfaces = sriovnetworkv1.InterfaceExts{ + sriovnetworkv1.InterfaceExt{ + Vendor: "8086", + Driver: "i40e", + Mtu: 1500, + Name: "ens803f0", + PciAddress: "0000:86:00.0", + NumVfs: 0, + TotalVfs: 64, + }, + } + err := k8sClient.Status().Update(context.Background(), nodeState) + Expect(err).ToNot(HaveOccurred()) + + somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{} + 
somePolicy.SetNamespace(testNamespace) + somePolicy.SetName("some-policy") + somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{ + NumVfs: 5, + NodeSelector: map[string]string{"node-role.kubernetes.io/worker": ""}, + NicSelector: sriovnetworkv1.SriovNetworkNicSelector{Vendor: "8086"}, + Priority: 20, + } + Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name}, node) + g.Expect(err).ToNot(HaveOccurred()) + value, exist := node.Labels[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + g.Expect(value).To(Equal(consts.SriovDevicePluginLabelEnabled)) + }, time.Minute, time.Second).Should(Succeed()) + + delete(node.Labels, "node-role.kubernetes.io/worker") + err = k8sClient.Update(context.Background(), node) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name}, node) + g.Expect(err).ToNot(HaveOccurred()) + _, exist := node.Labels[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeFalse()) + }, time.Minute, time.Second).Should(Succeed()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name, Namespace: testNamespace}, nodeState) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.IsNotFound(err)).To(BeTrue()) + }, time.Minute, time.Second).Should(Succeed()) + }) + }) +}) diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index c9f21f428..f79614c44 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -44,12 +44,12 @@ import ( machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - apply "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply" - consts "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" - render "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -140,7 +140,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. 
return reconcile.Result{}, err } - if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig, policyList); err != nil { + if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig); err != nil { return reconcile.Result{}, err } diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 47e4fc09d..4674bd5b9 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -2,7 +2,6 @@ package controllers import ( "context" - "fmt" "os" "strings" "sync" @@ -30,7 +29,7 @@ import ( mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" - util "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" ) var _ = Describe("SriovOperatorConfig controller", Ordered, func() { @@ -41,10 +40,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { By("Create SriovOperatorConfig controller k8s objs") config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) - DeferCleanup(func() { - err := k8sClient.Delete(context.Background(), config) - Expect(err).ToNot(HaveOccurred()) - }) somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{} somePolicy.SetNamespace(testNamespace) @@ -56,10 +51,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Priority: 20, } Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred()) - DeferCleanup(func() { - err := k8sClient.Delete(context.Background(), somePolicy) - Expect(err).ToNot(HaveOccurred()) - }) // setup controller manager By("Setup controller manager") @@ -101,6 +92,27 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { }) Context("When is up", func() { + AfterAll(func() { + err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{}) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, client.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, client.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovOperatorConfig{}, client.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + operatorConfigList := &sriovnetworkv1.SriovOperatorConfigList{} + Eventually(func(g Gomega) { + err = k8sClient.List(context.Background(), operatorConfigList, &client.ListOptions{Namespace: vars.Namespace}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(len(operatorConfigList.Items)).To(Equal(0)) + }, time.Minute, time.Second).Should(Succeed()) + }) + BeforeEach(func() { var err error config := &sriovnetworkv1.SriovOperatorConfig{} @@ -286,7 +298,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { daemonSet := &appsv1.DaemonSet{} Eventually(func() map[string]string { - // By("wait for DaemonSet NodeSelector") err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-network-config-daemon", Namespace: testNamespace}, daemonSet) if err != nil { return nil @@ -295,6 +306,32 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { }, 
util.APITimeout, util.RetryInterval).Should(Equal(nodeSelector)) }) + It("should be able to update the node selector of sriov-network-device-plugin", func() { + By("specify the configDaemonNodeSelector") + daemonSet := &appsv1.DaemonSet{} + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) + g.Expect(err).ToNot(HaveOccurred()) + _, exist := daemonSet.Spec.Template.Spec.NodeSelector["node-role.kubernetes.io/worker"] + g.Expect(exist).To(BeFalse()) + _, exist = daemonSet.Spec.Template.Spec.NodeSelector[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + nodeSelector := map[string]string{"node-role.kubernetes.io/worker": ""} + restore := updateConfigDaemonNodeSelector(nodeSelector) + DeferCleanup(restore) + + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) + g.Expect(err).ToNot(HaveOccurred()) + _, exist := daemonSet.Spec.Template.Spec.NodeSelector["node-role.kubernetes.io/worker"] + g.Expect(exist).To(BeTrue()) + _, exist = daemonSet.Spec.Template.Spec.NodeSelector[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + It("should be able to do multiple updates to the node selector of sriov-network-config-daemon", func() { By("changing the configDaemonNodeSelector") firstNodeSelector := map[string]string{"labelA": "", "labelB": "", "labelC": ""} @@ -427,8 +464,8 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { metricsDaemonset := appsv1.DaemonSet{} err := util.WaitForNamespacedObject(&metricsDaemonset, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To((Equal(nodeSelector))) - }).Should(Succeed()) + g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To(Equal(nodeSelector)) + }, time.Minute, time.Second).Should(Succeed()) }) It("should deploy extra configuration when the Prometheus operator is installed", func() { @@ -521,53 +558,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n"))) }, "1s").Should(Succeed()) }) - - It("should reconcile to a converging state when multiple node policies are set", func() { - By("Creating a consistent number of node policies") - for i := 0; i < 30; i++ { - p := &sriovnetworkv1.SriovNetworkNodePolicy{ - ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: fmt.Sprintf("p%d", i)}, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - Priority: 99, - NodeSelector: map[string]string{"foo": fmt.Sprintf("v%d", i)}, - }, - } - err := k8sClient.Create(context.Background(), p) - Expect(err).NotTo(HaveOccurred()) - } - - By("Triggering a the reconcile loop") - config := &sriovnetworkv1.SriovOperatorConfig{} - err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) - Expect(err).NotTo(HaveOccurred()) - if config.ObjectMeta.Labels == nil { - config.ObjectMeta.Labels = make(map[string]string) - } - config.ObjectMeta.Labels["trigger-test"] = "test-reconcile-daemonset" - err = k8sClient.Update(context.Background(), config) - Expect(err).NotTo(HaveOccurred()) - - By("Wait until device-plugin 
Daemonset's affinity has been calculated") - var expectedAffinity *corev1.Affinity - - Eventually(func(g Gomega) { - daemonSet := &appsv1.DaemonSet{} - err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) - g.Expect(err).NotTo(HaveOccurred()) - // Wait until the last policy (with NodeSelector foo=v29) has been considered at least one time - g.Expect(daemonSet.Spec.Template.Spec.Affinity.String()).To(ContainSubstring("v29")) - expectedAffinity = daemonSet.Spec.Template.Spec.Affinity - }, "3s", "1s").Should(Succeed()) - - By("Verify device-plugin Daemonset's affinity doesn't change over time") - Consistently(func(g Gomega) { - daemonSet := &appsv1.DaemonSet{} - err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(daemonSet.Spec.Template.Spec.Affinity). - To(Equal(expectedAffinity)) - }, "3s", "1s").Should(Succeed()) - }) }) }) diff --git a/controllers/suite_test.go b/controllers/suite_test.go index bc2f13b8e..9d5492e21 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -188,6 +188,13 @@ var _ = BeforeSuite(func() { } Expect(k8sClient.Create(context.Background(), ns)).Should(Succeed()) + sa := &corev1.ServiceAccount{TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + Namespace: testNamespace, + }} + Expect(k8sClient.Create(context.Background(), sa)).Should(Succeed()) + // Create openshift Infrastructure infra := &openshiftconfigv1.Infrastructure{ ObjectMeta: metav1.ObjectMeta{ diff --git a/deploy/clusterrole.yaml b/deploy/clusterrole.yaml index e7a596061..e7a84394e 100644 --- a/deploy/clusterrole.yaml +++ b/deploy/clusterrole.yaml @@ -45,12 +45,6 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch", "patch", "update"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["*"] -- apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["get"] - apiGroups: [ "config.openshift.io" ] resources: [ "infrastructures" ] verbs: [ "get", "list", "watch" ] diff --git a/deploy/role.yaml b/deploy/role.yaml index 0a6c27a21..3bdcdc145 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ -1,7 +1,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - creationTimestamp: null name: sriov-network-operator rules: - apiGroups: @@ -76,13 +75,10 @@ rules: resources: - pods verbs: - - '*' -- apiGroups: - - apps - resources: - - daemonsets - verbs: - - '*' + - "get" + - "list" + - "watch" + - "delete" - apiGroups: - sriovnetwork.openshift.io resources: diff --git a/deployment/sriov-network-operator-chart/templates/clusterrole.yaml b/deployment/sriov-network-operator-chart/templates/clusterrole.yaml index 7cd8fd014..519d2c05c 100644 --- a/deployment/sriov-network-operator-chart/templates/clusterrole.yaml +++ b/deployment/sriov-network-operator-chart/templates/clusterrole.yaml @@ -49,12 +49,6 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch", "patch", "update"] - - apiGroups: [""] - resources: ["pods"] - verbs: ["*"] - - apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["get"] - apiGroups: [ "config.openshift.io" ] resources: [ "infrastructures" ] verbs: [ "get", "list", "watch" ] diff --git a/deployment/sriov-network-operator-chart/templates/role.yaml b/deployment/sriov-network-operator-chart/templates/role.yaml index 6551b5775..56e5a5487 100644 --- 
a/deployment/sriov-network-operator-chart/templates/role.yaml +++ b/deployment/sriov-network-operator-chart/templates/role.yaml @@ -82,13 +82,10 @@ rules: resources: - pods verbs: - - '*' - - apiGroups: - - apps - resources: - - daemonsets - verbs: - - '*' + - "get" + - "list" + - "watch" + - "delete" - apiGroups: - sriovnetwork.openshift.io resources: diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index ba1830f5b..4ce478730 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -70,6 +70,10 @@ const ( MachineConfigPoolPausedAnnotationIdle = "Idle" MachineConfigPoolPausedAnnotationPaused = "Paused" + SriovDevicePluginLabel = "sriovnetwork.openshift.io/device-plugin" + SriovDevicePluginLabelEnabled = "Enabled" + SriovDevicePluginLabelDisabled = "Disabled" + NodeDrainAnnotation = "sriovnetwork.openshift.io/state" NodeStateDrainAnnotation = "sriovnetwork.openshift.io/desired-state" NodeStateDrainAnnotationCurrent = "sriovnetwork.openshift.io/current-state" diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index c5f1f333a..5f9aa7065 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -127,16 +127,17 @@ func ObjectHasAnnotation(obj metav1.Object, annoKey string, value string) bool { // AnnotateObject adds annotation to a kubernetes object func AnnotateObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error { - log.Log.V(2).Info("AnnotateObject(): Annotate object", - "objectName", obj.GetName(), - "objectKind", obj.GetObjectKind(), - "annotation", value) newObj := obj.DeepCopyObject().(client.Object) if newObj.GetAnnotations() == nil { newObj.SetAnnotations(map[string]string{}) } if newObj.GetAnnotations()[key] != value { + log.Log.V(2).Info("AnnotateObject(): Annotate object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "annotationKey", key, + "annotationValue", value) newObj.GetAnnotations()[key] = value patch := client.MergeFrom(obj) err := c.Patch(ctx, @@ -160,3 +161,76 @@ func AnnotateNode(ctx context.Context, nodeName string, key, value string, c cli return AnnotateObject(ctx, node, key, value, c) } + +// labelObject adds label to a kubernetes object +func labelObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error { + newObj := obj.DeepCopyObject().(client.Object) + if newObj.GetLabels() == nil { + newObj.SetLabels(map[string]string{}) + } + + if newObj.GetLabels()[key] != value { + log.Log.V(2).Info("labelObject(): label object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "labelKey", key, + "labelValue", value) + newObj.GetLabels()[key] = value + patch := client.MergeFrom(obj) + err := c.Patch(ctx, + newObj, patch) + if err != nil { + log.Log.Error(err, "labelObject(): Failed to patch object") + return err + } + } + + return nil +} + +// removeLabelObject remove a label from a kubernetes object +func removeLabelObject(ctx context.Context, obj client.Object, key string, c client.Client) error { + newObj := obj.DeepCopyObject().(client.Object) + if newObj.GetLabels() == nil { + newObj.SetLabels(map[string]string{}) + } + + _, exist := newObj.GetLabels()[key] + if exist { + log.Log.V(2).Info("removeLabelObject(): remove label from object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "labelKey", key) + delete(newObj.GetLabels(), key) + patch := client.MergeFrom(obj) + err := c.Patch(ctx, + newObj, patch) + if err != nil { + log.Log.Error(err, "removeLabelObject(): Failed to patch object") + return 
err + } + } + + return nil +} + +// LabelNode add label to a node +func LabelNode(ctx context.Context, nodeName string, key, value string, c client.Client) error { + node := &corev1.Node{} + err := c.Get(context.TODO(), client.ObjectKey{Name: nodeName}, node) + if err != nil { + return err + } + + return labelObject(ctx, node, key, value, c) +} + +func RemoveLabelFromNode(ctx context.Context, nodeName string, key string, c client.Client) error { + node := &corev1.Node{} + err := c.Get(context.TODO(), client.ObjectKey{Name: nodeName}, node) + if err != nil { + return err + } + + return removeLabelObject(ctx, node, key, c) +} From 8950f76a9c3b4661073385520ab0f0f3dc55ef2c Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Thu, 14 Nov 2024 09:02:53 -0500 Subject: [PATCH 33/59] deploy: relax Operator node affinity In the context of Hypershift (Hosted Clusters with OpenShift), where a Nodepool (terminology for a worker Node in HCP) is not a control-plane or a master Node but a worker, we can't force the Operator to be deployed on a master node that doesn't exist. Instead, we want to deploy it on a worker. The proposal here is to relax the rule and use `preferredDuringSchedulingIgnoredDuringExecution` instead so the scheduler will try to find a master node or fallback on other nodes if not found. --- deploy/operator.yaml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/deploy/operator.yaml b/deploy/operator.yaml index e9fb25de3..f95d80c59 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -20,14 +20,22 @@ spec: spec: affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/master - operator: Exists - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: Exists + # In the context of Hypershift, the SR-IOV network + # Operator is deployed on Nodepools which are labeled + # as workers. So we relax the node affinity to prefer + # masters/control-plane when possible otherwise we + # schedule where it's possible. 
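+          # Both preferred terms carry weight 1, so master and control-plane
+          # labels rank equally, and nodes without either label (for example
+          # Hypershift worker Nodepools) remain schedulable for the operator.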
+ preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: "node-role.kubernetes.io/master" + operator: Exists + - weight: 1 + preference: + matchExpressions: + - key: "node-role.kubernetes.io/control-plane" + operator: Exists tolerations: - effect: NoSchedule key: node-role.kubernetes.io/master From f6ea7b47a5ee2309f8b372e814e8cc0640b55fd6 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Mon, 11 Nov 2024 12:15:47 +0200 Subject: [PATCH 34/59] Fix corner case for pausing machine config Signed-off-by: Sebastian Sch --- pkg/platforms/openshift/openshift.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pkg/platforms/openshift/openshift.go b/pkg/platforms/openshift/openshift.go index 3f7d3421c..b55b9c70d 100644 --- a/pkg/platforms/openshift/openshift.go +++ b/pkg/platforms/openshift/openshift.go @@ -228,6 +228,18 @@ func (c *openshiftContext) OpenshiftAfterCompleteDrainNode(ctx context.Context, return false, err } + value, exist := mcp.Annotations[consts.MachineConfigPoolPausedAnnotation] + // if the label doesn't exist we just return true here + // this can be a case where the node was moved to another MCP in the time we start the drain + if !exist { + return true, nil + } + // check if the sriov annotation on mcp is idle + // if the value is idle we just return here + if value == consts.MachineConfigPoolPausedAnnotationIdle { + return true, nil + } + // get all the nodes that belong to this machine config pool to validate this is the last node // request to complete the drain nodesInPool := &corev1.NodeList{} From 8f45dce3db760b2185613835ac79f80167112f65 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Mon, 11 Nov 2024 12:16:23 +0200 Subject: [PATCH 35/59] re-organize drain controller package Signed-off-by: Sebastian Sch --- controllers/drain_controller.go | 191 ++-------------- controllers/drain_controller_helper.go | 288 +++++++++++++++++++++++++ controllers/helper.go | 10 +- pkg/drain/drainer.go | 2 +- 4 files changed, 307 insertions(+), 184 deletions(-) create mode 100644 controllers/drain_controller_helper.go diff --git a/controllers/drain_controller.go b/controllers/drain_controller.go index b96458fa7..5d976a380 100644 --- a/controllers/drain_controller.go +++ b/controllers/drain_controller.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "sync" - "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -107,19 +106,23 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl } // create the drain state annotation if it doesn't exist in the sriovNetworkNodeState object - nodeStateDrainAnnotationCurrent, err := dr.ensureAnnotationExists(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent) + nodeStateDrainAnnotationCurrent, nodeStateExist, err := dr.ensureAnnotationExists(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent) if err != nil { reqLogger.Error(err, "failed to ensure nodeStateDrainAnnotation") return ctrl.Result{}, err } // create the drain state annotation if it doesn't exist in the node object - nodeDrainAnnotation, err := dr.ensureAnnotationExists(ctx, node, constants.NodeDrainAnnotation) + nodeDrainAnnotation, nodeExist, err := dr.ensureAnnotationExists(ctx, node, constants.NodeDrainAnnotation) if err != nil { reqLogger.Error(err, "failed to ensure nodeStateDrainAnnotation") return ctrl.Result{}, err } + // requeue the request if we needed to add any of the annotations + if !nodeExist || !nodeStateExist { + return ctrl.Result{Requeue: 
true}, nil + } reqLogger.V(2).Info("Drain annotations", "nodeAnnotation", nodeDrainAnnotation, "nodeStateAnnotation", nodeStateDrainAnnotationCurrent) // Check the node request @@ -141,98 +144,14 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl // doesn't need to drain anymore, so we can stop the drain if nodeStateDrainAnnotationCurrent == constants.DrainComplete || nodeStateDrainAnnotationCurrent == constants.Draining { - completed, err := dr.drainer.CompleteDrainNode(ctx, node) - if err != nil { - reqLogger.Error(err, "failed to complete drain on node") - dr.recorder.Event(nodeNetworkState, - corev1.EventTypeWarning, - "DrainController", - "failed to drain node") - return ctrl.Result{}, err - } - - // if we didn't manage to complete the un drain of the node we retry - if !completed { - reqLogger.Info("complete drain was not completed re queueing the request") - dr.recorder.Event(nodeNetworkState, - corev1.EventTypeWarning, - "DrainController", - "node complete drain was not completed") - // TODO: make this time configurable - return reconcile.Result{RequeueAfter: 5 * time.Second}, nil - } - - // move the node state back to idle - err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client) - if err != nil { - reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainIdle) - return ctrl.Result{}, err - } - - reqLogger.Info("completed the un drain for node") - dr.recorder.Event(nodeNetworkState, - corev1.EventTypeWarning, - "DrainController", - "node un drain completed") - return ctrl.Result{}, nil - } - } else if nodeDrainAnnotation == constants.DrainRequired || nodeDrainAnnotation == constants.RebootRequired { - // this cover the case a node request to drain or reboot - - // nothing to do here we need to wait for the node to move back to idle - if nodeStateDrainAnnotationCurrent == constants.DrainComplete { - reqLogger.Info("node requested a drain and nodeState is on drain completed nothing todo") - return ctrl.Result{}, nil - } - - // we need to start the drain, but first we need to check that we can drain the node - if nodeStateDrainAnnotationCurrent == constants.DrainIdle { - result, err := dr.tryDrainNode(ctx, node) - if err != nil { - reqLogger.Error(err, "failed to check if we can drain the node") - return ctrl.Result{}, err - } - - // in case we need to wait because we just to the max number of draining nodes - if result != nil { - return *result, nil - } - } - - // class the drain function that will also call drain to other platform providers like openshift - drained, err := dr.drainer.DrainNode(ctx, node, nodeDrainAnnotation == constants.RebootRequired) - if err != nil { - reqLogger.Error(err, "error trying to drain the node") - dr.recorder.Event(nodeNetworkState, - corev1.EventTypeWarning, - "DrainController", - "failed to drain node") - return reconcile.Result{}, err - } - - // if we didn't manage to complete the drain of the node we retry - if !drained { - reqLogger.Info("the nodes was not drained re queueing the request") - dr.recorder.Event(nodeNetworkState, - corev1.EventTypeWarning, - "DrainController", - "node drain operation was not completed") - return reconcile.Result{RequeueAfter: 5 * time.Second}, nil - } - - // if we manage to drain we label the node state with drain completed and finish - err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client) - if err != nil { 
- reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainComplete) - return ctrl.Result{}, err + return dr.handleNodeIdleNodeStateDrainingOrCompleted(ctx, &reqLogger, node, nodeNetworkState) } + } - reqLogger.Info("node drained successfully") - dr.recorder.Event(nodeNetworkState, - corev1.EventTypeWarning, - "DrainController", - "node drain completed") - return ctrl.Result{}, nil + // this cover the case a node request to drain or reboot + if nodeDrainAnnotation == constants.DrainRequired || + nodeDrainAnnotation == constants.RebootRequired { + return dr.handleNodeDrainOrReboot(ctx, &reqLogger, node, nodeNetworkState, nodeDrainAnnotation, nodeStateDrainAnnotationCurrent) } reqLogger.Error(nil, "unexpected node drain annotation") @@ -250,93 +169,17 @@ func (dr *DrainReconcile) getObject(ctx context.Context, req ctrl.Request, objec return true, nil } -func (dr *DrainReconcile) ensureAnnotationExists(ctx context.Context, object client.Object, key string) (string, error) { +func (dr *DrainReconcile) ensureAnnotationExists(ctx context.Context, object client.Object, key string) (string, bool, error) { value, exist := object.GetAnnotations()[key] if !exist { - err := utils.AnnotateObject(ctx, object, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client) - if err != nil { - return "", err - } - return constants.DrainIdle, nil - } - - return value, nil -} - -func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (*reconcile.Result, error) { - // configure logs - reqLogger := log.FromContext(ctx) - reqLogger.Info("checkForNodeDrain():") - - //critical section we need to check if we can start the draining - dr.drainCheckMutex.Lock() - defer dr.drainCheckMutex.Unlock() - - // find the relevant node pool - nodePool, nodeList, err := dr.findNodePoolConfig(ctx, node) - if err != nil { - reqLogger.Error(err, "failed to find the pool for the requested node") - return nil, err - } - - // check how many nodes we can drain in parallel for the specific pool - maxUnv, err := nodePool.MaxUnavailable(len(nodeList)) - if err != nil { - reqLogger.Error(err, "failed to calculate max unavailable") - return nil, err - } - - current := 0 - snns := &sriovnetworkv1.SriovNetworkNodeState{} - - var currentSnns *sriovnetworkv1.SriovNetworkNodeState - for _, nodeObj := range nodeList { - err = dr.Get(ctx, client.ObjectKey{Name: nodeObj.GetName(), Namespace: vars.Namespace}, snns) + err := utils.AnnotateObject(ctx, object, key, constants.DrainIdle, dr.Client) if err != nil { - if errors.IsNotFound(err) { - reqLogger.V(2).Info("node doesn't have a sriovNetworkNodePolicy") - continue - } - return nil, err - } - - if snns.GetName() == node.GetName() { - currentSnns = snns.DeepCopy() - } - - if utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.Draining) || - utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete) { - current++ + return "", false, err } + return constants.DrainIdle, false, nil } - reqLogger.Info("Max node allowed to be draining at the same time", "MaxParallelNodeConfiguration", maxUnv) - reqLogger.Info("Count of draining", "drainingNodes", current) - - // if maxUnv is zero this means we drain all the nodes in parallel without a limit - if maxUnv == -1 { - reqLogger.Info("draining all the nodes in parallel") - } else if current >= maxUnv { - // the node requested to be drained, but we are at the limit so we re-enqueue the request - 
reqLogger.Info("MaxParallelNodeConfiguration limit reached for draining nodes re-enqueue the request") - // TODO: make this time configurable - return &reconcile.Result{RequeueAfter: 5 * time.Second}, nil - } - - if currentSnns == nil { - return nil, fmt.Errorf("failed to find sriov network node state for requested node") - } - - err = utils.AnnotateObject(ctx, currentSnns, constants.NodeStateDrainAnnotationCurrent, constants.Draining, dr.Client) - if err != nil { - reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.Draining) - return nil, err - } - - return nil, nil -} -func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { - return findNodePoolConfig(ctx, node, dr.Client) + return value, true, nil } // SetupWithManager sets up the controller with the Manager. diff --git a/controllers/drain_controller_helper.go b/controllers/drain_controller_helper.go new file mode 100644 index 000000000..c9e6bf550 --- /dev/null +++ b/controllers/drain_controller_helper.go @@ -0,0 +1,288 @@ +package controllers + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" +) + +func (dr *DrainReconcile) handleNodeIdleNodeStateDrainingOrCompleted(ctx context.Context, + reqLogger *logr.Logger, + node *corev1.Node, + nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState) (ctrl.Result, error) { + completed, err := dr.drainer.CompleteDrainNode(ctx, node) + if err != nil { + reqLogger.Error(err, "failed to complete drain on node") + dr.recorder.Event(nodeNetworkState, + corev1.EventTypeWarning, + "DrainController", + "failed to drain node") + return ctrl.Result{}, err + } + + // if we didn't manage to complete the un drain of the node we retry + if !completed { + reqLogger.Info("complete drain was not completed re queueing the request") + dr.recorder.Event(nodeNetworkState, + corev1.EventTypeWarning, + "DrainController", + "node complete drain was not completed") + // TODO: make this time configurable + return reconcile.Result{RequeueAfter: 5 * time.Second}, nil + } + + // move the node state back to idle + err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client) + if err != nil { + reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainIdle) + return ctrl.Result{}, err + } + + reqLogger.Info("completed the un drain for node") + dr.recorder.Event(nodeNetworkState, + corev1.EventTypeWarning, + "DrainController", + "node un drain completed") + return ctrl.Result{}, nil +} + +func (dr *DrainReconcile) handleNodeDrainOrReboot(ctx context.Context, + reqLogger *logr.Logger, + node *corev1.Node, + nodeNetworkState *sriovnetworkv1.SriovNetworkNodeState, + nodeDrainAnnotation, + nodeStateDrainAnnotationCurrent string) (ctrl.Result, error) { + // nothing to 
do here we need to wait for the node to move back to idle + if nodeStateDrainAnnotationCurrent == constants.DrainComplete { + reqLogger.Info("node requested a drain and nodeState is on drain completed nothing todo") + return ctrl.Result{}, nil + } + + // we need to start the drain, but first we need to check that we can drain the node + if nodeStateDrainAnnotationCurrent == constants.DrainIdle { + result, err := dr.tryDrainNode(ctx, node) + if err != nil { + reqLogger.Error(err, "failed to check if we can drain the node") + return ctrl.Result{}, err + } + + // in case we need to wait because we just to the max number of draining nodes + if result != nil { + return *result, nil + } + } + + // call the drain function that will also call drain to other platform providers like openshift + drained, err := dr.drainer.DrainNode(ctx, node, nodeDrainAnnotation == constants.RebootRequired) + if err != nil { + reqLogger.Error(err, "error trying to drain the node") + dr.recorder.Event(nodeNetworkState, + corev1.EventTypeWarning, + "DrainController", + "failed to drain node") + return reconcile.Result{}, err + } + + // if we didn't manage to complete the drain of the node we retry + if !drained { + reqLogger.Info("the nodes was not drained re queueing the request") + dr.recorder.Event(nodeNetworkState, + corev1.EventTypeWarning, + "DrainController", + "node drain operation was not completed") + return reconcile.Result{RequeueAfter: 5 * time.Second}, nil + } + + // if we manage to drain we label the node state with drain completed and finish + err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client) + if err != nil { + reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainComplete) + return ctrl.Result{}, err + } + + reqLogger.Info("node drained successfully") + dr.recorder.Event(nodeNetworkState, + corev1.EventTypeWarning, + "DrainController", + "node drain completed") + return ctrl.Result{}, nil +} + +func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (*reconcile.Result, error) { + // configure logs + reqLogger := log.FromContext(ctx) + reqLogger.Info("checkForNodeDrain():") + + //critical section we need to check if we can start the draining + dr.drainCheckMutex.Lock() + defer dr.drainCheckMutex.Unlock() + + // find the relevant node pool + nodePool, nodeList, err := dr.findNodePoolConfig(ctx, node) + if err != nil { + reqLogger.Error(err, "failed to find the pool for the requested node") + return nil, err + } + + // check how many nodes we can drain in parallel for the specific pool + maxUnv, err := nodePool.MaxUnavailable(len(nodeList)) + if err != nil { + reqLogger.Error(err, "failed to calculate max unavailable") + return nil, err + } + + current := 0 + snns := &sriovnetworkv1.SriovNetworkNodeState{} + + var currentSnns *sriovnetworkv1.SriovNetworkNodeState + for _, nodeObj := range nodeList { + err = dr.Get(ctx, client.ObjectKey{Name: nodeObj.GetName(), Namespace: vars.Namespace}, snns) + if err != nil { + if errors.IsNotFound(err) { + reqLogger.V(2).Info("node doesn't have a sriovNetworkNodePolicy") + continue + } + return nil, err + } + + if snns.GetName() == node.GetName() { + currentSnns = snns.DeepCopy() + } + + if utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.Draining) || + utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete) { + current++ + } + } + reqLogger.Info("Max 
node allowed to be draining at the same time", "MaxParallelNodeConfiguration", maxUnv) + reqLogger.Info("Count of draining", "drainingNodes", current) + + // if maxUnv is zero this means we drain all the nodes in parallel without a limit + if maxUnv == -1 { + reqLogger.Info("draining all the nodes in parallel") + } else if current >= maxUnv { + // the node requested to be drained, but we are at the limit so we re-enqueue the request + reqLogger.Info("MaxParallelNodeConfiguration limit reached for draining nodes re-enqueue the request") + // TODO: make this time configurable + return &reconcile.Result{RequeueAfter: 5 * time.Second}, nil + } + + if currentSnns == nil { + return nil, fmt.Errorf("failed to find sriov network node state for requested node") + } + + err = utils.AnnotateObject(ctx, currentSnns, constants.NodeStateDrainAnnotationCurrent, constants.Draining, dr.Client) + if err != nil { + reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.Draining) + return nil, err + } + + return nil, nil +} + +func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { + logger := log.FromContext(ctx) + logger.Info("findNodePoolConfig():") + // get all the sriov network pool configs + npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} + err := dr.List(ctx, npcl) + if err != nil { + logger.Error(err, "failed to list sriovNetworkPoolConfig") + return nil, nil, err + } + + selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} + nodesInPools := map[string]interface{}{} + + for _, npc := range npcl.Items { + // we skip hw offload objects + if npc.Spec.OvsHardwareOffloadConfig.Name != "" { + continue + } + + if npc.Spec.NodeSelector == nil { + npc.Spec.NodeSelector = &metav1.LabelSelector{} + } + + selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + if selector.Matches(labels.Set(node.Labels)) { + selectedNpcl = append(selectedNpcl, npc.DeepCopy()) + } + + nodeList := &corev1.NodeList{} + err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", + "machineConfigPoolName", npc, + "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + for _, nodeName := range nodeList.Items { + nodesInPools[nodeName.Name] = nil + } + } + + if len(selectedNpcl) > 1 { + // don't allow the node to be part of multiple pools + err = fmt.Errorf("node is part of more then one pool") + logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) + return nil, nil, err + } else if len(selectedNpcl) == 1 { + // found one pool for our node + logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) + selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) + return nil, nil, err + } + + // list all the nodes that are also part of this pool and return them + nodeList := &corev1.NodeList{} + err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list nodes using with 
label selector", "labelSelector", selector) + return nil, nil, err + } + + return selectedNpcl[0], nodeList.Items, nil + } else { + // in this case we get all the nodes and remove the ones that already part of any pool + logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig) + nodeList := &corev1.NodeList{} + err = dr.List(ctx, nodeList) + if err != nil { + logger.Error(err, "failed to list all the nodes") + return nil, nil, err + } + + defaultNodeLists := []corev1.Node{} + for _, nodeObj := range nodeList.Items { + if _, exist := nodesInPools[nodeObj.Name]; !exist { + defaultNodeLists = append(defaultNodeLists, nodeObj) + } + } + return defaultPoolConfig, defaultNodeLists, nil + } +} diff --git a/controllers/helper.go b/controllers/helper.go index 58c3ae697..bf918bd3f 100644 --- a/controllers/helper.go +++ b/controllers/helper.go @@ -100,11 +100,7 @@ func (DrainAnnotationPredicate) Update(e event.UpdateEvent) bool { return true } - if oldAnno != newAnno { - return true - } - - return false + return oldAnno != newAnno } type DrainStateAnnotationPredicate struct { @@ -137,10 +133,6 @@ func (DrainStateAnnotationPredicate) Update(e event.UpdateEvent) bool { return true } - if oldAnno != newAnno { - return true - } - return oldAnno != newAnno } diff --git a/pkg/drain/drainer.go b/pkg/drain/drainer.go index a3500dc47..22dbed3df 100644 --- a/pkg/drain/drainer.go +++ b/pkg/drain/drainer.go @@ -98,7 +98,7 @@ func (d *Drainer) DrainNode(ctx context.Context, node *corev1.Node, fullNodeDrai reqLogger.Info("drainNode(): failed to drain node", "error", err) return false, err } - reqLogger.Info("drainNode(): drain complete") + reqLogger.Info("drainNode(): Drain completed") return true, nil } From 8d4ae2086899821b3a0f76786074566390ce70b0 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Thu, 21 Nov 2024 15:59:12 +0200 Subject: [PATCH 36/59] Add waitForDevicesInitialization to systemd service This function ensures that the network devices specified in the configuration are registered and handled by UDEV. Sometimes, the initialization of network devices might take a significant amount of time, and the sriov-config systemd service may start before the devices are fully processed, leading to failure. --- cmd/sriov-network-config-daemon/service.go | 57 +++++++++++++++++++ .../service_test.go | 20 +++++++ pkg/helper/mock/mock_helper.go | 14 +++++ pkg/host/internal/udev/udev.go | 13 +++++ pkg/host/internal/udev/udev_test.go | 10 ++++ pkg/host/mock/mock_host.go | 14 +++++ pkg/host/types/interfaces.go | 3 + 7 files changed, 131 insertions(+) diff --git a/cmd/sriov-network-config-daemon/service.go b/cmd/sriov-network-config-daemon/service.go index 382ad976b..0209583cc 100644 --- a/cmd/sriov-network-config-daemon/service.go +++ b/cmd/sriov-network-config-daemon/service.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "os" + "time" "github.com/go-logr/logr" "github.com/spf13/cobra" @@ -40,6 +41,12 @@ import ( const ( PhasePre = "pre" PhasePost = "post" + + // InitializationDeviceDiscoveryTimeoutSec constant defines the number of + // seconds to wait for devices to be registered in the system with the expected name. 
+ InitializationDeviceDiscoveryTimeoutSec = 60 + // InitializationDeviceUdevProcessingTimeoutSec constant defines the number of seconds to wait for udev rules to process + InitializationDeviceUdevProcessingTimeoutSec = 60 ) var ( @@ -104,6 +111,8 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { return updateSriovResultErr(setupLog, phaseArg, fmt.Errorf("failed to create hostHelpers: %v", err)) } + waitForDevicesInitialization(setupLog, sriovConf, hostHelpers) + if phaseArg == PhasePre { err = phasePre(setupLog, sriovConf, hostHelpers) } else { @@ -303,3 +312,51 @@ func updateResult(setupLog logr.Logger, result, msg string) error { setupLog.V(0).Info("result file updated", "SyncStatus", sriovResult.SyncStatus, "LastSyncError", msg) return nil } + +// waitForDevicesInitialization should be executed in both the pre and post-networking stages. +// This function ensures that the network devices specified in the configuration are registered +// and handled by UDEV. Sometimes, the initialization of network devices might take a significant +// amount of time, and the sriov-config systemd service may start before the devices are fully +// processed, leading to failure. +// +// To address this, we not only check if the devices are registered with the correct name but also +// wait for the udev event queue to empty. This increases the likelihood that the service will start +// only when the devices are fully initialized. It is required to call this function in the +// "post-networking" phase as well because the OS network manager might change device configurations, +// and we need to ensure these changes are fully processed before starting the post-networking part. +// +// The timeouts used in this function are intentionally kept low to avoid blocking the OS loading +// process for too long in case of any issues. +// +// Note: Currently, this function handles only Baremetal clusters. We do not have evidence that +// this logic is required for virtual clusters. 
+func waitForDevicesInitialization(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helper.HostHelpersInterface) { + if conf.PlatformType != consts.Baremetal { + // skip waiting on virtual cluster + return + } + // wait for devices from the spec to be registered in the system with expected names + devicesToWait := make(map[string]string, len(conf.Spec.Interfaces)) + for _, d := range conf.Spec.Interfaces { + devicesToWait[d.PciAddress] = d.Name + } + deadline := time.Now().Add(time.Second * time.Duration(InitializationDeviceDiscoveryTimeoutSec)) + for time.Now().Before(deadline) { + for pciAddr, name := range devicesToWait { + if hostHelpers.TryGetInterfaceName(pciAddr) == name { + delete(devicesToWait, pciAddr) + } + } + if len(devicesToWait) == 0 { + break + } + time.Sleep(time.Second) + } + if len(devicesToWait) != 0 { + setupLog.Info("WARNING: some devices were not initialized", "devices", devicesToWait, "timeout", InitializationDeviceDiscoveryTimeoutSec) + } + if err := hostHelpers.WaitUdevEventsProcessed(InitializationDeviceUdevProcessingTimeoutSec); err != nil { + setupLog.Info("WARNING: failed to wait for udev events processing", "reason", err.Error(), + "timeout", InitializationDeviceUdevProcessingTimeoutSec) + } +} diff --git a/cmd/sriov-network-config-daemon/service_test.go b/cmd/sriov-network-config-daemon/service_test.go index 771cc3b1c..8ce4e2c5e 100644 --- a/cmd/sriov-network-config-daemon/service_test.go +++ b/cmd/sriov-network-config-daemon/service_test.go @@ -3,6 +3,7 @@ package main import ( "fmt" + "github.com/go-logr/logr" "github.com/golang/mock/gomock" "github.com/spf13/cobra" "gopkg.in/yaml.v3" @@ -158,6 +159,8 @@ var _ = Describe("Service", func() { "/etc/sriov-operator/sriov-interface-result.yaml": []byte("something"), }, }) + hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.0").Return("enp216s0f0np0") + hostHelpers.EXPECT().WaitUdevEventsProcessed(60).Return(nil) hostHelpers.EXPECT().CheckRDMAEnabled().Return(true, nil) hostHelpers.EXPECT().TryEnableTun().Return() hostHelpers.EXPECT().TryEnableVhostNet().Return() @@ -211,6 +214,8 @@ var _ = Describe("Service", func() { "/etc/sriov-operator/sriov-interface-result.yaml": []byte("something"), }, }) + hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.0").Return("enp216s0f0np0") + hostHelpers.EXPECT().WaitUdevEventsProcessed(60).Return(nil) hostHelpers.EXPECT().CheckRDMAEnabled().Return(true, nil) hostHelpers.EXPECT().TryEnableTun().Return() hostHelpers.EXPECT().TryEnableVhostNet().Return() @@ -236,6 +241,8 @@ var _ = Describe("Service", func() { "/etc/sriov-operator/sriov-interface-result.yaml": getTestResultFileContent("InProgress", ""), }, }) + hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.0").Return("enp216s0f0np0") + hostHelpers.EXPECT().WaitUdevEventsProcessed(60).Return(nil) hostHelpers.EXPECT().DiscoverSriovDevices(hostHelpers).Return([]sriovnetworkv1.InterfaceExt{{ Name: "enp216s0f0np0", }}, nil) @@ -276,4 +283,17 @@ var _ = Describe("Service", func() { testHelpers.GinkgoAssertFileContentsEquals("/etc/sriov-operator/sriov-interface-result.yaml", string(getTestResultFileContent("Failed", "post: unexpected result of the pre phase: Failed, syncError: pretest"))) }) + It("waitForDevicesInitialization", func() { + cfg := &systemd.SriovConfig{Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ + Interfaces: []sriovnetworkv1.Interface{ + {Name: "name1", PciAddress: "0000:d8:00.0"}, + {Name: "name2", PciAddress: "0000:d8:00.1"}}}} + 
hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.0").Return("other") + hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.1").Return("") + hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.0").Return("name1") + hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.1").Return("") + hostHelpers.EXPECT().TryGetInterfaceName("0000:d8:00.1").Return("name2") + hostHelpers.EXPECT().WaitUdevEventsProcessed(60).Return(nil) + waitForDevicesInitialization(logr.Discard(), cfg, hostHelpers) + }) }) diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index b413ecdee..8498d5c4d 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -1224,6 +1224,20 @@ func (mr *MockHostHelpersInterfaceMockRecorder) VFIsReady(pciAddr interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "VFIsReady", reflect.TypeOf((*MockHostHelpersInterface)(nil).VFIsReady), pciAddr) } +// WaitUdevEventsProcessed mocks base method. +func (m *MockHostHelpersInterface) WaitUdevEventsProcessed(timeout int) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "WaitUdevEventsProcessed", timeout) + ret0, _ := ret[0].(error) + return ret0 +} + +// WaitUdevEventsProcessed indicates an expected call of WaitUdevEventsProcessed. +func (mr *MockHostHelpersInterfaceMockRecorder) WaitUdevEventsProcessed(timeout interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WaitUdevEventsProcessed", reflect.TypeOf((*MockHostHelpersInterface)(nil).WaitUdevEventsProcessed), timeout) +} + // WriteCheckpointFile mocks base method. func (m *MockHostHelpersInterface) WriteCheckpointFile(arg0 *v1.SriovNetworkNodeState) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/udev/udev.go b/pkg/host/internal/udev/udev.go index 3f828bb70..841bc71d7 100644 --- a/pkg/host/internal/udev/udev.go +++ b/pkg/host/internal/udev/udev.go @@ -5,6 +5,7 @@ import ( "os" "path" "path/filepath" + "strconv" "strings" "sigs.k8s.io/controller-runtime/pkg/log" @@ -126,6 +127,18 @@ func (u *udev) LoadUdevRules() error { return nil } +// WaitUdevEventsProcessed calls `udevadm settle“ with provided timeout +// The command watches the udev event queue, and exits if all current events are handled. 
+func (u *udev) WaitUdevEventsProcessed(timeout int) error { + log.Log.V(2).Info("WaitUdevEventsProcessed()") + _, stderr, err := u.utilsHelper.RunCommand("udevadm", "settle", "-t", strconv.Itoa(timeout)) + if err != nil { + log.Log.Error(err, "WaitUdevEventsProcessed(): failed to wait for udev rules to process", "error", stderr, "timeout", timeout) + return err + } + return nil +} + func (u *udev) addUdevRule(pfPciAddress, ruleName, ruleContent string) error { log.Log.V(2).Info("addUdevRule()", "device", pfPciAddress, "rule", ruleName) rulePath := u.getRuleFolderPath() diff --git a/pkg/host/internal/udev/udev_test.go b/pkg/host/internal/udev/udev_test.go index 4a2e17e7e..fd1107af3 100644 --- a/pkg/host/internal/udev/udev_test.go +++ b/pkg/host/internal/udev/udev_test.go @@ -210,4 +210,14 @@ var _ = Describe("UDEV", func() { Expect(s.LoadUdevRules()).To(MatchError(testError)) }) }) + Context("WaitUdevEventsProcessed", func() { + It("Succeed", func() { + utilsMock.EXPECT().RunCommand("udevadm", "settle", "-t", "10").Return("", "", nil) + Expect(s.WaitUdevEventsProcessed(10)).NotTo(HaveOccurred()) + }) + It("Command Failed", func() { + utilsMock.EXPECT().RunCommand("udevadm", "settle", "-t", "20").Return("", "", testError) + Expect(s.WaitUdevEventsProcessed(20)).To(MatchError(testError)) + }) + }) }) diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index 095d270a9..b7f9271c8 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -1038,3 +1038,17 @@ func (mr *MockHostManagerInterfaceMockRecorder) VFIsReady(pciAddr interface{}) * mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "VFIsReady", reflect.TypeOf((*MockHostManagerInterface)(nil).VFIsReady), pciAddr) } + +// WaitUdevEventsProcessed mocks base method. +func (m *MockHostManagerInterface) WaitUdevEventsProcessed(timeout int) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "WaitUdevEventsProcessed", timeout) + ret0, _ := ret[0].(error) + return ret0 +} + +// WaitUdevEventsProcessed indicates an expected call of WaitUdevEventsProcessed. +func (mr *MockHostManagerInterfaceMockRecorder) WaitUdevEventsProcessed(timeout interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WaitUdevEventsProcessed", reflect.TypeOf((*MockHostManagerInterface)(nil).WaitUdevEventsProcessed), timeout) +} diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index 6844ee5ae..2ffcb3268 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -164,6 +164,9 @@ type UdevInterface interface { RemoveVfRepresentorUdevRule(pfPciAddress string) error // LoadUdevRules triggers udev rules for network subsystem LoadUdevRules() error + // WaitUdevEventsProcessed calls `udevadm settle“ with provided timeout + // The command watches the udev event queue, and exits if all current events are handled. 
+	WaitUdevEventsProcessed(timeout int) error
 }
 
 type VdpaInterface interface {

From 9fd85a2a0bb0aa7f39136925265cc935498903fa Mon Sep 17 00:00:00 2001
From: Ido Heyvi
Date: Sun, 24 Nov 2024 11:04:14 +0200
Subject: [PATCH 37/59] Fixing pre-delete-webhook template for missing imagePullSecrets

Signed-off-by: Ido Heyvi
---
 .../templates/pre-delete-webooks.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml
index 8fc7fa06b..202275607 100644
--- a/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml
+++ b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml
@@ -14,6 +14,12 @@ spec:
   template:
     spec:
       serviceAccountName: {{ include "sriov-network-operator.fullname" . }}
+      {{- if .Values.imagePullSecrets }}
+      imagePullSecrets:
+      {{- range .Values.imagePullSecrets }}
+        - name: {{ . }}
+      {{- end }}
+      {{- end }}
       containers:
         - name: cleanup
           image: {{ .Values.images.operator }}

From 045c858994b3eb1270ec0bf24c2c6a3f38947db7 Mon Sep 17 00:00:00 2001
From: Andrea Panattoni
Date: Tue, 26 Nov 2024 17:28:50 +0100
Subject: [PATCH 38/59] logging: Increase verbosity of `NeedToUpdateSriov`

Knowing the reason that triggered a node reconfiguration is a critical
point when debugging a production system.

Increase the verbosity of all the cases where `NeedToUpdateSriov`
returns true.

Signed-off-by: Andrea Panattoni
---
 api/v1/helper.go | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/api/v1/helper.go b/api/v1/helper.go
index 62ea0d2a5..300992acb 100644
--- a/api/v1/helper.go
+++ b/api/v1/helper.go
@@ -261,23 +261,23 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
 	if ifaceSpec.Mtu > 0 {
 		mtu := ifaceSpec.Mtu
 		if mtu > ifaceStatus.Mtu {
-			log.V(2).Info("NeedToUpdateSriov(): MTU needs update", "desired", mtu, "current", ifaceStatus.Mtu)
+			log.V(0).Info("NeedToUpdateSriov(): MTU needs update", "desired", mtu, "current", ifaceStatus.Mtu)
 			return true
 		}
 	}
 	currentEswitchMode := GetEswitchModeFromStatus(ifaceStatus)
 	desiredEswitchMode := GetEswitchModeFromSpec(ifaceSpec)
 	if currentEswitchMode != desiredEswitchMode {
-		log.V(2).Info("NeedToUpdateSriov(): EswitchMode needs update", "desired", desiredEswitchMode, "current", currentEswitchMode)
+		log.V(0).Info("NeedToUpdateSriov(): EswitchMode needs update", "desired", desiredEswitchMode, "current", currentEswitchMode)
 		return true
 	}
 	if ifaceSpec.NumVfs != ifaceStatus.NumVfs {
-		log.V(2).Info("NeedToUpdateSriov(): NumVfs needs update", "desired", ifaceSpec.NumVfs, "current", ifaceStatus.NumVfs)
+		log.V(0).Info("NeedToUpdateSriov(): NumVfs needs update", "desired", ifaceSpec.NumVfs, "current", ifaceStatus.NumVfs)
 		return true
 	}
 	if ifaceStatus.LinkAdminState == consts.LinkAdminStateDown {
-		log.V(2).Info("NeedToUpdateSriov(): PF link status needs update", "desired to include", "up", "current", ifaceStatus.LinkAdminState)
+		log.V(0).Info("NeedToUpdateSriov(): PF link status needs update", "desired to include", "up", "current", ifaceStatus.LinkAdminState)
 		return true
 	}
 
@@ -286,24 +286,24 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
 		for _, groupSpec := range ifaceSpec.VfGroups {
 			if IndexInRange(vfStatus.VfID, groupSpec.VfRange) {
 				if vfStatus.Driver == "" {
-					log.V(2).Info("NeedToUpdateSriov(): Driver needs 
update - has no driver", "desired", groupSpec.DeviceType) return true } if groupSpec.DeviceType != "" && groupSpec.DeviceType != consts.DeviceTypeNetDevice { if groupSpec.DeviceType != vfStatus.Driver { - log.V(2).Info("NeedToUpdateSriov(): Driver needs update", + log.V(0).Info("NeedToUpdateSriov(): Driver needs update", "desired", groupSpec.DeviceType, "current", vfStatus.Driver) return true } } else { if StringInArray(vfStatus.Driver, vars.DpdkDrivers) { - log.V(2).Info("NeedToUpdateSriov(): Driver needs update", + log.V(0).Info("NeedToUpdateSriov(): Driver needs update", "desired", groupSpec.DeviceType, "current", vfStatus.Driver) return true } if vfStatus.Mtu != 0 && groupSpec.Mtu != 0 && vfStatus.Mtu != groupSpec.Mtu { - log.V(2).Info("NeedToUpdateSriov(): VF MTU needs update", + log.V(0).Info("NeedToUpdateSriov(): VF MTU needs update", "vf", vfStatus.VfID, "desired", groupSpec.Mtu, "current", vfStatus.Mtu) return true } @@ -313,20 +313,20 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool { // Node GUID. We intentionally skip empty Node GUID in vfStatus because this may happen // when the VF is allocated to a workload. if vfStatus.GUID == consts.UninitializedNodeGUID { - log.V(2).Info("NeedToUpdateSriov(): VF GUID needs update", + log.V(0).Info("NeedToUpdateSriov(): VF GUID needs update", "vf", vfStatus.VfID, "current", vfStatus.GUID) return true } } // this is needed to be sure the admin mac address is configured as expected if ifaceSpec.ExternallyManaged { - log.V(2).Info("NeedToUpdateSriov(): need to update the device as it's externally manage", + log.V(0).Info("NeedToUpdateSriov(): need to update the device as it's externally manage", "device", ifaceStatus.PciAddress) return true } } if groupSpec.VdpaType != vfStatus.VdpaType { - log.V(2).Info("NeedToUpdateSriov(): VF VdpaType mismatch", + log.V(0).Info("NeedToUpdateSriov(): VF VdpaType mismatch", "desired", groupSpec.VdpaType, "current", vfStatus.VdpaType) return true } From 5ad4ae9af8a18108bda70f119973a25574b24293 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Mon, 28 Oct 2024 17:01:15 +0200 Subject: [PATCH 39/59] feat: Update controller logic to handle stale SriovNetworkNodeState CRs with delay - Changed the logic in the sriov-network-operator controller to handle stale SriovNetworkNodeState CRs (those with no matching Nodes with daemon). - Introduced a delay (30 minutes by default) before removing stale state CRs to manage scenarios where the user temporarily removes the daemon from the node but does not want to lose the state stored in the SriovNetworkNodeState. - Added the `STALE_NODE_STATE_CLEANUP_DELAY_MINUTES` environment variable to configure the required delay in minutes (default is 30 minutes). 
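
For example, a minimal illustrative Helm snippet (the chart value shown in
the deployment changes below is what populates this environment variable on
the operator Deployment):

```
# values.yaml -- keep stale SriovNetworkNodeState CRs for one hour before cleanup;
# "0" means no extra delay, so stale CRs are removed on the next reconciliation.
operator:
  staleNodeStateCleanupDelayMinutes: "60"
```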
--- api/v1/helper.go | 41 ++++++++++ .../sriovnetworknodepolicy_controller.go | 66 ++++++++++++++-- .../sriovnetworknodepolicy_controller_test.go | 78 +++++++++++++++++-- .../templates/operator.yaml | 2 + .../sriov-network-operator-chart/values.yaml | 4 + pkg/consts/constants.go | 8 ++ 6 files changed, 188 insertions(+), 11 deletions(-) diff --git a/api/v1/helper.go b/api/v1/helper.go index 300992acb..1f30be618 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -12,6 +12,7 @@ import ( "sort" "strconv" "strings" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1005,3 +1006,43 @@ func GenerateBridgeName(iface *InterfaceExt) string { func NeedToUpdateBridges(bridgeSpec, bridgeStatus *Bridges) bool { return !reflect.DeepEqual(bridgeSpec, bridgeStatus) } + +// SetKeepUntilTime sets an annotation to hold the "keep until time" for the node’s state. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +func (s *SriovNetworkNodeState) SetKeepUntilTime(t time.Time) { + ts := t.Format(time.RFC3339) + annotations := s.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[consts.NodeStateKeepUntilAnnotation] = ts + s.SetAnnotations(annotations) +} + +// GetKeepUntilTime returns the value that is stored in the "keep until time" annotation. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Return zero time instant if annotaion is not found on the object or if it has a wrong format. +func (s *SriovNetworkNodeState) GetKeepUntilTime() time.Time { + t, err := time.Parse(time.RFC3339, s.GetAnnotations()[consts.NodeStateKeepUntilAnnotation]) + if err != nil { + return time.Time{} + } + return t +} + +// ResetKeepUntilTime removes "keep until time" annotation from the state object. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Returns true if the value was removed, false otherwise. 
+func (s *SriovNetworkNodeState) ResetKeepUntilTime() bool { + annotations := s.GetAnnotations() + _, exist := annotations[consts.NodeStateKeepUntilAnnotation] + if !exist { + return false + } + delete(annotations, consts.NodeStateKeepUntilAnnotation) + s.SetAnnotations(annotations) + return true +} diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 29438b176..f8811ed97 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -20,8 +20,10 @@ import ( "context" "encoding/json" "fmt" + "os" "reflect" "sort" + "strconv" "strings" "time" @@ -338,10 +340,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name) return err } - logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) - err = r.Delete(ctx, &ns, &client.DeleteOptions{}) - if err != nil { - logger.Error(err, "Fail to Delete", "SriovNetworkNodeState CR:", ns.GetName()) + if err := r.handleStaleNodeState(ctx, &ns); err != nil { return err } } @@ -350,6 +349,56 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con return nil } +// handleStaleNodeState handles stale SriovNetworkNodeState CR (the CR which no longer have a corresponding node with the daemon). +// If the CR has the "keep until time" annotation, indicating the earliest time the state object can be removed, +// this function will compare it to the current time to determine if deletion is permissible and do deletion if allowed. +// If the annotation is absent, the function will create one with a timestamp in future, using either the default or a configured offset. 
+// If STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable is set to 0, removes the CR immediately +func (r *SriovNetworkNodePolicyReconciler) handleStaleNodeState(ctx context.Context, ns *sriovnetworkv1.SriovNetworkNodeState) error { + logger := log.Log.WithName("handleStaleNodeState") + + var delayMinutes int + var err error + + envValue, found := os.LookupEnv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES") + if found { + delayMinutes, err = strconv.Atoi(envValue) + if err != nil || delayMinutes < 0 { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + logger.Error(err, "invalid value in STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable, use default delay", + "delay", delayMinutes) + } + } else { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + } + + if delayMinutes != 0 { + now := time.Now().UTC() + keepUntilTime := ns.GetKeepUntilTime() + if keepUntilTime.IsZero() { + keepUntilTime = now.Add(time.Minute * time.Duration(delayMinutes)) + logger.V(2).Info("SriovNetworkNodeState has no matching node, configure cleanup delay for the state object", + "nodeStateName", ns.Name, "delay", delayMinutes, "keepUntilTime", keepUntilTime.String()) + ns.SetKeepUntilTime(keepUntilTime) + if err := r.Update(ctx, ns); err != nil { + logger.Error(err, "Fail to update SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil + } + if now.Before(keepUntilTime) { + return nil + } + } + // remove the object if delayMinutes is 0 or if keepUntilTime is already passed + logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) + if err := r.Delete(ctx, ns, &client.DeleteOptions{}); err != nil { + logger.Error(err, "Fail to delete SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil +} + func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig, npl *sriovnetworkv1.SriovNetworkNodePolicyList, @@ -375,9 +424,16 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context return fmt.Errorf("failed to get SriovNetworkNodeState: %v", err) } } else { + keepUntilAnnotationUpdated := found.ResetKeepUntilTime() + if len(found.Status.Interfaces) == 0 { logger.Info("SriovNetworkNodeState Status Interfaces are empty. Skip update of policies in spec", "namespace", ns.Namespace, "name", ns.Name) + if keepUntilAnnotationUpdated { + if err := r.Update(ctx, found); err != nil { + return fmt.Errorf("couldn't update SriovNetworkNodeState: %v", err) + } + } return nil } @@ -420,7 +476,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context // Note(adrianc): we check same ownerReferences since SriovNetworkNodeState // was owned by a default SriovNetworkNodePolicy. if we encounter a descripancy // we need to update. 
- if reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && + if !keepUntilAnnotationUpdated && reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && equality.Semantic.DeepEqual(newVersion.Spec, found.Spec) { logger.V(1).Info("SriovNetworkNodeState did not change, not updating") return nil diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index abdddbc91..7a7b7a8ec 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ b/controllers/sriovnetworknodepolicy_controller_test.go @@ -3,6 +3,7 @@ package controllers import ( "context" "encoding/json" + "os" "sync" "testing" "time" @@ -11,18 +12,17 @@ import ( . "github.com/onsi/gomega" "github.com/google/go-cmp/cmp" + dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" - sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" @@ -48,7 +48,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testVirtioVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVirtio, @@ -67,7 +67,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { }, { tname: "testVhostVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVhost, @@ -87,7 +87,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testExcludeTopology", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", ExcludeTopology: true, }, @@ -138,6 +138,10 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { var ctx context.Context BeforeAll(func() { + // disable stale state cleanup delay to check that the controller can cleanup state objects + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + By("Create SriovOperatorConfig controller k8s objs") config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) @@ -261,3 +265,65 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { }) }) }) + +var _ = Describe("SriovNetworkNodePolicyReconciler", Ordered, func() { + Context("handleStaleNodeState", func() { + var ( + ctx context.Context + r *SriovNetworkNodePolicyReconciler + nodeState *sriovnetworkv1.SriovNetworkNodeState + ) + + 
BeforeEach(func() { + ctx = context.Background() + scheme := runtime.NewScheme() + utilruntime.Must(sriovnetworkv1.AddToScheme(scheme)) + nodeState = &sriovnetworkv1.SriovNetworkNodeState{ObjectMeta: metav1.ObjectMeta{Name: "node1"}} + r = &SriovNetworkNodePolicyReconciler{Client: fake.NewClientBuilder().WithObjects(nodeState).Build()} + }) + It("should set default delay", func() { + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Now().UTC().Before(nodeState.GetKeepUntilTime())).To(BeTrue()) + }) + It("should remove CR if wait time expired", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(-time.Minute)) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + It("should keep existing wait time if already set", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(time.Minute)) + testTime := nodeState.GetKeepUntilTime() + r.Update(ctx, nodeState) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(nodeState.GetKeepUntilTime()).To(Equal(testTime)) + }) + It("non default dealy", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "60") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 30*time.Minute).To(BeTrue()) + }) + It("invalid non default delay - should use default", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "-20") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 20*time.Minute).To(BeTrue()) + }) + It("should remove CR if delay is zero", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + }) +}) diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 0e89d1959..c2a813fc8 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -112,6 +112,8 @@ spec: value: {{ .Values.operator.cniBinPath }} - name: CLUSTER_TYPE value: {{ .Values.operator.clusterType }} + - name: STALE_NODE_STATE_CLEANUP_DELAY_MINUTES + value: "{{ .Values.operator.staleNodeStateCleanupDelayMinutes }}" {{- if 
.Values.operator.admissionControllers.enabled }}
        - name: ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_SECRET_NAME
          value: {{ .Values.operator.admissionControllers.certificates.secretNames.operator }}
diff --git a/deployment/sriov-network-operator-chart/values.yaml b/deployment/sriov-network-operator-chart/values.yaml
index c70d6e323..ec9323bf7 100644
--- a/deployment/sriov-network-operator-chart/values.yaml
+++ b/deployment/sriov-network-operator-chart/values.yaml
@@ -27,6 +27,10 @@ operator:
   resourcePrefix: "openshift.io"
   cniBinPath: "/opt/cni/bin"
   clusterType: "kubernetes"
+  # minimal amount of time (in minutes) the operator will wait before removing
+  # stale SriovNetworkNodeState objects (objects that don't match a node with the daemon)
+  # "0" means no extra delay; in this case the CR will be removed by the next reconciliation cycle (may take up to 5 minutes)
+  staleNodeStateCleanupDelayMinutes: "30"
 metricsExporter:
   port: "9110"
   certificates:
diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go
index 4ce478730..6aadef648 100644
--- a/pkg/consts/constants.go
+++ b/pkg/consts/constants.go
@@ -93,6 +93,14 @@ const (
 	MCPPauseAnnotationState = "sriovnetwork.openshift.io/state"
 	MCPPauseAnnotationTime  = "sriovnetwork.openshift.io/time"
 
+	// NodeStateKeepUntilAnnotation contains the name of the "keep until time" annotation for the SriovNetworkNodeState object.
+	// The "keep until time" specifies the earliest time at which the state object can be removed
+	// if the daemon's pod is not found on the node.
+	NodeStateKeepUntilAnnotation = "sriovnetwork.openshift.io/keep-state-until"
+	// DefaultNodeStateCleanupDelayMinutes contains the default delay before removing stale SriovNetworkNodeState CRs
+	// (the CRs that no longer have a corresponding node with the daemon).
+	DefaultNodeStateCleanupDelayMinutes = 30
+
 	CheckpointFileName = "sno-initial-node-state.json"
 
 	Unknown = "Unknown"

From 093893c81386142fa8691897741d89a696e17af9 Mon Sep 17 00:00:00 2001
From: Andrea Panattoni
Date: Wed, 4 Dec 2024 14:59:01 +0100
Subject: [PATCH 40/59] e2e: Avoid setting wrong routes for `host-local` IPAM

The IPAM configuration:

```
{
   "type":"host-local",
   "subnet":"10.10.10.0/24",
   "rangeStart":"10.10.10.171",
   "rangeEnd":"10.10.10.181",
   "routes":[{"dst":"0.0.0.0/0"}],
   "gateway":"10.10.10.1"
}
```

Can lead to the following pod `ip route` configuration:

```
default via 10.10.10.1 dev net1
default via 10.128.0.1 dev eth0
10.10.10.0/24 dev net1 proto kernel scope link src 10.10.10.172
10.128.0.0/23 dev eth0 proto kernel scope link src 10.128.0.135
10.128.0.0/14 via 10.128.0.1 dev eth0
```

which causes connectivity issues. Avoid setting default routes to
unknown gateways.
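
For reference, the conformance tests below switch to a `host-local` IPAM
block without the `routes`/`gateway` keys, i.e. a configuration along
these lines:

```
{
  "type": "host-local",
  "subnet": "10.10.10.0/24",
  "rangeStart": "10.10.10.171",
  "rangeEnd": "10.10.10.181"
}
```
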
Signed-off-by: Andrea Panattoni --- test/conformance/tests/test_networkpool.go | 6 ++-- .../tests/test_policy_configuration.go | 8 ++---- test/conformance/tests/test_sriov_operator.go | 28 ++++++------------- 3 files changed, 15 insertions(+), 27 deletions(-) diff --git a/test/conformance/tests/test_networkpool.go b/test/conformance/tests/test_networkpool.go index 47d929013..8e6cc2775 100644 --- a/test/conformance/tests/test_networkpool.go +++ b/test/conformance/tests/test_networkpool.go @@ -181,7 +181,7 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, MetaPluginsConfig: `{"type": "rdma"}`, }} @@ -197,7 +197,7 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} @@ -318,7 +318,7 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} diff --git a/test/conformance/tests/test_policy_configuration.go b/test/conformance/tests/test_policy_configuration.go index 28a2c95cc..35a079ff2 100644 --- a/test/conformance/tests/test_policy_configuration.go +++ b/test/conformance/tests/test_policy_configuration.go @@ -90,7 +90,7 @@ var _ = Describe("[sriov] operator", Ordered, func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} @@ -483,9 +483,7 @@ var _ = Describe("[sriov] operator", Ordered, func() { "type":"host-local", "subnet":"10.10.10.0/24", "rangeStart":"10.10.10.171", - "rangeEnd":"10.10.10.181", - "routes":[{"dst":"0.0.0.0/0"}], - "gateway":"10.10.10.1" + "rangeEnd":"10.10.10.181" }` err = network.CreateSriovNetwork(clients, unusedSriovDevice, sriovNetworkName, namespaces.Test, operatorNamespace, resourceName, ipam) @@ -600,7 +598,7 @@ var _ = Describe("[sriov] operator", Ordered, func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} diff --git a/test/conformance/tests/test_sriov_operator.go 
b/test/conformance/tests/test_sriov_operator.go index cac6b356c..cdd0edadc 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -388,7 +388,7 @@ var _ = Describe("[sriov] operator", func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} err := clients.Create(context.Background(), sriovNetwork) @@ -441,7 +441,7 @@ var _ = Describe("[sriov] operator", func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} err := clients.Create(context.Background(), sriovNetwork) @@ -563,9 +563,7 @@ var _ = Describe("[sriov] operator", func() { IPAM: `{"type":"host-local", "subnet":"10.10.10.0/24", "rangeStart":"10.10.10.171", - "rangeEnd":"10.10.10.181", - "routes":[{"dst":"0.0.0.0/0"}], - "gateway":"10.10.10.1"}`, + "rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} @@ -608,9 +606,7 @@ var _ = Describe("[sriov] operator", func() { IPAM: `{"type":"host-local", "subnet":"10.10.10.0/24", "rangeStart":"10.10.10.171", - "rangeEnd":"10.10.10.181", - "routes":[{"dst":"0.0.0.0/0"}], - "gateway":"10.10.10.1"}`, + "rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} @@ -657,9 +653,7 @@ var _ = Describe("[sriov] operator", func() { IPAM: `{"type":"host-local", "subnet":"10.10.10.0/24", "rangeStart":"10.10.10.171", - "rangeEnd":"10.10.10.181", - "routes":[{"dst":"0.0.0.0/0"}], - "gateway":"10.10.10.1"}`, + "rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} @@ -730,9 +724,7 @@ var _ = Describe("[sriov] operator", func() { IPAM: `{"type":"host-local", "subnet":"10.10.10.0/24", "rangeStart":"10.10.10.171", - "rangeEnd":"10.10.10.181", - "routes":[{"dst":"0.0.0.0/0"}], - "gateway":"10.10.10.1"}`, + "rangeEnd":"10.10.10.181"}`, MaxTxRate: &maxTxRate, MinTxRate: &minTxRate, NetworkNamespace: namespaces.Test, @@ -766,9 +758,7 @@ var _ = Describe("[sriov] operator", func() { IPAM: `{"type":"host-local", "subnet":"10.10.10.0/24", "rangeStart":"10.10.10.171", - "rangeEnd":"10.10.10.181", - "routes":[{"dst":"0.0.0.0/0"}], - "gateway":"10.10.10.1"}`, + "rangeEnd":"10.10.10.181"}`, Vlan: 1, VlanQoS: 2, NetworkNamespace: namespaces.Test, @@ -1310,7 +1300,7 @@ var _ = Describe("[sriov] operator", func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} @@ -1602,7 +1592,7 @@ var _ = Describe("[sriov] operator", func() { }, Spec: sriovv1.SriovNetworkSpec{ ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + IPAM: 
`{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, NetworkNamespace: namespaces.Test, }} From df2a973347e06616286c34d14221a3e37a50f35c Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 5 Dec 2024 09:53:25 +0100 Subject: [PATCH 41/59] e2e: Improve `findUnusedSriovDevices` errors Signed-off-by: Andrea Panattoni --- test/conformance/tests/test_sriov_operator.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index cdd0edadc..bf7a917d3 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -1846,9 +1846,17 @@ func findUnusedSriovDevices(testNode string, sriovDevices []*sriovv1.InterfaceEx if isDefaultRouteInterface(device.Name, routes) { continue } - stdout, _, err = pod.ExecCommand(clients, createdPod, "ip", "link", "show", device.Name) - Expect(err).ToNot(HaveOccurred()) - Expect(len(stdout)).Should(Not(Equal(0)), "Unable to query link state") + stdout, stderr, err := pod.ExecCommand(clients, createdPod, "ip", "link", "show", device.Name) + if err != nil { + fmt.Printf("Can't query link state for device [%s]: %s", device.Name, err.Error()) + continue + } + + if len(stdout) == 0 { + fmt.Printf("Can't query link state for device [%s]: stderr:[%s]", device.Name, stderr) + continue + } + if strings.Contains(stdout, "master ovs-system") { continue // The interface is not active } From c621f29eefcdd72e018133549eb8751c640e6a21 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Mon, 9 Dec 2024 14:45:19 +0100 Subject: [PATCH 42/59] e2e: Don't allocate TTY when executing commands Executing commands on pods allocating TTY may produce additional, unwanted characters like: ``` \x1b[1;31m2024-12-06T20:33:56.630784Z: ``` We saw this in some CI run like https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs/periodic-ci-openshift-release-master-nightly-4.18-e2e-telco5g-sno-cnftests/1865094117650862080/artifacts/e2e-telco5g-sno-cnftests/telco5g-cnf-tests/artifacts/test_results.html Avoid allocating TTY in the automated test suite. 
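A usage sketch of the exec helper after this change, reusing the `accept_redirects`
check from the conformance suite purely as an illustration: without a TTY, stdout and
stderr remain separate streams, so exact-match assertions stay reliable.

```go
// stdout now carries only the command output (no terminal escape sequences),
// and stderr can be asserted on independently.
stdout, stderr, err := pod.ExecCommand(clients, testPod, "cat", "/proc/sys/net/ipv4/conf/net1/accept_redirects")
Expect(err).ToNot(HaveOccurred())
Expect(stderr).To(Equal(""))
Expect(strings.TrimSpace(stdout)).To(Equal("1"))
```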
Signed-off-by: Andrea Panattoni --- test/util/pod/pod.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/util/pod/pod.go b/test/util/pod/pod.go index 541eda1ad..c99ae00b2 100644 --- a/test/util/pod/pod.go +++ b/test/util/pod/pod.go @@ -117,7 +117,6 @@ func ExecCommand(cs *testclient.ClientSet, pod *corev1.Pod, command ...string) ( Command: command, Stdout: true, Stderr: true, - TTY: true, }, scheme.ParameterCodec) exec, err := remotecommand.NewSPDYExecutor(cs.Config, "POST", req.URL()) @@ -125,10 +124,9 @@ func ExecCommand(cs *testclient.ClientSet, pod *corev1.Pod, command ...string) ( return buf.String(), errbuf.String(), err } - err = exec.Stream(remotecommand.StreamOptions{ + err = exec.StreamWithContext(context.Background(), remotecommand.StreamOptions{ Stdout: &buf, Stderr: &errbuf, - Tty: true, }) if err != nil { return buf.String(), errbuf.String(), err From 7f8eff4823f5b383c43f1ea4622a969bc3750b73 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Tue, 10 Dec 2024 16:54:24 +0100 Subject: [PATCH 43/59] e2e: Use `cat` instead of `more` to read files Signed-off-by: Andrea Panattoni --- test/conformance/tests/test_sriov_operator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index bf7a917d3..c9a60474d 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -937,7 +937,7 @@ var _ = Describe("[sriov] operator", func() { waitForNetAttachDef(sriovNetworkName, namespaces.Test) testPod := createTestPod(node, []string{sriovNetworkName}) - stdout, _, err := pod.ExecCommand(clients, testPod, "more", "/proc/sys/net/ipv4/conf/net1/accept_redirects") + stdout, _, err := pod.ExecCommand(clients, testPod, "cat", "/proc/sys/net/ipv4/conf/net1/accept_redirects") Expect(err).ToNot(HaveOccurred()) Expect(strings.TrimSpace(stdout)).To(Equal("1")) From c33774a4811911e8c94f24854a66c028137793c8 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 5 Dec 2024 18:00:08 +0100 Subject: [PATCH 44/59] Unit test for RdmaMode Signed-off-by: Andrea Panattoni --- .../sriovnetworknodepolicy_controller_test.go | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index abdddbc91..d5534f55e 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ b/controllers/sriovnetworknodepolicy_controller_test.go @@ -260,4 +260,64 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { }, time.Minute, time.Second).Should(Succeed()) }) }) + + Context("RdmaMode", func() { + BeforeEach(func() { + Expect( + k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkPoolConfig{}, k8sclient.InNamespace(vars.Namespace)), + ).ToNot(HaveOccurred()) + }) + + It("field is correctly written to the SriovNetworkNodeState", func() { + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{ + "node-role.kubernetes.io/worker": "", + "kubernetes.io/os": "linux", + "test": "", + }, + }} + Expect(k8sClient.Create(ctx, node)).To(Succeed()) + + nodeState := &sriovnetworkv1.SriovNetworkNodeState{} + Eventually(func(g Gomega) { + err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: "node0", Namespace: testNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + }, time.Minute, time.Second).Should(Succeed()) + + 
nodeState.Status.Interfaces = sriovnetworkv1.InterfaceExts{ + sriovnetworkv1.InterfaceExt{ + Vendor: "8086", + Driver: "i40e", + Mtu: 1500, + Name: "ens803f0", + PciAddress: "0000:86:00.0", + NumVfs: 0, + TotalVfs: 64, + }, + } + err := k8sClient.Status().Update(context.Background(), nodeState) + Expect(err).ToNot(HaveOccurred()) + + poolConfig := &sriovnetworkv1.SriovNetworkPoolConfig{} + poolConfig.SetNamespace(testNamespace) + poolConfig.SetName("test-workers") + poolConfig.Spec = sriovnetworkv1.SriovNetworkPoolConfigSpec{ + NodeSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "test": "", + }, + }, + RdmaMode: "exclusive", + } + Expect(k8sClient.Create(ctx, poolConfig)).To(Succeed()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name, Namespace: testNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(nodeState.Spec.System.RdmaMode).To(Equal("exclusive")) + }).WithPolling(time.Second).WithTimeout(time.Minute).Should(Succeed()) + + }) + }) }) From fdffa3d56fb29a5359d641ed729cfbd01e65847e Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 5 Dec 2024 18:07:47 +0100 Subject: [PATCH 45/59] Avoid overwriting coverage files for different `CLUSTER_TYPE`s Signed-off-by: Andrea Panattoni --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f5ca7edc8..e70b591ac 100644 --- a/Makefile +++ b/Makefile @@ -229,7 +229,7 @@ test-bindata-scripts: fakechroot fakechroot ./test/scripts/kargs_test.sh test-%: generate manifests envtest - KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir=/tmp -p path)" HOME="$(shell pwd)" go test ./$*/... -coverprofile cover-$*.out -coverpkg ./... -v + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir=/tmp -p path)" HOME="$(shell pwd)" go test ./$*/... -coverprofile cover-$*-$(CLUSTER_TYPE).out -coverpkg ./... -v GOCOVMERGE = $(BIN_DIR)/gocovmerge gocovmerge: ## Download gocovmerge locally if necessary. From 8e565e9112992ba09724bde9884a77b26afceba4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 00:10:30 +0000 Subject: [PATCH 46/59] build(deps): bump golang.org/x/crypto from 0.23.0 to 0.31.0 Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.23.0 to 0.31.0. - [Commits](https://github.com/golang/crypto/compare/v0.23.0...v0.31.0) --- updated-dependencies: - dependency-name: golang.org/x/crypto dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- go.mod | 10 +++++----- go.sum | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index 31d70d572..b97dd6239 100644 --- a/go.mod +++ b/go.mod @@ -143,14 +143,14 @@ require ( go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect go.uber.org/multierr v1.11.0 // indirect go4.org v0.0.0-20200104003542-c7e774b10ea0 // indirect - golang.org/x/crypto v0.23.0 // indirect + golang.org/x/crypto v0.31.0 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect golang.org/x/mod v0.17.0 // indirect golang.org/x/oauth2 v0.13.0 // indirect - golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.20.0 // indirect - golang.org/x/term v0.20.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/sync v0.10.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/term v0.27.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.8 // indirect diff --git a/go.sum b/go.sum index 4d8d4c171..905b92b08 100644 --- a/go.sum +++ b/go.sum @@ -491,8 +491,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= -golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -592,8 +592,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -641,14 +641,14 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= -golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -658,8 +658,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From 60a777c9174604eec17f1598552b16b55ff5964e Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Fri, 13 Dec 2024 16:51:59 +0200 Subject: [PATCH 47/59] Do not configure BlueField NICs in DPU mode Signed-off-by: Ivan Kolodiazhnyi --- pkg/vendors/mellanox/mellanox.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/vendors/mellanox/mellanox.go b/pkg/vendors/mellanox/mellanox.go index 82410c7f8..a29429db4 100644 --- a/pkg/vendors/mellanox/mellanox.go +++ b/pkg/vendors/mellanox/mellanox.go @@ -6,6 +6,7 @@ import ( "strconv" "strings" + "github.com/pkg/errors" kerrors "k8s.io/apimachinery/pkg/util/errors" "sigs.k8s.io/controller-runtime/pkg/log" @@ -162,6 +163,15 @@ func (m *mellanoxHelper) MlxResetFW(pciAddresses []string) error { func (m *mellanoxHelper) MlxConfigFW(attributesToChange map[string]MlxNic) error { log.Log.Info("mellanox-plugin configFW()") for pciAddr, fwArgs := range attributesToChange { + bfMode, err := m.GetMellanoxBlueFieldMode(pciAddr) + if err != nil { + // NIC is not a DPU or mstconfig failed. 
It's safe to continue FW configuration + log.Log.V(2).Info("mellanox-plugin: configFW(): can't get DPU mode for NIC", "pciAddress", pciAddr) + } + if bfMode == BluefieldDpu { + // Host reboot won't re-load NIC firmware in DPU mode. To apply FW changes power cycle is required or mstfwreset could be used. + return errors.Errorf("NIC %s is in DPU mode. Firmware configuration changes are not supported in this mode.", pciAddr) + } cmdArgs := []string{"-d", pciAddr, "-y", "set"} if fwArgs.EnableSriov { cmdArgs = append(cmdArgs, fmt.Sprintf("%s=True", EnableSriov)) From 0c4edb3577f6065851653f239435c183ffc53add Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Mon, 16 Dec 2024 17:16:56 +0200 Subject: [PATCH 48/59] functest: add retry for rdma functest this is needed because after a reboot on a single node the operator webhook may not be ready Signed-off-by: Sebastian Sch --- test/conformance/tests/test_networkpool.go | 44 ++++++++++------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/test/conformance/tests/test_networkpool.go b/test/conformance/tests/test_networkpool.go index 8e6cc2775..1129dc2ca 100644 --- a/test/conformance/tests/test_networkpool.go +++ b/test/conformance/tests/test_networkpool.go @@ -74,15 +74,13 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) By("Checking rdma mode and kernel args") - output, _, err := runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + cmdlineOutput, _, err := runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline") + errDescription := fmt.Sprintf("kernel args are not right, printing current kernel args %s", cmdlineOutput) Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, "1")).To(BeTrue()) - - output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") - Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + Expect(cmdlineOutput).To(ContainSubstring("ib_core.netns_mode=0"), errDescription) + Expect(cmdlineOutput).ToNot(ContainSubstring("ib_core.netns_mode=1"), errDescription) - output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=0 | wc -l") + output, _, err := runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=0 | wc -l") Expect(err).ToNot(HaveOccurred()) Expect(strings.HasPrefix(output, "1")).To(BeTrue()) @@ -97,21 +95,22 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) By("Checking rdma mode and kernel args") - output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + cmdlineOutput, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline") + errDescription = fmt.Sprintf("kernel args are not right, printing current kernel args %s", cmdlineOutput) Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, "0")).To(BeTrue()) - - output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") - Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, 
"1")).To(BeTrue()) + Expect(cmdlineOutput).ToNot(ContainSubstring("ib_core.netns_mode=0"), errDescription) + Expect(cmdlineOutput).To(ContainSubstring("ib_core.netns_mode=1"), errDescription) output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=1 | wc -l") Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue(), fmt.Sprintf("kernel args are not right, printing current kernel args %s", cmdlineOutput)) By("removing rdma mode configuration") - err = clients.Delete(context.Background(), networkPool) - Expect(err).ToNot(HaveOccurred()) + Eventually(func(g Gomega) { + err = clients.Delete(context.Background(), networkPool) + g.Expect(err).ToNot(HaveOccurred()) + }, 5*time.Minute, 5*time.Second).Should(Succeed()) + WaitForSRIOVStable() err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) @@ -120,17 +119,14 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) By("Checking rdma mode and kernel args") - output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") - Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, "0")).To(BeTrue()) - - output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") - Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + cmdlineOutput, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline") + errDescription = fmt.Sprintf("kernel args are not right, printing current kernel args %s", cmdlineOutput) + Expect(cmdlineOutput).ToNot(ContainSubstring("ib_core.netns_mode=0"), errDescription) + Expect(cmdlineOutput).ToNot(ContainSubstring("ib_core.netns_mode=1"), errDescription) output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "ls /host/etc/modprobe.d | grep sriov_network_operator_modules_config.conf | wc -l") Expect(err).ToNot(HaveOccurred()) - Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue(), fmt.Sprintf("kernel args are not right, printing current kernel args %s", cmdlineOutput)) }) }) From ed25caaa819dda402735c82443bcb03be9a9f21a Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 19 Dec 2024 12:05:42 +0100 Subject: [PATCH 49/59] hack/env.sh: move checking of environment variables outside SKIP_VAR_SET block When "$SKIP_VAR_SET" is unset and the environment variables fallback to the default, the check for valid values should be done. Move the check out of the $SKIP_VAR_SET block for that. For the current "hack/env.sh" this maybe not make an actual difference, because probably the code to assign default values will ensure that always valid value are set. Note that the openshift variant of the above code will detect the default via skopeo, which can fail. For that reason, this change makes more sense for openshift. However, also for the current code, performing the same error checking after filling out default values, ensures that the detected values are considered valid Even if that is in fact always the case, it's not entirely trivial to see. 
--- hack/env.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/hack/env.sh b/hack/env.sh index 64f79212d..58a40ee29 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ -14,22 +14,27 @@ if [ -z $SKIP_VAR_SET ]; then export METRICS_EXPORTER_IMAGE=${METRICS_EXPORTER_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter} export SRIOV_NETWORK_OPERATOR_IMAGE=${SRIOV_NETWORK_OPERATOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator} export METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0} + fail_msg_detect="is empty and failed to detect" else - # ensure that OVS_CNI_IMAGE is set, empty string is a valid value - OVS_CNI_IMAGE=${OVS_CNI_IMAGE:-} - # ensure that RDMA_CNI_IMAGE is set, empty string is a valid value - RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE:-} - METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-} - [ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 - [ -z $SRIOV_INFINIBAND_CNI_IMAGE ] && echo "SRIOV_INFINIBAND_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 - [ -z $SRIOV_DEVICE_PLUGIN_IMAGE ] && echo "SRIOV_DEVICE_PLUGIN_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 - [ -z $NETWORK_RESOURCES_INJECTOR_IMAGE ] && echo "NETWORK_RESOURCES_INJECTOR_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 - [ -z $SRIOV_NETWORK_CONFIG_DAEMON_IMAGE ] && echo "SRIOV_NETWORK_CONFIG_DAEMON_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 - [ -z $SRIOV_NETWORK_WEBHOOK_IMAGE ] && echo "SRIOV_NETWORK_WEBHOOK_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 - [ -z $METRICS_EXPORTER_IMAGE ] && echo "METRICS_EXPORTER_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 - [ -z $SRIOV_NETWORK_OPERATOR_IMAGE ] && echo "SRIOV_NETWORK_OPERATOR_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 + fail_msg_detect="is empty but SKIP_VAR_SET is set" fi +# ensure that OVS_CNI_IMAGE is set, empty string is a valid value +OVS_CNI_IMAGE=${OVS_CNI_IMAGE:-} +# ensure that RDMA_CNI_IMAGE is set, empty string is a valid value +RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE:-} +METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-} +[ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE $fail_msg_detect" && exit 1 +[ -z $SRIOV_INFINIBAND_CNI_IMAGE ] && echo "SRIOV_INFINIBAND_CNI_IMAGE $fail_msg_detect" && exit 1 +[ -z $SRIOV_DEVICE_PLUGIN_IMAGE ] && echo "SRIOV_DEVICE_PLUGIN_IMAGE $fail_msg_detect" && exit 1 +[ -z $NETWORK_RESOURCES_INJECTOR_IMAGE ] && echo "NETWORK_RESOURCES_INJECTOR_IMAGE $fail_msg_detect" && exit 1 +[ -z $SRIOV_NETWORK_CONFIG_DAEMON_IMAGE ] && echo "SRIOV_NETWORK_CONFIG_DAEMON_IMAGE $fail_msg_detect" && exit 1 +[ -z $SRIOV_NETWORK_WEBHOOK_IMAGE ] && echo "SRIOV_NETWORK_WEBHOOK_IMAGE $fail_msg_detect" && exit 1 +[ -z $METRICS_EXPORTER_IMAGE ] && echo "METRICS_EXPORTER_IMAGE $fail_msg_detect" && exit 1 +[ -z $SRIOV_NETWORK_OPERATOR_IMAGE ] && echo "SRIOV_NETWORK_OPERATOR_IMAGE $fail_msg_detect" && exit 1 + +unset fail_msg_detect + set -x export RELEASE_VERSION=4.7.0 From 4164d694f258798e4f3bab17b482aed35110e935 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 20 Dec 2024 12:29:42 +0100 Subject: [PATCH 50/59] bump `golang.org/x/net` to `v0.33.0` [CVE-2024-45338](https://github.com/advisories/GHSA-w32m-9786-jp63) Signed-off-by: Andrea Panattoni --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 
b97dd6239..eabcfa6a5 100644 --- a/go.mod +++ b/go.mod @@ -38,7 +38,7 @@ require ( github.com/vishvananda/netlink v1.2.1-beta.2.0.20240221172127-ec7bcb248e94 github.com/vishvananda/netns v0.0.4 go.uber.org/zap v1.25.0 - golang.org/x/net v0.25.0 + golang.org/x/net v0.33.0 golang.org/x/time v0.3.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index 905b92b08..e33f58ee6 100644 --- a/go.sum +++ b/go.sum @@ -567,8 +567,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= -golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= From d7d2e577caad3133c3ca128b52393c8a7e5722d7 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Mon, 23 Dec 2024 20:32:30 +0200 Subject: [PATCH 51/59] add link state enable on test if we run on a system where the PF is not connected to the network we can still use it for tests but we need the link state to not be auto. 
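A minimal sketch of the resulting test logic (mirroring the change below): with the
default `auto` link state the VF link simply follows the PF carrier, while `enable`
forces the VF link up even when the PF is not cabled.

```go
// On real bare-metal clusters the PF may have no carrier, so force the VF link up.
if !cluster.VirtualCluster() {
	sriovNetwork.Spec.LinkState = "enable"
}
```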
Signed-off-by: Sebastian Sch --- test/conformance/tests/test_policy_configuration.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/conformance/tests/test_policy_configuration.go b/test/conformance/tests/test_policy_configuration.go index 35a079ff2..6d5d8b259 100644 --- a/test/conformance/tests/test_policy_configuration.go +++ b/test/conformance/tests/test_policy_configuration.go @@ -602,6 +602,11 @@ var _ = Describe("[sriov] operator", Ordered, func() { NetworkNamespace: namespaces.Test, }} + // for real BM env we enable link state + if !cluster.VirtualCluster() { + sriovNetwork.Spec.LinkState = "enable" + } + // We need this to be able to run the connectivity checks on Mellanox cards if intf.DeviceID == "1015" { sriovNetwork.Spec.SpoofChk = off From 260d7eb7b4d38f2315bdf85c005e79569b1c7519 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 24 Dec 2024 13:55:56 +0200 Subject: [PATCH 52/59] functest: Fix ip link command output This will fix the issue we sometime see ` : Dump was interrupted and may be inconsistent.\n` https://docs.kernel.org/userspace-api/netlink/intro.html#dump-consistency Signed-off-by: Sebastian Sch --- test/conformance/tests/test_sriov_operator.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index c9a60474d..73c53412c 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -501,9 +501,14 @@ var _ = Describe("[sriov] operator", func() { Expect(err).ToNot(HaveOccurred()) Eventually(func() bool { - stdout, stderr, err := pod.ExecCommand(clients, hostNetPod, "ip", "link", "show") - Expect(err).ToNot(HaveOccurred()) - Expect(stderr).To(Equal("")) + var stdout, stderr string + // Adding a retry because some of the time we get `Dump was interrupted and may be inconsistent.` + // output from the ip link command + Eventually(func(g Gomega) { + stdout, stderr, err = pod.ExecCommand(clients, hostNetPod, "ip", "link", "show") + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(stderr).To(Equal("")) + }, time.Minute, 2*time.Second).Should(Succeed()) found := false for _, line := range strings.Split(stdout, "\n") { From 81c67cce3f56d53fb935835bbeab042ac4ebac6f Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Tue, 17 Dec 2024 14:19:38 +0200 Subject: [PATCH 53/59] Skip kernel parameters configuration for Ubuntu It's enouph to configure ib_core module in /etc/moprobe.d/ for Ubuntu OS to change RDMA subsystem mode. Also this commit add OS check into kargs.sh error because 'grubby' isn't available in official Ubuntu repositories. Kernel param configuration support in Ubuntu should be implemented in a separate commit. 
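For context, the module configuration referred to here is the operator-generated
`/etc/modprobe.d/sriov_network_operator_modules_config.conf`. An illustrative sketch of
the kind of entry it carries for exclusive RDMA mode (the exact content is generated by
the operator, not hand-written):

```
options ib_core netns_mode=0
```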
Signed-off-by: Ivan Kolodiazhnyi --- bindata/scripts/kargs.sh | 8 ++++++++ test/scripts/kargs_test.sh | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/bindata/scripts/kargs.sh b/bindata/scripts/kargs.sh index 8d118456e..07e283600 100755 --- a/bindata/scripts/kargs.sh +++ b/bindata/scripts/kargs.sh @@ -7,6 +7,14 @@ declare -a kargs=( "$@" ) ret=0 args=$(chroot /host/ cat /proc/cmdline) +IS_OS_UBUNTU=true; [[ "$(chroot /host/ grep -i ubuntu /etc/os-release -c)" == "0" ]] && IS_OS_UBUNTU=false + +# Kernel args configuration isn't supported for Ubuntu now, so we shouldn't do anything here +if ${IS_OS_UBUNTU} ; then + echo $ret + exit 0 +fi + if chroot /host/ test -f /run/ostree-booted ; then for t in "${kargs[@]}";do if [[ $command == "add" ]];then diff --git a/test/scripts/kargs_test.sh b/test/scripts/kargs_test.sh index 053bd5200..3e191f230 100755 --- a/test/scripts/kargs_test.sh +++ b/test/scripts/kargs_test.sh @@ -6,6 +6,7 @@ SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/kargs.sh" test_RpmOstree_Add_All_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted @@ -19,6 +20,7 @@ test_RpmOstree_Add_All_Arguments() { test_RpmOstree_Add_Only_Missing_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff K=L" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted @@ -31,6 +33,7 @@ test_RpmOstree_Add_Only_Missing_Arguments() { } test_RpmOstree_Delete_All_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff X=Y W=Z" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted @@ -43,6 +46,7 @@ test_RpmOstree_Delete_All_Arguments() { } test_RpmOstree_Delete_Only_Exist_Arguments() { + echo "ID=\"rhel\"" > ${FAKE_HOST}/etc/os-release echo "a b c=d eee=fff X=Y" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted From d7988566a950779e64a673dd9c76e3f62098312f Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Wed, 1 Jan 2025 13:41:58 +0200 Subject: [PATCH 54/59] Bump the k8s version we use in the CI system to 1.32.0 Signed-off-by: Sebastian Sch --- hack/run-e2e-conformance-virtual-cluster.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh index d6fa44fd9..353c8528d 100755 --- a/hack/run-e2e-conformance-virtual-cluster.sh +++ b/hack/run-e2e-conformance-virtual-cluster.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -xeo pipefail -cluster_version=${CLUSTER_VERSION:-1.29.3} +cluster_version=${CLUSTER_VERSION:-1.32.0} cluster_name=${CLUSTER_NAME:-virtual} domain_name=$cluster_name.lab From 1a8d74cbeb7e3e0781d072b44ca57b50fc41ffe1 Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Mon, 6 Jan 2025 10:43:24 +0200 Subject: [PATCH 55/59] Do not return DPU mode on error Signed-off-by: Ivan Kolodiazhnyi --- pkg/vendors/mellanox/mellanox.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/vendors/mellanox/mellanox.go b/pkg/vendors/mellanox/mellanox.go index a29429db4..65106ab0f 100644 --- a/pkg/vendors/mellanox/mellanox.go +++ b/pkg/vendors/mellanox/mellanox.go @@ -99,27 +99,27 @@ func (m *mellanoxHelper) GetMellanoxBlueFieldMode(PciAddress string) (BlueFieldM internalCPUPageSupplierstatus, exist := mstCurrentData[internalCPUPageSupplier] if !exist { - return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUPageSupplier) + return -1, fmt.Errorf("failed to 
find %s in the mstconfig output command", internalCPUPageSupplier) } internalCPUEswitchManagerStatus, exist := mstCurrentData[internalCPUEswitchManager] if !exist { - return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUEswitchManager) + return -1, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUEswitchManager) } internalCPUIbVportoStatus, exist := mstCurrentData[internalCPUIbVporto] if !exist { - return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUIbVporto) + return -1, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUIbVporto) } internalCPUOffloadEngineStatus, exist := mstCurrentData[internalCPUOffloadEngine] if !exist { - return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUOffloadEngine) + return -1, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUOffloadEngine) } internalCPUModelStatus, exist := mstCurrentData[internalCPUModel] if !exist { - return 0, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUModel) + return -1, fmt.Errorf("failed to find %s in the mstconfig output command", internalCPUModel) } // check for DPU From e49dac0fe85507ef21d2f5d9af4c700e779ebe1b Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Mon, 6 Jan 2025 09:03:16 +0200 Subject: [PATCH 56/59] Support mtu_request for OVS For using non-default MTU, OVS supports "mtu_request" field when adding a port to the bridge. eg: https://docs.openvswitch.org/en/latest/topics/dpdk/jumbo-frames/ Signed-off-by: Fred Rolland --- api/v1/helper.go | 4 ++++ api/v1/sriovnetworknodepolicy_types.go | 2 ++ api/v1/zz_generated.deepcopy.go | 5 +++++ ...iovnetwork.openshift.io_sriovnetworknodepolicies.yaml | 4 ++++ ...sriovnetwork.openshift.io_sriovnetworknodestates.yaml | 8 ++++++++ ...iovnetwork.openshift.io_sriovnetworknodepolicies.yaml | 4 ++++ ...sriovnetwork.openshift.io_sriovnetworknodestates.yaml | 8 ++++++++ pkg/host/internal/bridge/ovs/models.go | 1 + pkg/host/internal/bridge/ovs/ovs.go | 6 ++++++ pkg/host/internal/bridge/ovs/ovs_test.go | 7 +++++++ pkg/host/internal/bridge/ovs/test_db.ovsschema | 9 +++++++++ 11 files changed, 58 insertions(+) diff --git a/api/v1/helper.go b/api/v1/helper.go index 300992acb..b6baed012 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -483,6 +483,10 @@ func (p *SriovNetworkNodePolicy) ApplyBridgeConfig(state *SriovNetworkNodeState) Interface: p.Spec.Bridge.OVS.Uplink.Interface, }}, } + if p.Spec.Mtu > 0 { + mtu := p.Spec.Mtu + ovsBridge.Uplinks[0].Interface.MTURequest = &mtu + } log.Info("Update bridge for interface", "name", iface.Name, "bridge", ovsBridge.Name) // We need to keep slices with bridges ordered to avoid unnecessary updates in the K8S API. 
diff --git a/api/v1/sriovnetworknodepolicy_types.go b/api/v1/sriovnetworknodepolicy_types.go index 40c53e0bf..a4417ed65 100644 --- a/api/v1/sriovnetworknodepolicy_types.go +++ b/api/v1/sriovnetworknodepolicy_types.go @@ -125,6 +125,8 @@ type OVSInterfaceConfig struct { ExternalIDs map[string]string `json:"externalIDs,omitempty"` // other_config field in the Interface table in OVSDB OtherConfig map[string]string `json:"otherConfig,omitempty"` + // mtu_request field in the Interface table in OVSDB + MTURequest *int `json:"mtuRequest,omitempty"` } // SriovNetworkNodePolicyStatus defines the observed state of SriovNetworkNodePolicy diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 0209c0573..0d9d3c4cf 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -264,6 +264,11 @@ func (in *OVSInterfaceConfig) DeepCopyInto(out *OVSInterfaceConfig) { (*out)[key] = val } } + if in.MTURequest != nil { + in, out := &in.MTURequest, &out.MTURequest + *out = new(int) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OVSInterfaceConfig. diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index 36c1050ea..524c5124e 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -81,6 +81,10 @@ spec: description: external_ids field in the Interface table in OVSDB type: object + mtuRequest: + description: mtu_request field in the Interface table + in OVSDB + type: integer options: additionalProperties: type: string diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index 31ddf3bf1..7535346ef 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -102,6 +102,10 @@ spec: description: external_ids field in the Interface table in OVSDB type: object + mtuRequest: + description: mtu_request field in the Interface + table in OVSDB + type: integer options: additionalProperties: type: string @@ -237,6 +241,10 @@ spec: description: external_ids field in the Interface table in OVSDB type: object + mtuRequest: + description: mtu_request field in the Interface + table in OVSDB + type: integer options: additionalProperties: type: string diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index 36c1050ea..524c5124e 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -81,6 +81,10 @@ spec: description: external_ids field in the Interface table in OVSDB type: object + mtuRequest: + description: mtu_request field in the Interface table + in OVSDB + type: integer options: additionalProperties: type: string diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index 31ddf3bf1..7535346ef 100644 --- 
a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -102,6 +102,10 @@ spec: description: external_ids field in the Interface table in OVSDB type: object + mtuRequest: + description: mtu_request field in the Interface + table in OVSDB + type: integer options: additionalProperties: type: string @@ -237,6 +241,10 @@ spec: description: external_ids field in the Interface table in OVSDB type: object + mtuRequest: + description: mtu_request field in the Interface + table in OVSDB + type: integer options: additionalProperties: type: string diff --git a/pkg/host/internal/bridge/ovs/models.go b/pkg/host/internal/bridge/ovs/models.go index 4bd356312..234a4ef32 100644 --- a/pkg/host/internal/bridge/ovs/models.go +++ b/pkg/host/internal/bridge/ovs/models.go @@ -36,6 +36,7 @@ type InterfaceEntry struct { Options map[string]string `ovsdb:"options"` ExternalIDs map[string]string `ovsdb:"external_ids"` OtherConfig map[string]string `ovsdb:"other_config"` + MTURequest *int `ovsdb:"mtu_request"` } // PortEntry represents some fields of the object in the Port table diff --git a/pkg/host/internal/bridge/ovs/ovs.go b/pkg/host/internal/bridge/ovs/ovs.go index 7ad8a3e8c..e4bae9f81 100644 --- a/pkg/host/internal/bridge/ovs/ovs.go +++ b/pkg/host/internal/bridge/ovs/ovs.go @@ -156,6 +156,7 @@ func (o *ovs) CreateOVSBridge(ctx context.Context, conf *sriovnetworkv1.OVSConfi Options: conf.Uplinks[0].Interface.Options, ExternalIDs: conf.Uplinks[0].Interface.ExternalIDs, OtherConfig: conf.Uplinks[0].Interface.OtherConfig, + MTURequest: conf.Uplinks[0].Interface.MTURequest, }); err != nil { funcLog.Error(err, "CreateOVSBridge(): failed to add uplink interface to the bridge") return err @@ -592,6 +593,10 @@ func (o *ovs) getCurrentBridgeState(ctx context.Context, dbClient client.Client, OtherConfig: updateMap(knownConfigUplink.Interface.OtherConfig, iface.OtherConfig), }, }} + if iface.MTURequest != nil { + mtu := *iface.MTURequest + currentConfig.Uplinks[0].Interface.MTURequest = &mtu + } return currentConfig, nil } @@ -707,6 +712,7 @@ func getClient(ctx context.Context) (client.Client, error) { &interfaceEntry.Options, &interfaceEntry.ExternalIDs, &interfaceEntry.OtherConfig, + &interfaceEntry.MTURequest, ), client.WithTable(portEntry, &portEntry.UUID, diff --git a/pkg/host/internal/bridge/ovs/ovs_test.go b/pkg/host/internal/bridge/ovs/ovs_test.go index 666fe9218..66be5e8ed 100644 --- a/pkg/host/internal/bridge/ovs/ovs_test.go +++ b/pkg/host/internal/bridge/ovs/ovs_test.go @@ -27,6 +27,7 @@ import ( ) func getManagedBridges() map[string]*sriovnetworkv1.OVSConfigExt { + mtu := 5000 return map[string]*sriovnetworkv1.OVSConfigExt{ "br-0000_d8_00.0": { Name: "br-0000_d8_00.0", @@ -43,6 +44,7 @@ func getManagedBridges() map[string]*sriovnetworkv1.OVSConfigExt { ExternalIDs: map[string]string{"iface_externalID_key": "iface_externalID_value"}, OtherConfig: map[string]string{"iface_otherConfig_key": "iface_otherConfig_value"}, Options: map[string]string{"iface_options_key": "iface_options_value"}, + MTURequest: &mtu, }, }}, }, @@ -83,6 +85,7 @@ func (t *testDBEntries) GetCreateOperations(c client.Client) []ovsdb.Operation { } func getDefaultInitialDBContent() *testDBEntries { + mtu := 5000 iface := &InterfaceEntry{ Name: "enp216s0f0np0", UUID: uuid.NewString(), @@ -90,6 +93,7 @@ func getDefaultInitialDBContent() *testDBEntries { ExternalIDs: 
map[string]string{"iface_externalID_key": "iface_externalID_value"}, OtherConfig: map[string]string{"iface_otherConfig_key": "iface_otherConfig_value"}, Options: map[string]string{"iface_options_key": "iface_options_value"}, + MTURequest: &mtu, } port := &PortEntry{ Name: "enp216s0f0np0", @@ -156,6 +160,7 @@ func validateDBConfig(dbContent *testDBEntries, conf *sriovnetworkv1.OVSConfigEx Expect(iface.Type).To(Equal(conf.Uplinks[0].Interface.Type)) Expect(iface.OtherConfig).To(Equal(conf.Uplinks[0].Interface.OtherConfig)) Expect(iface.ExternalIDs).To(Equal(conf.Uplinks[0].Interface.ExternalIDs)) + Expect(iface.MTURequest).To(Equal(conf.Uplinks[0].Interface.MTURequest)) } var _ = Describe("OVS", func() { @@ -457,6 +462,7 @@ var _ = Describe("OVS", func() { initialDBContent := getDefaultInitialDBContent() initialDBContent.Bridge[0].ExternalIDs = nil initialDBContent.Bridge[0].OtherConfig = nil + initialDBContent.Interface[0].MTURequest = nil createInitialDBContent(ctx, ovsClient, initialDBContent) conf := getManagedBridges() store.EXPECT().GetManagedOVSBridges().Return(conf, nil) @@ -465,6 +471,7 @@ var _ = Describe("OVS", func() { Expect(ret).To(HaveLen(1)) Expect(ret[0].Bridge.ExternalIDs).To(BeEmpty()) Expect(ret[0].Bridge.OtherConfig).To(BeEmpty()) + Expect(ret[0].Uplinks[0].Interface.MTURequest).To(BeNil()) }) }) Context("RemoveOVSBridge", func() { diff --git a/pkg/host/internal/bridge/ovs/test_db.ovsschema b/pkg/host/internal/bridge/ovs/test_db.ovsschema index 46c59dd0c..2b7a96a9a 100644 --- a/pkg/host/internal/bridge/ovs/test_db.ovsschema +++ b/pkg/host/internal/bridge/ovs/test_db.ovsschema @@ -105,6 +105,15 @@ }, "type": { "type": "string" + }, + "mtu_request":{ + "type": { + "key": { + "minInteger":1, + "type": "integer" + }, + "min": 0 + } } }, "indexes": [ From 688cdde1a8ecf7092dbc1e05c2f98f591a5f8e58 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 9 Jan 2025 15:57:27 +0200 Subject: [PATCH 57/59] extend func-test timeout with the introduction of rdma system mode change on baremetal systems it takes more than 1h that is the default for ginkgo Signed-off-by: Sebastian Sch --- hack/run-e2e-conformance.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/run-e2e-conformance.sh b/hack/run-e2e-conformance.sh index 1e8666098..d6b55d429 100755 --- a/hack/run-e2e-conformance.sh +++ b/hack/run-e2e-conformance.sh @@ -7,4 +7,4 @@ GOPATH="${GOPATH:-$HOME/go}" JUNIT_OUTPUT="${JUNIT_OUTPUT:-/tmp/artifacts}" export PATH=$PATH:$GOPATH/bin -${root}/bin/ginkgo -output-dir=$JUNIT_OUTPUT --junit-report "unit_report.xml" -v "$SUITE" -- -report=$JUNIT_OUTPUT +${root}/bin/ginkgo --timeout=3h -output-dir=$JUNIT_OUTPUT --junit-report "unit_report.xml" -v "$SUITE" -- -report=$JUNIT_OUTPUT From 84d0a6d3e00fd79c0b8e76b72507a5b3b5ad1439 Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Sun, 12 Jan 2025 14:36:23 +0200 Subject: [PATCH 58/59] ovs: add internal interface When creating a bridge with ovs-vsctl, an internal interface is added by default. 
The same behavior is added in this commit ovs-vsctl code ref: https://github.com/openvswitch/ovs/blob/main/utilities/ovs-vsctl.c#L1597 Signed-off-by: Fred Rolland --- pkg/host/internal/bridge/ovs/ovs.go | 9 ++++++++ pkg/host/internal/bridge/ovs/ovs_test.go | 29 +++++++++++++++++++----- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/pkg/host/internal/bridge/ovs/ovs.go b/pkg/host/internal/bridge/ovs/ovs.go index e4bae9f81..2e7cf7015 100644 --- a/pkg/host/internal/bridge/ovs/ovs.go +++ b/pkg/host/internal/bridge/ovs/ovs.go @@ -148,6 +148,15 @@ func (o *ovs) CreateOVSBridge(ctx context.Context, conf *sriovnetworkv1.OVSConfi funcLog.Error(err, "CreateOVSBridge(): failed to get bridge after creation") return err } + funcLog.V(2).Info("CreateOVSBridge(): add internal interface to the bridge") + if err := o.addInterface(ctx, dbClient, bridge, &InterfaceEntry{ + Name: bridge.Name, + UUID: uuid.NewString(), + Type: "internal", + }); err != nil { + funcLog.Error(err, "CreateOVSBridge(): failed to add internal interface to the bridge") + return err + } funcLog.V(2).Info("CreateOVSBridge(): add uplink interface to the bridge") if err := o.addInterface(ctx, dbClient, bridge, &InterfaceEntry{ Name: conf.Uplinks[0].Name, diff --git a/pkg/host/internal/bridge/ovs/ovs_test.go b/pkg/host/internal/bridge/ovs/ovs_test.go index 66be5e8ed..290994307 100644 --- a/pkg/host/internal/bridge/ovs/ovs_test.go +++ b/pkg/host/internal/bridge/ovs/ovs_test.go @@ -141,26 +141,43 @@ func createInitialDBContent(ctx context.Context, c client.Client, expectedState func validateDBConfig(dbContent *testDBEntries, conf *sriovnetworkv1.OVSConfigExt) { Expect(dbContent.OpenVSwitch).To(HaveLen(1)) Expect(dbContent.Bridge).To(HaveLen(1)) - Expect(dbContent.Interface).To(HaveLen(1)) - Expect(dbContent.Port).To(HaveLen(1)) + Expect(dbContent.Interface).To(HaveLen(2)) + Expect(dbContent.Port).To(HaveLen(2)) ovs := dbContent.OpenVSwitch[0] br := dbContent.Bridge[0] - port := dbContent.Port[0] - iface := dbContent.Interface[0] + ports := make(map[string]*PortEntry, 0) + interfaces := make(map[string]*InterfaceEntry, 0) + for _, p := range dbContent.Port { + ports[p.Name] = p + } + for _, ifc := range dbContent.Interface { + interfaces[ifc.Name] = ifc + } Expect(ovs.Bridges).To(ContainElement(br.UUID)) Expect(br.Name).To(Equal(conf.Name)) Expect(br.DatapathType).To(Equal(conf.Bridge.DatapathType)) Expect(br.OtherConfig).To(Equal(conf.Bridge.OtherConfig)) Expect(br.ExternalIDs).To(Equal(conf.Bridge.ExternalIDs)) + port, ok := ports[conf.Uplinks[0].Name] + Expect(ok).To(BeTrue()) Expect(br.Ports).To(ContainElement(port.UUID)) - Expect(port.Name).To(Equal(conf.Uplinks[0].Name)) + iface, ok := interfaces[conf.Uplinks[0].Name] + Expect(ok).To(BeTrue()) Expect(port.Interfaces).To(ContainElement(iface.UUID)) - Expect(iface.Name).To(Equal(conf.Uplinks[0].Name)) Expect(iface.Options).To(Equal(conf.Uplinks[0].Interface.Options)) Expect(iface.Type).To(Equal(conf.Uplinks[0].Interface.Type)) Expect(iface.OtherConfig).To(Equal(conf.Uplinks[0].Interface.OtherConfig)) Expect(iface.ExternalIDs).To(Equal(conf.Uplinks[0].Interface.ExternalIDs)) Expect(iface.MTURequest).To(Equal(conf.Uplinks[0].Interface.MTURequest)) + internalPort, ok := ports[conf.Name] + Expect(ok).To(BeTrue()) + internalIface, ok := interfaces[conf.Name] + Expect(ok).To(BeTrue()) + Expect(internalPort.Interfaces).To(ContainElement(internalIface.UUID)) + Expect(internalIface.Options).To(BeNil()) + Expect(internalIface.Type).To(Equal("internal")) + 
Expect(internalIface.OtherConfig).To(BeNil()) + Expect(internalIface.ExternalIDs).To(BeNil()) } var _ = Describe("OVS", func() { From 009c45fc3736482b82ed172f1c520adede268ef1 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 31 Dec 2024 14:14:13 +0200 Subject: [PATCH 59/59] Make rdma functional tests robust for single node environments Signed-off-by: Sebastian Sch --- test/conformance/tests/test_networkpool.go | 132 +++++++++++++-------- 1 file changed, 80 insertions(+), 52 deletions(-) diff --git a/test/conformance/tests/test_networkpool.go b/test/conformance/tests/test_networkpool.go index 1129dc2ca..56684aae8 100644 --- a/test/conformance/tests/test_networkpool.go +++ b/test/conformance/tests/test_networkpool.go @@ -27,6 +27,7 @@ import ( var _ = Describe("[sriov] NetworkPool", Ordered, func() { var testNode string var interfaces []*sriovv1.InterfaceExt + var resourceName = "testrdma" BeforeAll(func() { err := namespaces.Create(namespaces.Test, clients) @@ -68,10 +69,12 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { By("waiting for operator to finish the configuration") WaitForSRIOVStable() nodeState := &sriovv1.SriovNetworkNodeState{} - err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) - Expect(err).ToNot(HaveOccurred()) - Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) - Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + Eventually(func(g Gomega) { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + g.Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + }, 20*time.Minute, 5*time.Second).Should(Succeed()) By("Checking rdma mode and kernel args") cmdlineOutput, _, err := runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline") @@ -85,14 +88,22 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Expect(strings.HasPrefix(output, "1")).To(BeTrue()) By("configure rdma mode to shared") - networkPool.Spec.RdmaMode = consts.RdmaSubsystemModeShared - err = clients.Update(context.Background(), networkPool) - Expect(err).ToNot(HaveOccurred()) + Eventually(func(g Gomega) { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, networkPool) + g.Expect(err).ToNot(HaveOccurred()) + networkPool.Spec.RdmaMode = consts.RdmaSubsystemModeShared + err = clients.Update(context.Background(), networkPool) + g.Expect(err).ToNot(HaveOccurred()) + }, time.Minute, 5*time.Second).Should(Succeed()) + + By("waiting for operator to finish the configuration") WaitForSRIOVStable() - err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) - Expect(err).ToNot(HaveOccurred()) - Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) - Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + Eventually(func(g Gomega) { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + g.Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + }, 
20*time.Minute, 5*time.Second).Should(Succeed()) By("Checking rdma mode and kernel args") cmdlineOutput, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline") @@ -107,16 +118,20 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { By("removing rdma mode configuration") Eventually(func(g Gomega) { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, networkPool) + g.Expect(err).ToNot(HaveOccurred()) err = clients.Delete(context.Background(), networkPool) g.Expect(err).ToNot(HaveOccurred()) }, 5*time.Minute, 5*time.Second).Should(Succeed()) + By("waiting for operator to finish the configuration") WaitForSRIOVStable() - - err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) - Expect(err).ToNot(HaveOccurred()) - Expect(nodeState.Spec.System.RdmaMode).To(Equal("")) - Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + Eventually(func(g Gomega) { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(nodeState.Spec.System.RdmaMode).To(Equal("")) + g.Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + }, 20*time.Minute, 5*time.Second).Should(Succeed()) By("Checking rdma mode and kernel args") cmdlineOutput, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline") @@ -150,25 +165,6 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Skip("no mellanox card available to test rdma") } - networkPool := &sriovv1.SriovNetworkPoolConfig{ - ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, - Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, - NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} - - err = clients.Create(context.Background(), networkPool) - Expect(err).ToNot(HaveOccurred()) - By("waiting for operator to finish the configuration") - WaitForSRIOVStable() - }) - - It("should run pod with RDMA cni and expose nic metrics and another one without rdma info", func() { - By("creating a policy") - resourceName := "testrdma" - _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", - func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) - Expect(err).ToNot(HaveOccurred()) - WaitForSRIOVStable() - By("Creating sriov network to use the rdma device") sriovNetwork := &sriovv1.SriovNetwork{ ObjectMeta: metav1.ObjectMeta{ @@ -201,6 +197,32 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Expect(err).ToNot(HaveOccurred()) waitForNetAttachDef("test-nordmanetwork", namespaces.Test) + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + err = clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + nodeState := &sriovv1.SriovNetworkNodeState{} + Eventually(func(g Gomega) { + err = clients.Get(context.Background(), client.ObjectKey{Name: 
testNode, Namespace: operatorNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + g.Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + }, 20*time.Minute, 5*time.Second).Should(Succeed()) + }) + + It("should run pod with RDMA cni and expose nic metrics and another one without rdma info", func() { + By("creating a policy") + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", + func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) + Expect(err).ToNot(HaveOccurred()) + + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() podDefinition := pod.DefineWithNetworks([]string{"test-rdmanetwork"}) firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -287,6 +309,22 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Skip("no mellanox card available to test rdma") } + By("Creating sriov network to use the rdma device") + sriovNetwork := &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rdmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, + NetworkNamespace: namespaces.Test, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-rdmanetwork", namespaces.Test) + networkPool := &sriovv1.SriovNetworkPoolConfig{ ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeShared, @@ -296,32 +334,22 @@ var _ = Describe("[sriov] NetworkPool", Ordered, func() { Expect(err).ToNot(HaveOccurred()) By("waiting for operator to finish the configuration") WaitForSRIOVStable() + nodeState := &sriovv1.SriovNetworkNodeState{} + Eventually(func(g Gomega) { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + g.Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + }, 20*time.Minute, 5*time.Second).Should(Succeed()) }) It("should run pod without RDMA cni and not expose nic metrics", func() { By("creating a policy") - resourceName := "testrdma" _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) Expect(err).ToNot(HaveOccurred()) WaitForSRIOVStable() - By("Creating sriov network to use the rdma device") - sriovNetwork := &sriovv1.SriovNetwork{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-rdmanetwork", - Namespace: operatorNamespace, - }, - Spec: sriovv1.SriovNetworkSpec{ - ResourceName: resourceName, - IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181"}`, - NetworkNamespace: namespaces.Test, - }} - - err = clients.Create(context.Background(), sriovNetwork) - Expect(err).ToNot(HaveOccurred()) - waitForNetAttachDef("test-rdmanetwork", namespaces.Test) - podDefinition := 
pod.DefineWithNetworks([]string{"test-rdmanetwork"}) firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) Expect(err).ToNot(HaveOccurred())
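// Editorial sketch (not part of the patch series): the test changes above keep
// re-reading the SriovNetworkNodeState inside Gomega's Eventually instead of
// asserting once after WaitForSRIOVStable, so slow reconciliation on
// single-node environments does not fail the test. A minimal, standalone
// version of that polling pattern is shown below, intended to sit inside the
// existing Ginkgo/Gomega suite. The Get call, the 20-minute/5-second timings
// and the RdmaMode fields mirror the patch; the package name, the
// waitForRdmaMode helper and the import paths are assumptions made for
// illustration only.
package tests

import (
	"context"
	"time"

	. "github.com/onsi/gomega"
	"sigs.k8s.io/controller-runtime/pkg/client"

	sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
)

// waitForRdmaMode polls the node state until both spec and status report the
// requested RDMA subsystem mode, or the timeout expires.
func waitForRdmaMode(c client.Client, nodeName, namespace, mode string) {
	nodeState := &sriovv1.SriovNetworkNodeState{}
	Eventually(func(g Gomega) {
		// Re-read the object on every attempt: a single Get right after the
		// configuration is applied can race with the config daemon, which is
		// exactly what the retries in the patch above guard against.
		err := c.Get(context.Background(),
			client.ObjectKey{Name: nodeName, Namespace: namespace}, nodeState)
		g.Expect(err).ToNot(HaveOccurred())
		g.Expect(nodeState.Spec.System.RdmaMode).To(Equal(mode))
		g.Expect(nodeState.Status.System.RdmaMode).To(Equal(mode))
	}, 20*time.Minute, 5*time.Second).Should(Succeed())
}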