From 85feccd5e18b1e52a5176922d116de5ec5268d4a Mon Sep 17 00:00:00 2001 From: evgenLevin Date: Tue, 3 Sep 2024 16:39:50 -0400 Subject: [PATCH 01/38] Refactor some conformance tests to utilize SRIOV_NODE_AND_DEVICE_NAME_FILTER variable --- test/conformance/tests/test_sriov_operator.go | 6 ++++-- test/util/cluster/cluster.go | 9 +++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index b665c99f0..23f477563 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -1060,9 +1060,11 @@ var _ = Describe("[sriov] operator", func() { findSriovDevice := func(vendorID, deviceID string) (string, sriovv1.InterfaceExt) { for _, node := range sriovInfos.Nodes { - for _, nic := range sriovInfos.States[node].Status.Interfaces { + devices, err := sriovInfos.FindSriovDevices(node) + Expect(err).ToNot(HaveOccurred()) + for _, nic := range devices { if vendorID != "" && deviceID != "" && nic.Vendor == vendorID && nic.DeviceID == deviceID { - return node, nic + return node, *nic } } } diff --git a/test/util/cluster/cluster.go b/test/util/cluster/cluster.go index b79e61ad2..e0cd2e45b 100644 --- a/test/util/cluster/cluster.go +++ b/test/util/cluster/cluster.go @@ -203,9 +203,14 @@ func (n *EnabledNodes) FindOneSriovNodeAndDevice() (string, *sriovv1.InterfaceEx // FindOneVfioSriovDevice retrieves a node with a valid sriov device for vfio func (n *EnabledNodes) FindOneVfioSriovDevice() (string, sriovv1.InterfaceExt) { for _, node := range n.Nodes { - for _, nic := range n.States[node].Status.Interfaces { + devices, err := n.FindSriovDevices(node) + if err != nil { + return "", sriovv1.InterfaceExt{} + } + + for _, nic := range devices { if nic.Vendor == intelVendorID && sriovv1.IsSupportedModel(nic.Vendor, nic.DeviceID) && nic.TotalVfs != 0 { - return node, nic + return node, *nic } } } From 
91e04f6a00febea8efad5c0e50511f0327344be1 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Wed, 10 Jul 2024 16:09:20 +0200 Subject: [PATCH 02/38] metrics: Add PrometheusRule for namespaced metrics PrometheusRules allow recording pre-defined queries. Use `sriov_kubepoddevice` metric to add `pod|namespace` pair to the sriov metrics. The feature is enabled via the `METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES` environment variable. Signed-off-by: Andrea Panattoni --- .../metrics-prometheus-rule.yaml | 38 +++++ controllers/sriovoperatorconfig_controller.go | 1 + .../sriovoperatorconfig_controller_test.go | 10 ++ deploy/operator.yaml | 2 + deploy/role.yaml | 1 + .../sriov-network-operator-chart/README.md | 1 + .../templates/operator.yaml | 2 + .../templates/role.yaml | 1 + .../sriov-network-operator-chart/values.yaml | 1 + hack/run-e2e-conformance-virtual-ocp.sh | 1 + .../tests/test_exporter_metrics.go | 68 ++++++++- ...monitoring.coreos.com_prometheusrules.yaml | 142 ++++++++++++++++++ 12 files changed, 265 insertions(+), 3 deletions(-) create mode 100644 bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml create mode 100644 test/util/crds/monitoring.coreos.com_prometheusrules.yaml diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml new file mode 100644 index 000000000..efd760113 --- /dev/null +++ b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml @@ -0,0 +1,38 @@ +--- +{{ if and .IsPrometheusOperatorInstalled .PrometheusOperatorDeployRules }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: sriov-vf-rules + namespace: {{.Namespace}} +spec: + groups: + - name: sriov-network-metrics-operator.rules + interval: 30s + rules: + - expr: | + sriov_vf_tx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_tx_packets + - expr: | + sriov_vf_rx_packets * on (pciAddr) 
group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_packets + - expr: | + sriov_vf_tx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_tx_bytes + - expr: | + sriov_vf_rx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_bytes + - expr: | + sriov_vf_tx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_tx_dropped + - expr: | + sriov_vf_rx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_dropped + - expr: | + sriov_vf_rx_broadcast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_broadcast + - expr: | + sriov_vf_rx_multicast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + record: network:sriov_vf_rx_multicast +{{ end }} + diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 8d028d8eb..1121b623f 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -241,6 +241,7 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context, data.Data["IsOpenshift"] = r.PlatformHelper.IsOpenshiftCluster() data.Data["IsPrometheusOperatorInstalled"] = strings.ToLower(os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED")) == trueString + data.Data["PrometheusOperatorDeployRules"] = strings.ToLower(os.Getenv("METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES")) == trueString data.Data["PrometheusOperatorServiceAccount"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT") data.Data["PrometheusOperatorNamespace"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE") diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 582d9781d..cff8ca7c8 
100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -368,6 +368,8 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { It("should deploy extra configuration when the Prometheus operator is installed", func() { DeferCleanup(os.Setenv, "METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED")) os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", "true") + DeferCleanup(os.Setenv, "METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES", os.Getenv("METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES")) + os.Setenv("METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES", "true") err := util.WaitForNamespacedObject(&rbacv1.Role{}, k8sClient, testNamespace, "prometheus-k8s", util.RetryInterval, util.APITimeout) Expect(err).ToNot(HaveOccurred()) @@ -382,6 +384,14 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Version: "v1", }, client.ObjectKey{Namespace: testNamespace, Name: "sriov-network-metrics-exporter"}) + + assertResourceExists( + schema.GroupVersionKind{ + Group: "monitoring.coreos.com", + Kind: "PrometheusRule", + Version: "v1", + }, + client.ObjectKey{Namespace: testNamespace, Name: "sriov-vf-rules"}) }) }) }) diff --git a/deploy/operator.yaml b/deploy/operator.yaml index b2aa302ab..e9fb25de3 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -78,6 +78,8 @@ spec: value: $METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED value: "$METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED" + - name: METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES + value: "$METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES" - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT value: $METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE diff --git a/deploy/role.yaml b/deploy/role.yaml index a24f13729..d03c47e21 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ 
-29,6 +29,7 @@ rules: - monitoring.coreos.com resources: - servicemonitors + - prometheusrules verbs: - get - create diff --git a/deployment/sriov-network-operator-chart/README.md b/deployment/sriov-network-operator-chart/README.md index 40b4e92a9..778726677 100644 --- a/deployment/sriov-network-operator-chart/README.md +++ b/deployment/sriov-network-operator-chart/README.md @@ -89,6 +89,7 @@ We have introduced the following Chart parameters. | `operator.metricsExporter.prometheusOperator.enabled` | bool | false | Wheter the operator shoud configure Prometheus resources or not (e.g. `ServiceMonitors`). | | `operator.metricsExporter.prometheusOperator.serviceAccount` | string | `prometheus-k8s` | The service account used by the Prometheus Operator. This is used to give Prometheus the permission to list resource in the SR-IOV operator namespace | | `operator.metricsExporter.prometheusOperator.namespace` | string | `monitoring` | The namespace where the Prometheus Operator is installed. Setting this variable makes the operator deploy `monitoring.coreos.com` resources. | +| `operator.metricsExporter.prometheusOperator.deployRules` | bool | false | Whether the operator should deploy `PrometheusRules` to scrape namespace version of metrics. 
| #### Admission Controllers parameters diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 12a9cc660..0e89d1959 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -83,6 +83,8 @@ spec: {{- if .Values.operator.metricsExporter.prometheusOperator.enabled }} - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED value: {{ .Values.operator.metricsExporter.prometheusOperator.enabled | quote}} + - name: METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES + value: {{ .Values.operator.metricsExporter.prometheusOperator.deployRules | quote}} - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT value: {{ .Values.operator.metricsExporter.prometheusOperator.serviceAccount }} - name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE diff --git a/deployment/sriov-network-operator-chart/templates/role.yaml b/deployment/sriov-network-operator-chart/templates/role.yaml index 29cf80cce..28c5ff175 100644 --- a/deployment/sriov-network-operator-chart/templates/role.yaml +++ b/deployment/sriov-network-operator-chart/templates/role.yaml @@ -32,6 +32,7 @@ rules: - monitoring.coreos.com resources: - servicemonitors + - prometheusrules verbs: - get - create diff --git a/deployment/sriov-network-operator-chart/values.yaml b/deployment/sriov-network-operator-chart/values.yaml index e1c31b82d..8c6fea3a1 100644 --- a/deployment/sriov-network-operator-chart/values.yaml +++ b/deployment/sriov-network-operator-chart/values.yaml @@ -35,6 +35,7 @@ operator: enabled: false serviceAccount: "prometheus-k8s" namespace: "monitoring" + deployRules: false admissionControllers: enabled: false certificates: diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index a61906fb2..0092fcdad 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh 
@@ -191,6 +191,7 @@ export DEV_MODE=TRUE export CLUSTER_HAS_EMULATED_PF=TRUE export OPERATOR_LEADER_ELECTION_ENABLE=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true +export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULE=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"} export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshfit-monitoring"} diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go index e81f63067..804432f04 100644 --- a/test/conformance/tests/test_exporter_metrics.go +++ b/test/conformance/tests/test_exporter_metrics.go @@ -2,9 +2,12 @@ package tests import ( "context" + "encoding/json" "fmt" + "net/url" "strings" + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" @@ -13,6 +16,7 @@ import ( dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -22,6 +26,8 @@ import ( ) var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { + var node string + var nic *sriovv1.InterfaceExt BeforeAll(func() { if cluster.VirtualCluster() { @@ -48,13 +54,11 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(err).ToNot(HaveOccurred()) WaitForSRIOVStable() - }) - It("collects metrics regarding receiving traffic via VF", func() { sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) Expect(err).ToNot(HaveOccurred()) - node, nic, err := sriovInfos.FindOneSriovNodeAndDevice() + node, nic, err = sriovInfos.FindOneSriovNodeAndDevice() 
Expect(err).ToNot(HaveOccurred()) By("Using device " + nic.Name + " on node " + node) @@ -65,7 +69,13 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(err).ToNot(HaveOccurred()) waitForNetAttachDef("test-me-network", namespaces.Test) + DeferCleanup(namespaces.Clean, operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + }) + + It("collects metrics regarding receiving traffic via VF", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) ips, err := network.GetSriovNicIPs(pod, "net1") Expect(err).ToNot(HaveOccurred()) @@ -88,6 +98,28 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets)) }) + It("PrometheusRule should provide namespaced metrics", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + namespacedMetricNames := []string{ + "network:sriov_vf_rx_bytes", + "network:sriov_vf_tx_bytes", + "network:sriov_vf_rx_packets", + "network:sriov_vf_tx_packets", + "network:sriov_vf_rx_dropped", + "network:sriov_vf_tx_dropped", + "network:sriov_vf_rx_broadcast", + "network:sriov_vf_rx_multicast", + } + + Eventually(func(g Gomega) { + for _, metricName := range namespacedMetricNames { + values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) + g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + } + }, "40s", "1s").Should(Succeed()) + }) }) func getMetricsForNode(nodeName string) map[string]*dto.MetricFamily { @@ -185,3 +217,33 @@ func areLabelsMatching(labels []*dto.LabelPair, labelsToMatch map[string]string) return true } + +func runPromQLQuery(query string) model.Vector { + prometheusPods, err := clients.Pods("").List(context.Background(), metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/component=prometheus", + }) + 
ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, prometheusPods.Items).ToNot(HaveLen(0), "At least one Prometheus operator pod expected") + + prometheusPod := prometheusPods.Items[0] + + url := fmt.Sprintf("localhost:9090/api/v1/query?%s", (url.Values{"query": []string{query}}).Encode()) + command := []string{"curl", url} + stdout, stderr, err := pod.ExecCommand(clients, &prometheusPod, command...) + ExpectWithOffset(1, err).ToNot(HaveOccurred(), + "promQL query failed: [%s/%s] command: [%v]\nstdout: %s\nstderr: %s", prometheusPod.Namespace, prometheusPod.Name, command, stdout, stderr) + + result := struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result model.Vector `json:"result"` + } `json:"data"` + }{} + + json.Unmarshal([]byte(stdout), &result) + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, result.Status).To(Equal("success"), "cURL for [%s] failed: %s", url, stdout) + + return result.Data.Result +} diff --git a/test/util/crds/monitoring.coreos.com_prometheusrules.yaml b/test/util/crds/monitoring.coreos.com_prometheusrules.yaml new file mode 100644 index 000000000..6c16e8396 --- /dev/null +++ b/test/util/crds/monitoring.coreos.com_prometheusrules.yaml @@ -0,0 +1,142 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + operator.prometheus.io/version: 0.75.1 + name: prometheusrules.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + categories: + - prometheus-operator + kind: PrometheusRule + listKind: PrometheusRuleList + plural: prometheusrules + shortNames: + - promrule + singular: prometheusrule + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: |- + The `PrometheusRule` custom resource definition (CRD) defines [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) and 
[recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) rules to be evaluated by `Prometheus` or `ThanosRuler` objects. + + + `Prometheus` and `ThanosRuler` objects select `PrometheusRule` objects using label and namespace selectors. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Specification of desired alerting rule definitions for Prometheus. + properties: + groups: + description: Content of Prometheus rule file + items: + description: RuleGroup is a list of sequentially evaluated recording + and alerting rules. + properties: + interval: + description: Interval determines how often rules in the group + are evaluated. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + limit: + description: |- + Limit the number of alerts an alerting rule and series a recording + rule can produce. + Limit is supported starting with Prometheus >= 2.31 and Thanos Ruler >= 0.24. + type: integer + name: + description: Name of the rule group. + minLength: 1 + type: string + partial_response_strategy: + description: |- + PartialResponseStrategy is only used by ThanosRuler and will + be ignored by Prometheus instances. 
+ More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response + pattern: ^(?i)(abort|warn)?$ + type: string + rules: + description: List of alerting and recording rules. + items: + description: |- + Rule describes an alerting or recording rule + See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) rule + properties: + alert: + description: |- + Name of the alert. Must be a valid label value. + Only one of `record` and `alert` must be set. + type: string + annotations: + additionalProperties: + type: string + description: |- + Annotations to add to each alert. + Only valid for alerting rules. + type: object + expr: + anyOf: + - type: integer + - type: string + description: PromQL expression to evaluate. + x-kubernetes-int-or-string: true + for: + description: Alerts are considered firing once they have + been returned for this long. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + keep_firing_for: + description: KeepFiringFor defines how long an alert will + continue firing after the condition that triggered it + has cleared. + minLength: 1 + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + labels: + additionalProperties: + type: string + description: Labels to add or overwrite. + type: object + record: + description: |- + Name of the time series to output to. Must be a valid metric name. + Only one of `record` and `alert` must be set. 
+ type: string + required: + - expr + type: object + type: array + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + required: + - spec + type: object + served: true + storage: true From b49cf15cb3718a5834dd26cbc6ea1ecfc6014383 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Wed, 28 Aug 2024 17:27:02 +0200 Subject: [PATCH 03/38] metrics: Add permissions to remove monitor objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the `metricsExporter` feature is turned off, deployed resources should be removed. These changes fix the error: ``` │ 2024-08-28T14:07:57.699760017Z ERROR controller/controller.go:266 Reconciler error {"controller": "sriovoperatorconfig", "controllerGroup": "sriovnetwork.openshift.io", "controllerKind": "SriovOperatorConfig", "SriovOperatorConfig": {"name":"default","namespace":"openshift-sriov-network-operator"}, │ │ "namespace": "openshift-sriov-network-operator", "name": "default", "reconcileID": "fa841c50-dbb8-4c4c-9ddd-b98624fd2a24", "error": "failed to delete object &{map[apiVersion:monitoring.coreos.com/v1 kind:ServiceMonitor metadata:map[name:sriov-network-metrics-exporter namespace:openshift-sriov-network-operator] │ │ spec:map[endpoints:[map[bearerTokenFile:/var/run/secrets/kubernetes.io/serviceaccount/token honorLabels:true interval:30s port:sriov-network-metrics scheme:https tlsConfig:map[caFile:/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt insecureSkipVerify:false serverName:sriov-network-metrics-expor │ │ ter-service.openshift-sriov-network-operator.svc]]] namespaceSelector:map[matchNames:[openshift-sriov-network-operator]] selector:map[matchLabels:map[name:sriov-network-metrics-exporter-service]]]]} with err: could not delete object (monitoring.coreos.com/v1, Kind=ServiceMonitor) openshift-sriov-network-operato │ │ r/sriov-network-metrics-exporter: 
servicemonitors.monitoring.coreos.com \"sriov-network-metrics-exporter\" is forbidden: User \"system:serviceaccount:openshift-sriov-network-operator:sriov-network-operator\" cannot delete resource \"servicemonitors\" in API group \"monitoring.coreos.com\" in the namespace \"ope │ │ nshift-sriov-network-operator\""} ``` Signed-off-by: Andrea Panattoni --- deploy/role.yaml | 2 ++ .../sriov-network-operator-chart/templates/role.yaml | 2 ++ test/conformance/tests/test_sriov_operator.go | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/deploy/role.yaml b/deploy/role.yaml index d03c47e21..0a6c27a21 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ -33,6 +33,8 @@ rules: verbs: - get - create + - update + - delete - apiGroups: - apps resourceNames: diff --git a/deployment/sriov-network-operator-chart/templates/role.yaml b/deployment/sriov-network-operator-chart/templates/role.yaml index 28c5ff175..6551b5775 100644 --- a/deployment/sriov-network-operator-chart/templates/role.yaml +++ b/deployment/sriov-network-operator-chart/templates/role.yaml @@ -36,6 +36,8 @@ rules: verbs: - get - create + - update + - delete - apiGroups: - apps resourceNames: diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index c1db065b2..729bf683b 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -305,6 +305,14 @@ var _ = Describe("[sriov] operator", func() { g.Expect(err).ToNot(HaveOccurred()) }).Should(Succeed()) }) + + It("should remove ServiceMonitor when the feature is turned off", func() { + setFeatureFlag("metricsExporter", false) + Eventually(func(g Gomega) { + _, err := clients.ServiceMonitors(operatorNamespace).Get(context.Background(), "sriov-network-metrics-exporter", metav1.GetOptions{}) + g.Expect(k8serrors.IsNotFound(err)).To(BeTrue()) + }).Should(Succeed()) + }) }) }) From 6aedb8c57270e641babae3faf4746dda876a1bbf Mon Sep 17 00:00:00 2001 From: 
Sebastian Sch Date: Wed, 18 Sep 2024 19:30:06 +0300 Subject: [PATCH 04/38] Fix merge annotation function if the current obj has annotations and the updated one doesn't, we still want to add the ones from the current object Signed-off-by: Sebastian Sch --- pkg/apply/merge.go | 4 ++-- pkg/apply/merge_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/pkg/apply/merge.go b/pkg/apply/merge.go index d2ed6d4fb..9ee859f83 100644 --- a/pkg/apply/merge.go +++ b/pkg/apply/merge.go @@ -220,7 +220,7 @@ func mergeAnnotations(current, updated *uns.Unstructured) { for k, v := range updatedAnnotations { curAnnotations[k] = v } - if len(curAnnotations) > 1 { + if len(curAnnotations) > 0 { updated.SetAnnotations(curAnnotations) } } @@ -238,7 +238,7 @@ func mergeLabels(current, updated *uns.Unstructured) { for k, v := range updatedLabels { curLabels[k] = v } - if len(curLabels) > 1 { + if len(curLabels) > 0 { updated.SetLabels(curLabels) } } diff --git a/pkg/apply/merge_test.go b/pkg/apply/merge_test.go index f6ad89289..ecf2fd98d 100644 --- a/pkg/apply/merge_test.go +++ b/pkg/apply/merge_test.go @@ -107,6 +107,38 @@ metadata: })) } +func TestMergeOne(t *testing.T) { + g := NewGomegaWithT(t) + + cur := UnstructuredFromYaml(t, ` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: d1 + labels: + label-c: cur + annotations: + annotation-c: cur`) + + upd := UnstructuredFromYaml(t, ` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: d1`) + + // this mutates updated + err := MergeObjectForUpdate(cur, upd) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(upd.GetLabels()).To(Equal(map[string]string{ + "label-c": "cur", + })) + + g.Expect(upd.GetAnnotations()).To(Equal(map[string]string{ + "annotation-c": "cur", + })) +} + func TestMergeNilCur(t *testing.T) { g := NewGomegaWithT(t) From 644fcf2a4cb2194d1e3e8bc20be2f80690fd0693 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 19 Sep 2024 08:39:09 +0200 Subject: [PATCH 
05/38] Delete webhooks when SriovOperatorConfig is deleted When a user deletes the default SriovOperatorConfig resource and tries to recreate it afterwards, the operator webhook returns the error: ``` Error from server (InternalError): error when creating "/tmp/opconfig.yml": Internal error occurred: failed calling webhook "operator-webhook.sriovnetwork.openshift.io": failed to call webhook: Post "https://operator-webhook-service.openshift-sriov-network-operator.svc:443/validating-custom-resource?timeout=10s": service "operator-webhook-service" not found ``` as the webhook configuration is still present, while the Service and the DaemonSet have been deleted. Delete all the webhook configurations when the user deletes the default SriovOperatorConfig. Signed-off-by: Andrea Panattoni --- controllers/sriovoperatorconfig_controller.go | 32 +++++++++- .../sriovoperatorconfig_controller_test.go | 61 ++++++++++++++++--- 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 1121b623f..377ebd2de 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -18,6 +18,7 @@ package controllers import ( "context" + "errors" "fmt" "os" "sort" @@ -28,6 +29,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" kscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" @@ -81,7 +83,9 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. if err != nil { if apierrors.IsNotFound(err) { logger.Info("default SriovOperatorConfig object not found. 
waiting for creation.") - return reconcile.Result{}, nil + + err := r.deleteAllWebhooks(ctx) + return reconcile.Result{}, err } // Error reading the object - requeue the request. logger.Error(err, "Failed to get default SriovOperatorConfig object") @@ -457,3 +461,29 @@ func (r SriovOperatorConfigReconciler) setLabelInsideObject(ctx context.Context, return nil } + +func (r SriovOperatorConfigReconciler) deleteAllWebhooks(ctx context.Context) error { + var err error + obj := &uns.Unstructured{} + obj.SetGroupVersionKind(schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}) + obj.SetName(consts.OperatorWebHookName) + err = errors.Join( + err, r.deleteWebhookObject(ctx, obj), + ) + + obj = &uns.Unstructured{} + obj.SetGroupVersionKind(schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "ValidatingWebhookConfiguration", Version: "v1"}) + obj.SetName(consts.OperatorWebHookName) + err = errors.Join( + err, r.deleteWebhookObject(ctx, obj), + ) + + obj = &uns.Unstructured{} + obj.SetGroupVersionKind(schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}) + obj.SetName(consts.InjectorWebHookName) + err = errors.Join( + err, r.deleteWebhookObject(ctx, obj), + ) + + return err +} diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 6a98925eb..7f6db3522 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -6,6 +6,7 @@ import ( "os" "strings" "sync" + "time" admv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" @@ -38,15 +39,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { BeforeAll(func() { By("Create SriovOperatorConfig controller k8s objs") - config := &sriovnetworkv1.SriovOperatorConfig{} - config.SetNamespace(testNamespace) - 
config.SetName(consts.DefaultConfigName) - config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{ - EnableInjector: true, - EnableOperatorWebhook: true, - ConfigDaemonNodeSelector: map[string]string{}, - LogLevel: 2, - } + config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) DeferCleanup(func() { err := k8sClient.Delete(context.Background(), config) @@ -224,6 +217,29 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Expect(err).NotTo(HaveOccurred()) }) + // Namespaced resources are deleted via the `.ObjectMeta.OwnerReference` field. That logic can't be tested here because testenv doesn't have built-in controllers + // (See https://book.kubebuilder.io/reference/envtest#testing-considerations). Since Service and DaemonSet are deleted when default/SriovOperatorConfig is no longer + // present, it's important that webhook configurations are deleted as well. + It("should delete the webhooks when SriovOperatorConfig/default is deleted", func() { + DeferCleanup(k8sClient.Create, context.Background(), makeDefaultSriovOpConfig()) + + err := k8sClient.Delete(context.Background(), &sriovnetworkv1.SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: "default"}, + }) + Expect(err).NotTo(HaveOccurred()) + + assertResourceDoesNotExist( + schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}, + client.ObjectKey{Name: "sriov-operator-webhook-config"}) + assertResourceDoesNotExist( + schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "ValidatingWebhookConfiguration", Version: "v1"}, + client.ObjectKey{Name: "sriov-operator-webhook-config"}) + + assertResourceDoesNotExist( + schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Kind: "MutatingWebhookConfiguration", Version: "v1"}, + client.ObjectKey{Name: "network-resources-injector-config"}) + }) + It("should be able to update the node 
selector of sriov-network-config-daemon", func() { By("specify the configDaemonNodeSelector") nodeSelector := map[string]string{"node-role.kubernetes.io/worker": ""} @@ -517,6 +533,19 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { }) }) +func makeDefaultSriovOpConfig() *sriovnetworkv1.SriovOperatorConfig { + config := &sriovnetworkv1.SriovOperatorConfig{} + config.SetNamespace(testNamespace) + config.SetName(consts.DefaultConfigName) + config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{ + EnableInjector: true, + EnableOperatorWebhook: true, + ConfigDaemonNodeSelector: map[string]string{}, + LogLevel: 2, + } + return config +} + func assertResourceExists(gvk schema.GroupVersionKind, key client.ObjectKey) { u := &unstructured.Unstructured{} u.SetGroupVersionKind(gvk) @@ -524,6 +553,20 @@ func assertResourceExists(gvk schema.GroupVersionKind, key client.ObjectKey) { Expect(err).NotTo(HaveOccurred()) } +func assertResourceDoesNotExist(gvk schema.GroupVersionKind, key client.ObjectKey) { + Eventually(func(g Gomega) { + u := &unstructured.Unstructured{} + u.SetGroupVersionKind(gvk) + err := k8sClient.Get(context.Background(), key, u) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.IsNotFound(err)).To(BeTrue()) + }). + WithOffset(1). + WithPolling(100*time.Millisecond). + WithTimeout(2*time.Second). 
+ Should(Succeed(), "Resource type[%s] name[%s] still present in the cluster", gvk.String(), key.String()) +} + func updateConfigDaemonNodeSelector(newValue map[string]string) func() { config := &sriovnetworkv1.SriovOperatorConfig{} err := k8sClient.Get(context.Background(), types.NamespacedName{Namespace: testNamespace, Name: "default"}, config) From f17bb2a9cb77897e833e96ff35fec3e626c928b9 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 19 Sep 2024 17:16:02 +0200 Subject: [PATCH 06/38] metrics: Fix typo in `METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES` Signed-off-by: Andrea Panattoni --- hack/run-e2e-conformance-virtual-ocp.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index 0092fcdad..cb65aaf50 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -191,9 +191,9 @@ export DEV_MODE=TRUE export CLUSTER_HAS_EMULATED_PF=TRUE export OPERATOR_LEADER_ELECTION_ENABLE=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true -export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULE=true +export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"} -export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshfit-monitoring"} +export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshift-monitoring"} export SRIOV_NETWORK_OPERATOR_IMAGE="$registry/$NAMESPACE/sriov-network-operator:latest" export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$registry/$NAMESPACE/sriov-network-config-daemon:latest" From f94fa644ddee573d246f656e49e8f232273f3bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Fri, 20 Sep 2024 19:39:43 +0200 Subject: [PATCH 07/38] Fix syntax for RDMA_CNI_IMAGE var 
substitution The bash syntax was incorrect and yielded: hack/env.sh: line 35: ${$RDMA_CNI_IMAGE:-}: bad substitution --- hack/env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/env.sh b/hack/env.sh index 28e0007e7..c49c399d8 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ -16,7 +16,7 @@ else # ensure that OVS_CNI_IMAGE is set, empty string is a valid value OVS_CNI_IMAGE=${OVS_CNI_IMAGE:-} # ensure that RDMA_CNI_IMAGE is set, empty string is a valid value - RDMA_CNI_IMAGE=${$RDMA_CNI_IMAGE:-} + RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE:-} METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-} [ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 [ -z $SRIOV_INFINIBAND_CNI_IMAGE ] && echo "SRIOV_INFINIBAND_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 From 3ff1b85e8465a4382e3eb62aa247ebf3c81cd9ee Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Thu, 12 Sep 2024 12:20:45 +0200 Subject: [PATCH 08/38] metrics: Add `node` label to `sriov_*` metrics It might happen that two SR-IOV pods, deployed on different nodes, are using devices with the same PCI address. 
In such cases, the query suggested [1] by the sriov-network-metrics-exporter produces the error: ``` Error loading values found duplicate series for the match group {pciAddr="0000:3b:02.4"} on the right hand-side of the operation: [ { __name__="sriov_kubepoddevice", container="test", dev_type="openshift.io/intelnetdevice", endpoint="sriov-network-metrics", instance="10.1.98.60:9110", job="sriov-network-metrics-exporter-service", namespace="cnf-4916", pciAddr="0000:3b:02.4", pod="pod-cnfdr22.telco5g.eng.rdu2.redhat.com", prometheus="openshift-monitoring/k8s", service="sriov-network-metrics-exporter-service" }, { __name__="sriov_kubepoddevice", container="test", dev_type="openshift.io/intelnetdevice", endpoint="sriov-network-metrics", instance="10.1.98.230:9110", job="sriov-network-metrics-exporter-service", namespace="cnf-4916", pciAddr="0000:3b:02.4", pod="pod-dhcp-98-230.telco5g.eng.rdu2.redhat.com", prometheus="openshift-monitoring/k8s", service="sriov-network-metrics-exporter-service" } ];many-to-many matching not allowed: matching labels must be unique on one side ``` Configure the ServiceMonitor resource to add a `node` label to all metrics. 
The right query to get metrics, as updated in the PrometheusRule, will be: ``` sriov_vf_tx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice ``` Also remove `pod`, `namespace` and `container` label from the `sriov_vf_*` metrics, as they were wrongly set to `sriov-network-metrics-exporter-zj2n9`, `openshift-sriov-network-operator`, `kube-rbac-proxy` [1] https://github.com/k8snetworkplumbingwg/sriov-network-metrics-exporter/blob/0f6a784f377ede87b95f31e569116ceb9775b5b9/README.md?plain=1#L38 Signed-off-by: Andrea Panattoni --- .../metrics-prometheus-rule.yaml | 16 ++-- .../metrics-exporter/metrics-prometheus.yaml | 11 +++ .../tests/test_exporter_metrics.go | 95 ++++++++++++++----- test/util/k8sreporter/reporter.go | 20 ++++ 4 files changed, 111 insertions(+), 31 deletions(-) diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml index efd760113..a385fa677 100644 --- a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml +++ b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml @@ -11,28 +11,28 @@ spec: interval: 30s rules: - expr: | - sriov_vf_tx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_packets - expr: | - sriov_vf_rx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_packets - expr: | - sriov_vf_tx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_bytes * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_bytes - expr: | - sriov_vf_rx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_bytes 
* on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_bytes - expr: | - sriov_vf_tx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_dropped - expr: | - sriov_vf_rx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_dropped - expr: | - sriov_vf_rx_broadcast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_broadcast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_broadcast - expr: | - sriov_vf_rx_multicast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_multicast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_multicast {{ end }} diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus.yaml index 45ae7adbf..d1772a554 100644 --- a/bindata/manifests/metrics-exporter/metrics-prometheus.yaml +++ b/bindata/manifests/metrics-exporter/metrics-prometheus.yaml @@ -12,6 +12,17 @@ spec: bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token" scheme: "https" honorLabels: true + relabelings: + - action: replace + sourceLabels: + - __meta_kubernetes_endpoint_node_name + targetLabel: node + - action: labeldrop + regex: pod + - action: labeldrop + regex: container + - action: labeldrop + regex: namespace tlsConfig: serverName: sriov-network-metrics-exporter-service.{{.Namespace}}.svc caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go index 
804432f04..96bf792b5 100644 --- a/test/conformance/tests/test_exporter_metrics.go +++ b/test/conformance/tests/test_exporter_metrics.go @@ -19,21 +19,18 @@ import ( "github.com/prometheus/common/model" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) -var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { +var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() { var node string var nic *sriovv1.InterfaceExt BeforeAll(func() { - if cluster.VirtualCluster() { - Skip("IGB driver does not support VF statistics") - } - err := namespaces.Create(namespaces.Test, clients) Expect(err).ToNot(HaveOccurred()) @@ -73,6 +70,9 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { }) It("collects metrics regarding receiving traffic via VF", func() { + if cluster.VirtualCluster() { + Skip("IGB driver does not support VF statistics") + } pod := createTestPod(node, []string{"test-me-network"}) DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) @@ -98,27 +98,76 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets)) }) - It("PrometheusRule should provide namespaced metrics", func() { - pod := createTestPod(node, []string{"test-me-network"}) - DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + Context("When Prometheus operator is available", func() { + BeforeEach(func() { + _, err := clients.ServiceMonitors(operatorNamespace).List(context.Background(), metav1.ListOptions{}) + if k8serrors.IsNotFound(err) { + Skip("Prometheus operator not available in the cluster") + } + }) - namespacedMetricNames := []string{ - "network:sriov_vf_rx_bytes", - "network:sriov_vf_tx_bytes", - "network:sriov_vf_rx_packets", - "network:sriov_vf_tx_packets", - "network:sriov_vf_rx_dropped", - "network:sriov_vf_tx_dropped", - 
"network:sriov_vf_rx_broadcast", - "network:sriov_vf_rx_multicast", - } + It("PrometheusRule should provide namespaced metrics", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + namespacedMetricNames := []string{ + "network:sriov_vf_rx_bytes", + "network:sriov_vf_tx_bytes", + "network:sriov_vf_rx_packets", + "network:sriov_vf_tx_packets", + "network:sriov_vf_rx_dropped", + "network:sriov_vf_tx_dropped", + "network:sriov_vf_rx_broadcast", + "network:sriov_vf_rx_multicast", + } - Eventually(func(g Gomega) { - for _, metricName := range namespacedMetricNames { - values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) - g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + Eventually(func(g Gomega) { + for _, metricName := range namespacedMetricNames { + values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) + g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + } + }, "90s", "1s").Should(Succeed()) + }) + + It("Metrics should have the correct labels", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + metricsName := []string{ + "sriov_vf_rx_bytes", + "sriov_vf_tx_bytes", + "sriov_vf_rx_packets", + "sriov_vf_tx_packets", + "sriov_vf_rx_dropped", + "sriov_vf_tx_dropped", + "sriov_vf_rx_broadcast", + "sriov_vf_rx_multicast", } - }, "40s", "1s").Should(Succeed()) + + Eventually(func(g Gomega) { + for _, metricName := range metricsName { + samples := runPromQLQuery(metricName) + g.Expect(samples).ToNot(BeEmpty(), "no value for metric %s", metricName) + g.Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKey(model.LabelName("node")), + HaveKey(model.LabelName("pf")), + HaveKey(model.LabelName("vf")), + )) + } + }, "90s", 
"1s").Should(Succeed()) + + // sriov_kubepoddevice has a different sets of label than statistics metrics + samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name)) + Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice") + Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)), + HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")), + HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)), + HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)), + )) + }) }) }) diff --git a/test/util/k8sreporter/reporter.go b/test/util/k8sreporter/reporter.go index 5a3405a91..13baac0aa 100644 --- a/test/util/k8sreporter/reporter.go +++ b/test/util/k8sreporter/reporter.go @@ -10,6 +10,9 @@ import ( sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + rbacv1 "k8s.io/api/rbac/v1" ) func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { @@ -18,6 +21,17 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { if err != nil { return err } + + err = monitoringv1.AddToScheme(s) + if err != nil { + return err + } + + err = rbacv1.AddToScheme(s) + if err != nil { + return err + } + return nil } @@ -38,6 +52,8 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { return true case multusNamespace != "" && ns == multusNamespace: return true + case ns == "openshift-monitoring": + return true } return false } @@ -47,6 +63,10 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { {Cr: &sriovv1.SriovNetworkNodePolicyList{}}, {Cr: &sriovv1.SriovNetworkList{}}, {Cr: 
&sriovv1.SriovOperatorConfigList{}}, + {Cr: &monitoringv1.ServiceMonitorList{}, Namespace: &operatorNamespace}, + {Cr: &monitoringv1.PrometheusRuleList{}, Namespace: &operatorNamespace}, + {Cr: &rbacv1.RoleList{}, Namespace: &operatorNamespace}, + {Cr: &rbacv1.RoleBindingList{}, Namespace: &operatorNamespace}, } err := os.Mkdir(reportPath, 0755) From 084810a1b5afa144f7f208e12bde2dbc58c72086 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Wed, 11 Sep 2024 15:30:26 -0400 Subject: [PATCH 09/38] openstack: dynamically mount the config-drive When we want to use config-drive in immutable systems, very often the config-drive is only used at boot and then umounted (e.g. ignition does this). Later when we want to fetch Metadata from the config drive, we actually have to mount it. In this PR, I'm adding code similar to coreos/ignition where we dynamically mount the config-drive if the device was found with the right label (config-2 or CONFIG-2 as documented in OpenStack). If the device is found, we mount it, fetch the data and umount it. 
--- pkg/platforms/openstack/openstack.go | 115 +++++++++++++++++++++------ 1 file changed, 92 insertions(+), 23 deletions(-) diff --git a/pkg/platforms/openstack/openstack.go b/pkg/platforms/openstack/openstack.go index 94a9ae433..8968c96be 100644 --- a/pkg/platforms/openstack/openstack.go +++ b/pkg/platforms/openstack/openstack.go @@ -5,6 +5,8 @@ import ( "fmt" "io" "os" + "os/exec" + "path/filepath" "strconv" "strings" @@ -21,15 +23,18 @@ import ( ) const ( - ospHostMetaDataDir = "/host/var/config/openstack/2018-08-27" - ospMetaDataDir = "/var/config/openstack/2018-08-27" - ospMetaDataBaseURL = "http://169.254.169.254/openstack/2018-08-27" - ospNetworkDataJSON = "network_data.json" - ospMetaDataJSON = "meta_data.json" - ospHostNetworkDataFile = ospHostMetaDataDir + "/" + ospNetworkDataJSON - ospHostMetaDataFile = ospHostMetaDataDir + "/" + ospMetaDataJSON - ospNetworkDataURL = ospMetaDataBaseURL + "/" + ospNetworkDataJSON - ospMetaDataURL = ospMetaDataBaseURL + "/" + ospMetaDataJSON + varConfigPath = "/var/config" + ospMetaDataBaseDir = "/openstack/2018-08-27" + ospMetaDataDir = varConfigPath + ospMetaDataBaseDir + ospMetaDataBaseURL = "http://169.254.169.254" + ospMetaDataBaseDir + ospNetworkDataJSON = "network_data.json" + ospMetaDataJSON = "meta_data.json" + ospNetworkDataURL = ospMetaDataBaseURL + "/" + ospNetworkDataJSON + ospMetaDataURL = ospMetaDataBaseURL + "/" + ospMetaDataJSON + // Config drive is defined as an iso9660 or vfat (deprecated) drive + // with the "config-2" label. 
+ //https://docs.openstack.org/nova/latest/user/config-drive.html + configDriveLabel = "config-2" ) var ( @@ -109,9 +114,10 @@ func New(hostManager host.HostManagerInterface) OpenstackInterface { } // GetOpenstackData gets the metadata and network_data -func getOpenstackData(useHostPath bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { - metaData, networkData, err = getOpenstackDataFromConfigDrive(useHostPath) +func getOpenstackData(mountConfigDrive bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { + metaData, networkData, err = getOpenstackDataFromConfigDrive(mountConfigDrive) if err != nil { + log.Log.Error(err, "GetOpenStackData(): non-fatal error getting OpenStack data from config drive") metaData, networkData, err = getOpenstackDataFromMetadataService() if err != nil { return metaData, networkData, fmt.Errorf("GetOpenStackData(): error getting OpenStack data: %w", err) @@ -153,46 +159,109 @@ func getOpenstackData(useHostPath bool) (metaData *OSPMetaData, networkData *OSP return metaData, networkData, err } +// getConfigDriveDevice returns the config drive device which was found +func getConfigDriveDevice() (string, error) { + dev := "/dev/disk/by-label/" + configDriveLabel + if _, err := os.Stat(dev); os.IsNotExist(err) { + out, err := exec.Command( + "blkid", "-l", + "-t", "LABEL="+configDriveLabel, + "-o", "device", + ).CombinedOutput() + if err != nil { + return "", fmt.Errorf("unable to run blkid: %v", err) + } + dev = strings.TrimSpace(string(out)) + } + log.Log.Info("found config drive device", "device", dev) + return dev, nil +} + +// mountConfigDriveDevice mounts the config drive and return the path +func mountConfigDriveDevice(device string) (string, error) { + if device == "" { + return "", fmt.Errorf("device is empty") + } + tmpDir, err := os.MkdirTemp("", "sriov-configdrive") + if err != nil { + return "", fmt.Errorf("error creating temp directory: %w", err) + } + cmd := exec.Command("mount", "-o", "ro", 
"-t", "auto", device, tmpDir) + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("error mounting config drive: %w", err) + } + log.Log.V(2).Info("mounted config drive device", "device", device, "path", tmpDir) + return tmpDir, nil +} + +// ummountConfigDriveDevice ummounts the config drive device +func ummountConfigDriveDevice(path string) error { + if path == "" { + return fmt.Errorf("path is empty") + } + cmd := exec.Command("umount", path) + if err := cmd.Run(); err != nil { + return fmt.Errorf("error umounting config drive: %w", err) + } + log.Log.V(2).Info("umounted config drive", "path", path) + return nil +} + // getOpenstackDataFromConfigDrive reads the meta_data and network_data files -func getOpenstackDataFromConfigDrive(useHostPath bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { +func getOpenstackDataFromConfigDrive(mountConfigDrive bool) (metaData *OSPMetaData, networkData *OSPNetworkData, err error) { metaData = &OSPMetaData{} networkData = &OSPNetworkData{} + var configDrivePath string log.Log.Info("reading OpenStack meta_data from config-drive") var metadataf *os.File ospMetaDataFilePath := ospMetaDataFile - if useHostPath { - ospMetaDataFilePath = ospHostMetaDataFile + if mountConfigDrive { + configDriveDevice, err := getConfigDriveDevice() + if err != nil { + return metaData, networkData, fmt.Errorf("error finding config drive device: %w", err) + } + configDrivePath, err = mountConfigDriveDevice(configDriveDevice) + if err != nil { + return metaData, networkData, fmt.Errorf("error mounting config drive device: %w", err) + } + defer func() { + if e := ummountConfigDriveDevice(configDrivePath); err == nil && e != nil { + err = fmt.Errorf("error umounting config drive device: %w", e) + } + if e := os.Remove(configDrivePath); err == nil && e != nil { + err = fmt.Errorf("error removing temp directory %s: %w", configDrivePath, e) + } + }() + ospMetaDataFilePath = filepath.Join(configDrivePath, ospMetaDataBaseDir, 
ospMetaDataJSON) + ospNetworkDataFile = filepath.Join(configDrivePath, ospMetaDataBaseDir, ospNetworkDataJSON) } metadataf, err = os.Open(ospMetaDataFilePath) if err != nil { - return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospHostMetaDataFile, err) + return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospMetaDataFilePath, err) } defer func() { if e := metadataf.Close(); err == nil && e != nil { - err = fmt.Errorf("error closing file %s: %w", ospHostMetaDataFile, e) + err = fmt.Errorf("error closing file %s: %w", ospMetaDataFilePath, e) } }() if err = json.NewDecoder(metadataf).Decode(&metaData); err != nil { - return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospHostMetaDataFile, err) + return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospMetaDataFilePath, err) } log.Log.Info("reading OpenStack network_data from config-drive") var networkDataf *os.File ospNetworkDataFilePath := ospNetworkDataFile - if useHostPath { - ospNetworkDataFilePath = ospHostNetworkDataFile - } networkDataf, err = os.Open(ospNetworkDataFilePath) if err != nil { - return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospHostNetworkDataFile, err) + return metaData, networkData, fmt.Errorf("error opening file %s: %w", ospNetworkDataFilePath, err) } defer func() { if e := networkDataf.Close(); err == nil && e != nil { - err = fmt.Errorf("error closing file %s: %w", ospHostNetworkDataFile, e) + err = fmt.Errorf("error closing file %s: %w", ospNetworkDataFilePath, e) } }() if err = json.NewDecoder(networkDataf).Decode(&networkData); err != nil { - return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospHostNetworkDataFile, err) + return metaData, networkData, fmt.Errorf("error unmarshalling metadata from file %s: %w", ospNetworkDataFilePath, err) } return metaData, networkData, err } From 
ba21df035b79c907dd1cbc4898e83a7557109553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 14:19:43 +0200 Subject: [PATCH 10/38] Enclose array expansions in double quote Fixes the following shellcheck error: SC2068 (error): Double quote array expansions to avoid re-splitting elements. https://www.shellcheck.net/wiki/SC2068 --- hack/deploy-setup.sh | 2 +- hack/vf-netns-switcher.sh | 8 ++++---- test/scripts/enable-kargs_test.sh | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hack/deploy-setup.sh b/hack/deploy-setup.sh index 2c2fc7d8d..807479c38 100755 --- a/hack/deploy-setup.sh +++ b/hack/deploy-setup.sh @@ -22,7 +22,7 @@ load_manifest() { fi files="service_account.yaml role.yaml role_binding.yaml clusterrole.yaml clusterrolebinding.yaml configmap.yaml sriovoperatorconfig.yaml operator.yaml" for m in ${files}; do - if [ "$(echo ${EXCLUSIONS[@]} | grep -o ${m} | wc -w | xargs)" == "0" ] ; then + if [ "$(echo "${EXCLUSIONS[@]}" | grep -o ${m} | wc -w | xargs)" == "0" ] ; then envsubst< ${m} | ${OPERATOR_EXEC} apply ${namespace:-} --validate=false -f - fi done diff --git a/hack/vf-netns-switcher.sh b/hack/vf-netns-switcher.sh index de4e8041a..e842a8dc8 100755 --- a/hack/vf-netns-switcher.sh +++ b/hack/vf-netns-switcher.sh @@ -95,7 +95,7 @@ It must be of the form :,. This flag can be repeated to specify done return_interfaces_to_default_namespace(){ - for netns in ${netnses[@]};do + for netns in "${netnses[@]}";do for pf in ${pfs[$netns]};do return_interface_to_default_namespace "${netns}" "${pf}" done @@ -360,7 +360,7 @@ main(){ trap return_interfaces_to_default_namespace INT EXIT TERM while true;do - for netns in ${netnses[@]};do + for netns in "${netnses[@]}";do switch_pfs "$netns" "${pfs[$netns]}" sleep 2 switch_netns_vfs "$netns" @@ -388,7 +388,7 @@ if [[ "$status" != "0" ]];then exit $status fi -for netns in ${netnses[@]};do +for netns in "${netnses[@]}";do netns_create "$netns" let status=$status+$? 
if [[ "$status" != "0" ]];then @@ -397,7 +397,7 @@ for netns in ${netnses[@]};do fi done -for netns in ${netnses[@]};do +for netns in "${netnses[@]}";do get_pcis_from_pfs "$netns" "${pfs[$netns]}" get_pf_switch_dev_info "$netns" "${pfs[$netns]}" done diff --git a/test/scripts/enable-kargs_test.sh b/test/scripts/enable-kargs_test.sh index 615f3d2b2..40c2764be 100755 --- a/test/scripts/enable-kargs_test.sh +++ b/test/scripts/enable-kargs_test.sh @@ -46,7 +46,7 @@ setUp() { # Mock chroot calls to the temporary test folder export real_chroot=$(which chroot) chroot() { - $real_chroot $FAKE_HOST ${@:2} + $real_chroot $FAKE_HOST "${@:2}" } export -f chroot From 3d553bfd6985fbd7225f615577148ed1e6a42963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 14:22:26 +0200 Subject: [PATCH 11/38] Add missing shebang Fixes the following shellcheck error: SC2148 (error): Tips depend on target shell and yours is unknown. Add a shebang or a 'shell' directive. https://www.shellcheck.net/wiki/SC2148 --- hack/env.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hack/env.sh b/hack/env.sh index c49c399d8..64f79212d 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ -1,3 +1,5 @@ +#!/bin/bash + if [ -z $SKIP_VAR_SET ]; then export SRIOV_CNI_IMAGE=${SRIOV_CNI_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-cni} export SRIOV_INFINIBAND_CNI_IMAGE=${SRIOV_INFINIBAND_CNI_IMAGE:-ghcr.io/k8snetworkplumbingwg/ib-sriov-cni} From 63246d6918a155fc9cbe2aed057274a5dcc9503d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 15:51:05 +0200 Subject: [PATCH 12/38] Explicitly expand array values Fixes the following shellcheck errors: SC2145 (error): Argument mixes string and array. Use * or separate argument. SC2199 (error): Arrays implicitly concatenate in [[ ]]. Use a loop (or explicit * instead of @). https://www.shellcheck.net/wiki/SC2145 https://www.shellcheck.net/wiki/SC2199 Also fixes a typo in SUPPORTED_INTERFACE_SWITCHER_MODES. 
--- hack/run-e2e-test-kind.sh | 6 +++--- hack/vf-netns-switcher.sh | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hack/run-e2e-test-kind.sh b/hack/run-e2e-test-kind.sh index 5cb7750c7..3cc080d9c 100755 --- a/hack/run-e2e-test-kind.sh +++ b/hack/run-e2e-test-kind.sh @@ -6,7 +6,7 @@ export SRIOV_NETWORK_OPERATOR_IMAGE="${SRIOV_NETWORK_OPERATOR_IMAGE:-sriov-netwo export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-origin-sriov-network-config-daemon:e2e-test}" export KUBECONFIG="${KUBECONFIG:-${HOME}/.kube/config}" INTERFACES_SWITCHER="${INTERFACES_SWITCHER:-"test-suite"}" -SUPPORTED_INTERFACE_SWTICHER_MODES=("test-suite", "system-service") +SUPPORTED_INTERFACE_SWITCHER_MODES=("test-suite", "system-service") RETRY_MAX=10 INTERVAL=10 TIMEOUT=300 @@ -16,9 +16,9 @@ while test $# -gt 0; do case "$1" in --device-netns-switcher) INTERFACES_SWITCHER="$2" - if [[ ! "${SUPPORTED_INTERFACE_SWTICHER_MODES[@]}" =~ "${INTERFACES_SWITCHER}" ]]; then + if [[ ! "${SUPPORTED_INTERFACE_SWITCHER_MODES[*]}" =~ "${INTERFACES_SWITCHER}" ]]; then echo "Error: unsupported interface switching mode: ${INTERFACES_SWITCHER}!" - echo "Supported modes are: ${SUPPORTED_INTERFACE_SWTICHER_MODES[@]}" + echo "Supported modes are: ${SUPPORTED_INTERFACE_SWITCHER_MODES[*]}" exit 1 fi shift diff --git a/hack/vf-netns-switcher.sh b/hack/vf-netns-switcher.sh index e842a8dc8..69881da7b 100755 --- a/hack/vf-netns-switcher.sh +++ b/hack/vf-netns-switcher.sh @@ -348,7 +348,7 @@ variables_check(){ check_empty_var(){ local var_name="$1" - if [[ -z "${!var_name[@]}" ]];then + if [[ -z "${!var_name[*]}" ]];then echo "Error: $var_name is empty..." return 1 fi @@ -403,7 +403,7 @@ for netns in "${netnses[@]}";do done if [[ "${#pcis[@]}" == "0" ]];then - echo "Error: could not get pci addresses of interfaces ${pfs[@]}!!" + echo "Error: could not get pci addresses of interfaces ${pfs[*]}!!" 
exit 1 fi From 3529811b1d3a0833dacc2e7fc27425749562f769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 17:43:01 +0200 Subject: [PATCH 13/38] Iterate over globs. Fixes the following shellcheck error: SC2045 (error): Iterating over ls output is fragile. Use globs. https://www.shellcheck.net/wiki/SC2045 --- hack/vf-netns-switcher.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hack/vf-netns-switcher.sh b/hack/vf-netns-switcher.sh index 69881da7b..c383b5d1e 100755 --- a/hack/vf-netns-switcher.sh +++ b/hack/vf-netns-switcher.sh @@ -277,19 +277,20 @@ switch_interface_vf_representors(){ return 0 fi - for interface in $(ls /sys/class/net);do - phys_switch_id=$(cat /sys/class/net/$interface/phys_switch_id) + for interface in /sys/class/net/*;do + phys_switch_id=$(cat $interface/phys_switch_id) if [[ "$phys_switch_id" != "${pf_switch_ids[$pf_name]}" ]]; then continue fi - phys_port_name=$(cat /sys/class/net/$interface/phys_port_name) + phys_port_name=$(cat $interface/phys_port_name) phys_port_name_pf_index=${phys_port_name%vf*} phys_port_name_pf_index=${phys_port_name_pf_index#pf} if [[ "$phys_port_name_pf_index" != "${pf_port_names[$pf_name]:1}" ]]; then continue fi - echo "Switching VF representor $interface of PF $pf_name to netns $worker_netns" - switch_vf $interface $worker_netns + interface_name=${interface##*/} + echo "Switching VF representor $interface_name of PF $pf_name to netns $worker_netns" + switch_vf $interface_name $worker_netns done } From 61aacb5bc7d51894346749fb6e838a320c0b7505 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Mon, 23 Sep 2024 19:26:03 +0300 Subject: [PATCH 14/38] Fix: GetDevlinkDeviceParam to handle edge-cases correctly On some kernels GetDevlinkDeviceParam may return empty values for some kernel parameters. The netlink library is able to handle this, but the code in the GetDevlinkDeviceParam function may panic if an unexpected value is received. 
Add extra checks to avoid panics --- pkg/host/internal/network/network.go | 22 +++++++++++++++------- pkg/host/internal/sriov/sriov.go | 5 +++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index b3014f9e9..2eb40dd69 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -264,12 +264,12 @@ func (n *network) GetDevlinkDeviceParam(pciAddr, paramName string) (string, erro funcLog.Error(err, "GetDevlinkDeviceParam(): fail to get devlink device param") return "", err } - if len(param.Values) == 0 { - err = fmt.Errorf("param %s has no value", paramName) - funcLog.Error(err, "GetDevlinkDeviceParam(): error") - return "", err + if len(param.Values) == 0 || param.Values[0].Data == nil { + funcLog.Info("GetDevlinkDeviceParam(): WARNING: can't read devlink parameter from the device, an empty value received") + return "", nil } var value string + var ok bool switch param.Type { case nl.DEVLINK_PARAM_TYPE_U8, nl.DEVLINK_PARAM_TYPE_U16, nl.DEVLINK_PARAM_TYPE_U32: var valData uint64 @@ -281,14 +281,22 @@ func (n *network) GetDevlinkDeviceParam(pciAddr, paramName string) (string, erro case uint32: valData = uint64(v) default: - return "", fmt.Errorf("unexpected uint type type") + return "", fmt.Errorf("value is not uint") } value = strconv.FormatUint(valData, 10) case nl.DEVLINK_PARAM_TYPE_STRING: - value = param.Values[0].Data.(string) + value, ok = param.Values[0].Data.(string) + if !ok { + return "", fmt.Errorf("value is not a string") + } case nl.DEVLINK_PARAM_TYPE_BOOL: - value = strconv.FormatBool(param.Values[0].Data.(bool)) + var boolValue bool + boolValue, ok = param.Values[0].Data.(bool) + if !ok { + return "", fmt.Errorf("value is not a bool") + } + value = strconv.FormatBool(boolValue) default: return "", fmt.Errorf("unknown value type: %d", param.Type) } diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go 
index 379cf6a70..bd453ae30 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -376,6 +376,11 @@ func (s *sriov) configureHWOptionsForSwitchdev(iface *sriovnetworkv1.Interface) log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to read current flow steering mode for the device", "device", iface.PciAddress) return err } + if currentFlowSteeringMode == "" { + log.Log.V(2).Info("configureHWOptionsForSwitchdev(): can't detect current flow_steering_mode mode for the device, skip", + "device", iface.PciAddress) + return nil + } if currentFlowSteeringMode == desiredFlowSteeringMode { return nil } From a01a1392f384df0653e4baa7cbdcacdc58953a38 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Mon, 7 Oct 2024 14:20:03 +0200 Subject: [PATCH 15/38] metrics: Fix `Metrics should have the correct labels` test `sriov_kubepoddevice` metric might end up in the Prometheus database after a while, as the default scrape interval is 30s. This leads to failures in the end-to-end lane like: ``` [sriov] Metrics Exporter When Prometheus operator is available [It] Metrics should have the correct labels /root/opr-ocp2-1/data/sriov-network-operator/sriov-network-operator/test/conformance/tests/test_exporter_metrics.go:132 [FAILED] no value for metric sriov_kubepoddevice ``` Put the metric assertion in an `Eventually` statement Signed-off-by: Andrea Panattoni --- .../tests/test_exporter_metrics.go | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go index 96bf792b5..f7bc82d3f 100644 --- a/test/conformance/tests/test_exporter_metrics.go +++ b/test/conformance/tests/test_exporter_metrics.go @@ -66,6 +66,8 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() Expect(err).ToNot(HaveOccurred()) waitForNetAttachDef("test-me-network", namespaces.Test) + WaitForSRIOVStable() + 
DeferCleanup(namespaces.Clean, operatorNamespace, namespaces.Test, clients, discovery.Enabled()) }) @@ -158,15 +160,17 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() }, "90s", "1s").Should(Succeed()) // sriov_kubepoddevice has a different sets of label than statistics metrics - samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name)) - Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice") - Expect(samples[0].Metric).To(And( - HaveKey(model.LabelName("pciAddr")), - HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)), - HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")), - HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)), - HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)), - )) + Eventually(func(g Gomega) { + samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name)) + g.Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice") + g.Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)), + HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")), + HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)), + HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)), + )) + }, "60s", "1s").Should(Succeed()) }) }) }) From 6abdfe6d188344ab6bd6ad0e64f0895ab7aa414f Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 8 Oct 2024 21:23:42 +0300 Subject: [PATCH 16/38] Fix NRI rbac Signed-off-by: Sebastian Sch --- bindata/manifests/webhook/002-rbac.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindata/manifests/webhook/002-rbac.yaml b/bindata/manifests/webhook/002-rbac.yaml index 
77b2d95d7..32affca29 100644 --- a/bindata/manifests/webhook/002-rbac.yaml +++ b/bindata/manifests/webhook/002-rbac.yaml @@ -21,7 +21,7 @@ rules: - apiGroups: - "" resources: - - configmap + - configmaps verbs: - 'watch' - 'list' From fb193e80038325b4c9bc8d8012d809eca9bc46da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Thu, 26 Sep 2024 14:23:34 +0200 Subject: [PATCH 17/38] Use grep for matching args with sh Fixes the following shellcheck error: SC2081 (error): [ .. ] can't match globs. Use a case statement. https://www.shellcheck.net/wiki/SC2081 --- test/scripts/enable-kargs_test.sh | 1 + test/scripts/rpm-ostree_mock | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/scripts/enable-kargs_test.sh b/test/scripts/enable-kargs_test.sh index 40c2764be..93a985700 100755 --- a/test/scripts/enable-kargs_test.sh +++ b/test/scripts/enable-kargs_test.sh @@ -40,6 +40,7 @@ setUp() { cp $(which cat) ${FAKE_HOST}/usr/bin/ cp $(which test) ${FAKE_HOST}/usr/bin/ cp $(which sh) ${FAKE_HOST}/usr/bin/ + cp $(which grep) ${FAKE_HOST}/usr/bin/ cp "$SCRIPTPATH/rpm-ostree_mock" ${FAKE_HOST}/usr/bin/rpm-ostree } diff --git a/test/scripts/rpm-ostree_mock b/test/scripts/rpm-ostree_mock index 16e816cc9..db6f66040 100755 --- a/test/scripts/rpm-ostree_mock +++ b/test/scripts/rpm-ostree_mock @@ -5,7 +5,7 @@ # Write invocation with arguments to a file to allow making assertion. echo "$*" >> /rpm-ostree_calls -if [ "$*" != *"--append"* ] +if ! echo "$*" | grep -q "\--append" then # Caller is trying to read kernel arguments. cat /proc/cmdline From 5394d218f8c50ab7acf05558c1777491115fdbaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Mon, 23 Sep 2024 17:58:54 +0200 Subject: [PATCH 18/38] CI: Add a bash linter to pre-submits Warns about shellcheck issues with severity `error`. 
--- .github/workflows/test.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2fbe84c81..d59e52e47 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,6 +100,16 @@ jobs: # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version. version: v1.55.2 + shellcheck: + name: Shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master + with: + severity: error + test-coverage: name: test-coverage runs-on: ubuntu-latest From f286a04ad7c47216fece213bca47fddcc774f4d2 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 4 Oct 2024 19:05:53 +0200 Subject: [PATCH 19/38] config-daemon: Restart all instances of device-plugin When the operator changes the device-plugin Spec (e.g. .Spec.NodeSelector), it may happen that there are two device plugin pods for a given node, one that is terminating, the other that is initializing. If the config-daemon executes `restartDevicePluginPod()` at the same time, it may kill the terminating pod, while the initializing one will run with the old dp configuration. This may cause one or more resources to not be advertised, until a manual device plugin restart occurs. Make the config-daemon restart all the device-plugin instances it finds for its own node.
Signed-off-by: Andrea Panattoni --- pkg/daemon/daemon.go | 53 +++++++++++++++++----------------- pkg/daemon/daemon_test.go | 61 ++++++++++++++++++++++++++++++--------- 2 files changed, 74 insertions(+), 40 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 5ed31ff85..ff7f326dc 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -120,6 +120,7 @@ func New( eventRecorder: er, featureGate: featureGates, disabledPlugins: disabledPlugins, + mu: &sync.Mutex{}, } } @@ -159,7 +160,6 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { var timeout int64 = 5 var metadataKey = "metadata.name" - dn.mu = &sync.Mutex{} informerFactory := sninformer.NewFilteredSharedInformerFactory(dn.sriovClient, time.Second*15, vars.Namespace, @@ -683,7 +683,6 @@ func (dn *Daemon) restartDevicePluginPod() error { defer dn.mu.Unlock() log.Log.V(2).Info("restartDevicePluginPod(): try to restart device plugin pod") - var podToDelete string pods, err := dn.kubeClient.CoreV1().Pods(vars.Namespace).List(context.Background(), metav1.ListOptions{ LabelSelector: "app=sriov-device-plugin", FieldSelector: "spec.nodeName=" + vars.NodeName, @@ -702,35 +701,37 @@ func (dn *Daemon) restartDevicePluginPod() error { log.Log.Info("restartDevicePluginPod(): device plugin pod exited") return nil } - podToDelete = pods.Items[0].Name - log.Log.V(2).Info("restartDevicePluginPod(): Found device plugin pod, deleting it", "pod-name", podToDelete) - err = dn.kubeClient.CoreV1().Pods(vars.Namespace).Delete(context.Background(), podToDelete, metav1.DeleteOptions{}) - if errors.IsNotFound(err) { - log.Log.Info("restartDevicePluginPod(): pod to delete not found") - return nil - } - if err != nil { - log.Log.Error(err, "restartDevicePluginPod(): Failed to delete device plugin pod, retrying") - return err - } - - if err := wait.PollImmediateUntil(3*time.Second, func() (bool, error) { - _, err := dn.kubeClient.CoreV1().Pods(vars.Namespace).Get(context.Background(), 
podToDelete, metav1.GetOptions{}) + for _, pod := range pods.Items { + podToDelete := pod.Name + log.Log.V(2).Info("restartDevicePluginPod(): Found device plugin pod, deleting it", "pod-name", podToDelete) + err = dn.kubeClient.CoreV1().Pods(vars.Namespace).Delete(context.Background(), podToDelete, metav1.DeleteOptions{}) if errors.IsNotFound(err) { - log.Log.Info("restartDevicePluginPod(): device plugin pod exited") - return true, nil + log.Log.Info("restartDevicePluginPod(): pod to delete not found") + continue } - if err != nil { - log.Log.Error(err, "restartDevicePluginPod(): Failed to check for device plugin exit, retrying") - } else { - log.Log.Info("restartDevicePluginPod(): waiting for device plugin pod to exit", "pod-name", podToDelete) + log.Log.Error(err, "restartDevicePluginPod(): Failed to delete device plugin pod, retrying") + return err + } + + if err := wait.PollImmediateUntil(3*time.Second, func() (bool, error) { + _, err := dn.kubeClient.CoreV1().Pods(vars.Namespace).Get(context.Background(), podToDelete, metav1.GetOptions{}) + if errors.IsNotFound(err) { + log.Log.Info("restartDevicePluginPod(): device plugin pod exited") + return true, nil + } + + if err != nil { + log.Log.Error(err, "restartDevicePluginPod(): Failed to check for device plugin exit, retrying") + } else { + log.Log.Info("restartDevicePluginPod(): waiting for device plugin pod to exit", "pod-name", podToDelete) + } + return false, nil + }, dn.stopCh); err != nil { + log.Log.Error(err, "restartDevicePluginPod(): failed to wait for checking pod deletion") + return err } - return false, nil - }, dn.stopCh); err != nil { - log.Log.Error(err, "restartDevicePluginPod(): failed to wait for checking pod deletion") - return err } return nil diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index f1111810a..67a56633f 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -32,6 +32,8 @@ import ( 
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem" ) +var SriovDevicePluginPod corev1.Pod + func TestConfigDaemon(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Config Daemon Suite") @@ -107,19 +109,6 @@ var _ = Describe("Config Daemon", func() { }, } - SriovDevicePluginPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "sriov-device-plugin-xxxx", - Namespace: vars.Namespace, - Labels: map[string]string{ - "app": "sriov-device-plugin", - }, - }, - Spec: corev1.PodSpec{ - NodeName: "test-node", - }, - } - err = sriovnetworkv1.AddToScheme(scheme.Scheme) Expect(err).ToNot(HaveOccurred()) kClient := kclient.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&corev1.Node{ @@ -130,7 +119,7 @@ var _ = Describe("Config Daemon", func() { Namespace: vars.Namespace, }}).Build() - kubeClient := fakek8s.NewSimpleClientset(&FakeSupportedNicIDs, &SriovDevicePluginPod) + kubeClient := fakek8s.NewSimpleClientset(&FakeSupportedNicIDs) snclient := snclientset.NewSimpleClientset() err = sriovnetworkv1.InitNicIDMapFromConfigMap(kubeClient, vars.Namespace) Expect(err).ToNot(HaveOccurred()) @@ -175,6 +164,22 @@ var _ = Describe("Config Daemon", func() { err := sut.Run(stopCh, exitCh) Expect(err).ToNot(HaveOccurred()) }() + + SriovDevicePluginPod = corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sriov-device-plugin-xxxx", + Namespace: vars.Namespace, + Labels: map[string]string{ + "app": "sriov-device-plugin", + }, + }, + Spec: corev1.PodSpec{ + NodeName: "test-node", + }, + } + _, err = sut.kubeClient.CoreV1().Pods(vars.Namespace).Create(context.Background(), &SriovDevicePluginPod, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + }) AfterEach(func() { @@ -286,6 +291,34 @@ var _ = Describe("Config Daemon", func() { Expect(sut.desiredNodeState.GetGeneration()).To(BeNumerically("==", 777)) }) + + It("restart all the sriov-device-plugin pods present on the node", func() { + otherPod1 := 
SriovDevicePluginPod.DeepCopy() + otherPod1.Name = "sriov-device-plugin-xxxa" + _, err := sut.kubeClient.CoreV1().Pods(vars.Namespace).Create(context.Background(), otherPod1, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + otherPod2 := SriovDevicePluginPod.DeepCopy() + otherPod2.Name = "sriov-device-plugin-xxxz" + _, err = sut.kubeClient.CoreV1().Pods(vars.Namespace).Create(context.Background(), otherPod2, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + err = sut.restartDevicePluginPod() + Expect(err).ToNot(HaveOccurred()) + + Eventually(func() (int, error) { + podList, err := sut.kubeClient.CoreV1().Pods(vars.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=sriov-device-plugin", + FieldSelector: "spec.nodeName=test-node", + }) + + if err != nil { + return 0, err + } + + return len(podList.Items), nil + }, "1s").Should(BeZero()) + }) }) }) From 85063dc58f1c86d5d33c09e12a42504abd0217dd Mon Sep 17 00:00:00 2001 From: William Zhao Date: Thu, 10 Oct 2024 16:16:00 -0400 Subject: [PATCH 20/38] Add Intel Corporation Ethernet Controller E810-XXV for backplane, E823-L for SFP, E823-L for backplane for NetSec Accelerator Cards Fixes Issue #789 Signed-off-by: William Zhao --- deploy/configmap.yaml | 3 +++ .../sriov-network-operator-chart/templates/configmap.yaml | 3 +++ doc/supported-hardware.md | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml index b21b07ea6..8aa4cb970 100644 --- a/deploy/configmap.yaml +++ b/deploy/configmap.yaml @@ -16,8 +16,11 @@ data: Intel_ice_Columbiaville_E810-CQDA2_2CQDA2: "8086 1592 1889" Intel_ice_Columbiaville_E810-XXVDA4: "8086 1593 1889" Intel_ice_Columbiaville_E810-XXVDA2: "8086 159b 1889" + Intel_ice_Columbiaville_E810-XXV_BACKPLANE: "8086 1599 1889" Intel_ice_Columbiaville_E810: "8086 1591 1889" Intel_ice_Columbiapark_E823C: "8086 188a 1889" + Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889" + 
Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889" Nvidia_mlx5_ConnectX-4: "15b3 1013 1014" Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016" Nvidia_mlx5_ConnectX-5: "15b3 1017 1018" diff --git a/deployment/sriov-network-operator-chart/templates/configmap.yaml b/deployment/sriov-network-operator-chart/templates/configmap.yaml index 6f6ab3bcc..a4e467da8 100644 --- a/deployment/sriov-network-operator-chart/templates/configmap.yaml +++ b/deployment/sriov-network-operator-chart/templates/configmap.yaml @@ -16,8 +16,11 @@ data: Intel_ice_Columbiaville_E810-CQDA2_2CQDA2: "8086 1592 1889" Intel_ice_Columbiaville_E810-XXVDA4: "8086 1593 1889" Intel_ice_Columbiaville_E810-XXVDA2: "8086 159b 1889" + Intel_ice_Columbiaville_E810-XXV_BACKPLANE: "8086 1599 1889" Intel_ice_Columbiaville_E810: "8086 1591 1889" Intel_ice_Columbiapark_E823C: "8086 188a 1889" + Intel_ice_Columbiapark_E823L_SFP: "8086 124d 1889" + Intel_ice_Columbiapark_E823L_BACKPLANE: "8086 124c 1889" Nvidia_mlx5_ConnectX-4: "15b3 1013 1014" Nvidia_mlx5_ConnectX-4LX: "15b3 1015 1016" Nvidia_mlx5_ConnectX-5: "15b3 1017 1018" diff --git a/doc/supported-hardware.md b/doc/supported-hardware.md index 446190905..75b3fafcb 100644 --- a/doc/supported-hardware.md +++ b/doc/supported-hardware.md @@ -13,7 +13,10 @@ The following SR-IOV capable hardware is supported with sriov-network-operator: | Intel E810-CQDA2/2CQDA2 Family | 8086 | 1592 | | Intel E810-XXVDA4 Family | 8086 | 1593 | | Intel E810-XXVDA2 Family | 8086 | 159b | +| Intel E810-XXV Backplane Family | 8086 | 1599 | | Intel E823-C Family | 8086 | 188a | +| Intel E823-L SFP Family | 8086 | 124d | +| Intel E823-L Backplane Family | 8086 | 124c | | Mellanox MT27700 Family [ConnectX-4] | 15b3 | 1013 | | Mellanox MT27710 Family [ConnectX-4 Lx] | 15b3 | 1015 | | Mellanox MT27800 Family [ConnectX-5] | 15b3 | 1017 | @@ -53,7 +56,10 @@ The following table depicts the supported SR-IOV hardware features of each suppo | Intel E810-CQDA2/2CQDA2 Family | V | V | X | | Intel 
E810-XXVDA4 Family | V | V | X | | Intel E810-XXVDA2 Family | V | V | X | +| Intel E810-XXV Backplane Family | V | V | X | | Intel E823-C Family | V | V | X | +| Intel E823-L SFP Family | V | V | X | +| Intel E823-L Backplane Family | V | V | X | | Mellanox MT27700 Family [ConnectX-4] | V | V | V | | Mellanox MT27710 Family [ConnectX-4 Lx] | V | V | V | | Mellanox MT27800 Family [ConnectX-5] | V | V | V | From 6556c92a3d47b1c1d87136eff437d10dbe4c2562 Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Thu, 19 Sep 2024 11:50:21 +0300 Subject: [PATCH 21/38] Add NVIDIA ConnectX-8 to supported NICs list Signed-off-by: Ivan Kolodiazhnyi --- deploy/configmap.yaml | 1 + .../sriov-network-operator-chart/templates/configmap.yaml | 1 + doc/supported-hardware.md | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml index b21b07ea6..98845d907 100644 --- a/deploy/configmap.yaml +++ b/deploy/configmap.yaml @@ -26,6 +26,7 @@ data: Nvidia_mlx5_ConnectX-6_Dx: "15b3 101d 101e" Nvidia_mlx5_ConnectX-6_Lx: "15b3 101f 101e" Nvidia_mlx5_ConnectX-7: "15b3 1021 101e" + Nvidia_mlx5_ConnectX-8: "15b3 1023 101e" Nvidia_mlx5_MT42822_BlueField-2_integrated_ConnectX-6_Dx: "15b3 a2d6 101e" Nvidia_mlx5_MT43244_BlueField-3_integrated_ConnectX-7_Dx: "15b3 a2dc 101e" Broadcom_bnxt_BCM57414_2x25G: "14e4 16d7 16dc" diff --git a/deployment/sriov-network-operator-chart/templates/configmap.yaml b/deployment/sriov-network-operator-chart/templates/configmap.yaml index 6f6ab3bcc..b250ddfe5 100644 --- a/deployment/sriov-network-operator-chart/templates/configmap.yaml +++ b/deployment/sriov-network-operator-chart/templates/configmap.yaml @@ -26,6 +26,7 @@ data: Nvidia_mlx5_ConnectX-6_Dx: "15b3 101d 101e" Nvidia_mlx5_ConnectX-6_Lx: "15b3 101f 101e" Nvidia_mlx5_ConnectX-7: "15b3 1021 101e" + Nvidia_mlx5_ConnectX-8: "15b3 1023 101e" Nvidia_mlx5_MT42822_BlueField-2_integrated_ConnectX-6_Dx: "15b3 a2d6 101e" 
Nvidia_mlx5_MT43244_BlueField-3_integrated_ConnectX-7_Dx: "15b3 a2dc 101e" Broadcom_bnxt_BCM57414_2x25G: "14e4 16d7 16dc" diff --git a/doc/supported-hardware.md b/doc/supported-hardware.md index 446190905..7e2c3002c 100644 --- a/doc/supported-hardware.md +++ b/doc/supported-hardware.md @@ -21,7 +21,8 @@ The following SR-IOV capable hardware is supported with sriov-network-operator: | Mellanox MT28908 Family [ConnectX-6] | 15b3 | 101b | | Mellanox MT28908 Family [ConnectX-6 Dx] | 15b3 | 101d | | Mellanox MT28908 Family [ConnectX-6 Lx] | 15b3 | 101f | -| Mellanox MT2910 Family [ConnectX-7 | 15b3 | 1021 | +| Mellanox MT2910 Family [ConnectX-7] | 15b3 | 1021 | +| Mellanox CX8 Family [ConnectX-8] | 15b3 | 1023 | | Mellanox MT42822 BlueField-2 integrated ConnectX-6 Dx | 15b3 | a2d6 | | Mellanox MT43244 BlueField-3 integrated ConnectX-7 Dx | 15b3 | a2dc | | Qlogic QL45000 Series 50GbE Controller | 1077 | 1654 | @@ -62,6 +63,7 @@ The following table depicts the supported SR-IOV hardware features of each suppo | Mellanox MT28908 Family [ConnectX-6 Dx] | V | V | V | | Mellanox MT28908 Family [ConnectX-6 Lx] | V | V | V | | Mellanox MT28908 Family [ConnectX-7] | V | V | V | +| Mellanox CX8 Family [ConnectX-8] | V | V | V | | Mellanox MT42822 BlueField-2 integrated ConnectX-6 Dx | V | V | V | | Mellanox MT43244 BlueField-3 integrated ConnectX-6 Dx | V | V | V | | Qlogic QL45000 Series 50GbE Controller | V | X | X | From 9782923ca92fc34454081ede724bbaed191da200 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 18 Oct 2024 13:11:18 +0200 Subject: [PATCH 22/38] logging: Reduce device discovering verbosity The `DiscoverSriovDevices` routine produces a huge amount of log entries, making debugging problems hard. Remove log entries that can produce a log line for each configured VF and which does not produce any change in the environment. 
Signed-off-by: Andrea Panattoni --- pkg/host/internal/network/network.go | 2 +- pkg/host/internal/vdpa/vdpa.go | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index 2eb40dd69..ef85ad24a 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -75,7 +75,7 @@ func (n *network) TryToGetVirtualInterfaceName(pciAddr string) string { func (n *network) TryGetInterfaceName(pciAddr string) string { names, err := n.dputilsLib.GetNetNames(pciAddr) if err != nil || len(names) < 1 { - log.Log.Error(err, "TryGetInterfaceName(): failed to get interface name") + log.Log.Error(err, "TryGetInterfaceName(): failed to get interface name", "pciAddress", pciAddr) return "" } netDevName := names[0] diff --git a/pkg/host/internal/vdpa/vdpa.go b/pkg/host/internal/vdpa/vdpa.go index 4a41c63d1..e21d00cb6 100644 --- a/pkg/host/internal/vdpa/vdpa.go +++ b/pkg/host/internal/vdpa/vdpa.go @@ -94,11 +94,9 @@ func (v *vdpa) DeleteVDPADevice(pciAddr string) error { func (v *vdpa) DiscoverVDPAType(pciAddr string) string { expectedVDPAName := generateVDPADevName(pciAddr) funcLog := log.Log.WithValues("device", pciAddr, "name", expectedVDPAName) - funcLog.V(2).Info("DiscoverVDPAType() discover device type") _, err := v.netlinkLib.VDPAGetDevByName(expectedVDPAName) if err != nil { if errors.Is(err, syscall.ENODEV) { - funcLog.V(2).Info("DiscoverVDPAType(): VDPA device for VF not found") return "" } if errors.Is(err, syscall.ENOENT) { From b5b0d6b2177231d7faca9db52ced9c25f50cab0b Mon Sep 17 00:00:00 2001 From: Soule BA Date: Tue, 22 Oct 2024 14:28:56 +0200 Subject: [PATCH 23/38] Add a note in documentation regarding systemd mode Signed-off-by: Soule BA --- deployment/sriov-network-operator-chart/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deployment/sriov-network-operator-chart/README.md b/deployment/sriov-network-operator-chart/README.md index 
a867613b2..d5d529dc9 100644 --- a/deployment/sriov-network-operator-chart/README.md +++ b/deployment/sriov-network-operator-chart/README.md @@ -135,6 +135,11 @@ This section contains general parameters that apply to both the operator and dae | `sriovOperatorConfig.configurationMode` | string | `daemon` | sriov-network-config-daemon configuration mode. either `daemon` or `systemd` | | `sriovOperatorConfig.featureGates` | map[string]bool | `{}` | feature gates to enable/disable | +**Note** + +When `sriovOperatorConfig.configurationMode` is configured as `systemd`, configuration files and `systemd` service files are created on the node. +Upon chart deletion, those files are not cleaned up. For cases where this is not acceptable, users should rather configure the `daemon` mode. + ### Images parameters | Name | description | From dc299c464d838a4d73dffe1978cc9edac0bc64fb Mon Sep 17 00:00:00 2001 From: Ido Heyvi Date: Sun, 27 Oct 2024 16:04:24 +0200 Subject: [PATCH 24/38] Fixing daemon sriov VFs config, where PF pci address got unbound instead of VF address, in case of using IB link type Signed-off-by: Ido Heyvi --- pkg/host/internal/sriov/sriov.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index bd453ae30..bf9919a7e 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -484,7 +484,7 @@ func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error { if err := s.infinibandHelper.ConfigureVfGUID(addr, iface.PciAddress, vfID, pfLink); err != nil { return err } - if err := s.kernelHelper.Unbind(iface.PciAddress); err != nil { + if err := s.kernelHelper.Unbind(addr); err != nil { return err } } else { From df1407d3a6af01f9d5e8bfe859ef39e07d29aaa3 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 29 Oct 2024 13:51:45 +0200 Subject: [PATCH 25/38] Fix k8s CI have a service that will load the br_netfilter driver after reboot
Signed-off-by: Sebastian Sch --- hack/run-e2e-conformance-virtual-cluster.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh index 1a75a280d..d6fa44fd9 100755 --- a/hack/run-e2e-conformance-virtual-cluster.sh +++ b/hack/run-e2e-conformance-virtual-cluster.sh @@ -196,6 +196,22 @@ WantedBy=default.target' > /etc/systemd/system/disable-offload.service systemctl daemon-reload systemctl enable --now disable-offload +echo '[Unit] +Description=load br_netfilter +After=network.target + +[Service] +Type=oneshot +ExecStart=/usr/bin/bash -c "modprobe br_netfilter" +StandardOutput=journal+console +StandardError=journal+console + +[Install] +WantedBy=default.target' > /etc/systemd/system/load-br-netfilter.service + +systemctl daemon-reload +systemctl enable --now load-br-netfilter + systemctl restart NetworkManager EOF From 0d9a7070041b8256cd0ece643d0f91aa4cd1e5bc Mon Sep 17 00:00:00 2001 From: Ido Heyvi Date: Sun, 13 Oct 2024 10:52:43 +0300 Subject: [PATCH 26/38] adding sriov operator config finalizer, to control generated cluster level objects cleanup Signed-off-by: Ido Heyvi --- api/v1/helper.go | 11 ++--- controllers/sriovoperatorconfig_controller.go | 37 ++++++++++++++++- .../sriovoperatorconfig_controller_test.go | 40 ++++++++++++++++++- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/api/v1/helper.go b/api/v1/helper.go index bfdfbc473..62ea0d2a5 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -26,11 +26,12 @@ import ( ) const ( - LASTNETWORKNAMESPACE = "operator.sriovnetwork.openshift.io/last-network-namespace" - NETATTDEFFINALIZERNAME = "netattdef.finalizers.sriovnetwork.openshift.io" - POOLCONFIGFINALIZERNAME = "poolconfig.finalizers.sriovnetwork.openshift.io" - ESwithModeLegacy = "legacy" - ESwithModeSwitchDev = "switchdev" + LASTNETWORKNAMESPACE = "operator.sriovnetwork.openshift.io/last-network-namespace" + NETATTDEFFINALIZERNAME = 
"netattdef.finalizers.sriovnetwork.openshift.io" + POOLCONFIGFINALIZERNAME = "poolconfig.finalizers.sriovnetwork.openshift.io" + OPERATORCONFIGFINALIZERNAME = "operatorconfig.finalizers.sriovnetwork.openshift.io" + ESwithModeLegacy = "legacy" + ESwithModeSwitchDev = "switchdev" SriovCniStateEnable = "enable" SriovCniStateDisable = "disable" diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 377ebd2de..c9f21f428 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -40,6 +40,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/go-logr/logr" machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" @@ -83,8 +84,6 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. if err != nil { if apierrors.IsNotFound(err) { logger.Info("default SriovOperatorConfig object not found. waiting for creation.") - - err := r.deleteAllWebhooks(ctx) return reconcile.Result{}, err } // Error reading the object - requeue the request. @@ -94,6 +93,19 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. 
snolog.SetLogLevel(defaultConfig.Spec.LogLevel) + // examine DeletionTimestamp to determine if object is under deletion + if !defaultConfig.ObjectMeta.DeletionTimestamp.IsZero() { + // The object is being deleted + return r.handleSriovOperatorConfigDeletion(ctx, defaultConfig, logger) + } + // add finalizer if needed + if !sriovnetworkv1.StringInArray(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) { + defaultConfig.ObjectMeta.Finalizers = append(defaultConfig.ObjectMeta.Finalizers, sriovnetworkv1.OPERATORCONFIGFINALIZERNAME) + if err := r.Update(ctx, defaultConfig); err != nil { + return reconcile.Result{}, err + } + } + r.FeatureGate.Init(defaultConfig.Spec.FeatureGates) logger.Info("enabled featureGates", "featureGates", r.FeatureGate.String()) @@ -434,6 +446,27 @@ func (r *SriovOperatorConfigReconciler) syncOpenShiftSystemdService(ctx context. return r.setLabelInsideObject(ctx, cr, objs) } +func (r *SriovOperatorConfigReconciler) handleSriovOperatorConfigDeletion(ctx context.Context, + defaultConfig *sriovnetworkv1.SriovOperatorConfig, logger logr.Logger) (ctrl.Result, error) { + var err error + if sriovnetworkv1.StringInArray(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) { + // our finalizer is present, so lets handle any external dependency + logger.Info("delete SriovOperatorConfig CR", "Namespace", defaultConfig.Namespace, "Name", defaultConfig.Name) + // make sure webhooks objects are deleted prior of removing finalizer + err = r.deleteAllWebhooks(ctx) + if err != nil { + return reconcile.Result{}, err + } + // remove our finalizer from the list and update it. 
+ defaultConfig.ObjectMeta.Finalizers, _ = sriovnetworkv1.RemoveString(sriovnetworkv1.OPERATORCONFIGFINALIZERNAME, defaultConfig.ObjectMeta.Finalizers) + if err := r.Update(ctx, defaultConfig); err != nil { + return reconcile.Result{}, err + } + } + + return reconcile.Result{}, err +} + func (r SriovOperatorConfigReconciler) setLabelInsideObject(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig, objs []*uns.Unstructured) error { logger := log.Log.WithName("setLabelInsideObject") for _, obj := range objs { diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 7f6db3522..47e4fc09d 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -102,9 +102,15 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Context("When is up", func() { BeforeEach(func() { + var err error config := &sriovnetworkv1.SriovOperatorConfig{} - err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) + err = util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) Expect(err).NotTo(HaveOccurred()) + // in case controller yet to add object's finalizer (e.g whenever test deferCleanup is creating new 'default' config object) + if len(config.Finalizers) == 0 { + err = util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) + Expect(err).NotTo(HaveOccurred()) + } config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{ EnableInjector: true, EnableOperatorWebhook: true, @@ -240,6 +246,38 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { client.ObjectKey{Name: "network-resources-injector-config"}) }) + It("should add/delete finalizer 'operatorconfig' when SriovOperatorConfig/default is added/deleted", func() { + DeferCleanup(k8sClient.Create, 
context.Background(), makeDefaultSriovOpConfig()) + + // verify that finalizer has been added upon object creation + config := &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + err := k8sClient.Delete(context.Background(), &sriovnetworkv1.SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: "default"}, + }) + Expect(err).NotTo(HaveOccurred()) + + // verify that finalizer has been removed + var empty []string + config = &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal(empty)) + }) + It("should be able to update the node selector of sriov-network-config-daemon", func() { By("specify the configDaemonNodeSelector") nodeSelector := map[string]string{"node-role.kubernetes.io/worker": ""} From b1bb0443823ed741d52f8fc55739d93dd6dc0ef6 Mon Sep 17 00:00:00 2001 From: Ido Heyvi Date: Mon, 28 Oct 2024 15:14:05 +0200 Subject: [PATCH 27/38] adding sriov operator config cleanup binary, to be used under helm uninstall pre-delete hook Signed-off-by: Ido Heyvi --- Dockerfile | 2 + Makefile | 2 +- .../cleanup.go | 83 ++++++++ .../cleanup_test.go | 177 ++++++++++++++++++ .../main.go | 38 ++++ .../suite_test.go | 121 ++++++++++++ .../templates/pre-delete-webooks.yaml | 27 +++ 7 files changed, 449 insertions(+), 1 deletion(-) create mode 100644 
cmd/sriov-network-operator-config-cleanup/cleanup.go create mode 100644 cmd/sriov-network-operator-config-cleanup/cleanup_test.go create mode 100644 cmd/sriov-network-operator-config-cleanup/main.go create mode 100644 cmd/sriov-network-operator-config-cleanup/suite_test.go create mode 100644 deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml diff --git a/Dockerfile b/Dockerfile index 2b26247e8..7735bef7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,9 +2,11 @@ FROM golang:1.22 AS builder WORKDIR /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator COPY . . RUN make _build-manager BIN_PATH=build/_output/cmd +RUN make _build-sriov-network-operator-config-cleanup BIN_PATH=build/_output/cmd FROM quay.io/centos/centos:stream9 COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/manager /usr/bin/sriov-network-operator +COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/sriov-network-operator-config-cleanup /usr/bin/sriov-network-operator-config-cleanup COPY bindata /bindata ENV OPERATOR_NAME=sriov-network-operator CMD ["/usr/bin/sriov-network-operator"] diff --git a/Makefile b/Makefile index 3718b75bd..310f1dc52 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ GOLANGCI_LINT_VER = v1.55.2 all: generate lint build -build: manager _build-sriov-network-config-daemon _build-webhook +build: manager _build-sriov-network-config-daemon _build-webhook _build-sriov-network-operator-config-cleanup _build-%: WHAT=$* hack/build-go.sh diff --git a/cmd/sriov-network-operator-config-cleanup/cleanup.go b/cmd/sriov-network-operator-config-cleanup/cleanup.go new file mode 100644 index 000000000..e53deba34 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/cleanup.go @@ -0,0 +1,83 @@ +package main + +import ( + "context" + "time" + + "github.com/spf13/cobra" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/log" + + 
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" + + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/watch" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/typed/sriovnetwork/v1" +) + +var ( + namespace string + watchTO int +) + +func init() { + rootCmd.Flags().StringVarP(&namespace, "namespace", "n", "", "designated SriovOperatorConfig namespace") + rootCmd.Flags().IntVarP(&watchTO, "watch-timeout", "w", 10, "sriov-operator config post-delete watch timeout ") +} + +func runCleanupCmd(cmd *cobra.Command, args []string) error { + // init logger + snolog.InitLog() + setupLog := log.Log.WithName("sriov-network-operator-config-cleanup") + setupLog.Info("Run sriov-network-operator-config-cleanup") + + // adding context timeout although client-go Delete should be non-blocking by default + ctx, timeoutFunc := context.WithTimeout(context.Background(), time.Second*time.Duration(watchTO)) + defer timeoutFunc() + + restConfig := ctrl.GetConfigOrDie() + sriovcs, err := sriovnetworkv1.NewForConfig(restConfig) + if err != nil { + setupLog.Error(err, "failed to create 'sriovnetworkv1' clientset") + } + + err = sriovcs.SriovOperatorConfigs(namespace).Delete(context.Background(), "default", metav1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil + } + setupLog.Error(err, "failed to delete SriovOperatorConfig") + return err + } + + // watching 'default' config deletion with context timeout, in case sriov-operator fails to delete 'default' config + watcher, err := sriovcs.SriovOperatorConfigs(namespace).Watch(ctx, metav1.ListOptions{Watch: true}) + if err != nil { + setupLog.Error(err, "failed creating 'default' SriovOperatorConfig object watcher") + return err + } + defer watcher.Stop() + for { + select { + case event := <-watcher.ResultChan(): + if event.Type == watch.Deleted { + setupLog.Info("'default' 
SriovOperatorConfig is deleted") + return nil + } + + case <-ctx.Done(): + // check whether object might has been deleted before watch event triggered + _, err := sriovcs.SriovOperatorConfigs(namespace).Get(context.Background(), "default", metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil + } + } + err = ctx.Err() + setupLog.Error(err, "timeout has occurred for 'default' SriovOperatorConfig deletion") + return err + } + } +} diff --git a/cmd/sriov-network-operator-config-cleanup/cleanup_test.go b/cmd/sriov-network-operator-config-cleanup/cleanup_test.go new file mode 100644 index 000000000..f7926d834 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/cleanup_test.go @@ -0,0 +1,177 @@ +package main + +import ( + "context" + "sync" + + "github.com/golang/mock/gomock" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/manager" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/controllers" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" + mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" +) + +type configController struct { + k8sManager manager.Manager + ctx context.Context + cancel context.CancelFunc + wg *sync.WaitGroup +} + +var ( + controller *configController + testNamespace string = "sriov-network-operator" + defaultSriovOperatorSpec = sriovnetworkv1.SriovOperatorConfigSpec{ + EnableInjector: true, + EnableOperatorWebhook: true, + LogLevel: 2, + FeatureGates: nil, + } +) + +var _ = 
Describe("cleanup", Ordered, func() { + BeforeAll(func() { + By("Create SriovOperatorConfig controller k8s objs") + config := getDefaultSriovOperatorConfig() + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + + somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{} + somePolicy.SetNamespace(testNamespace) + somePolicy.SetName("some-policy") + somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{ + NumVfs: 5, + NodeSelector: map[string]string{"foo": "bar"}, + NicSelector: sriovnetworkv1.SriovNetworkNicSelector{}, + Priority: 20, + } + Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred()) + DeferCleanup(func() { + err := k8sClient.Delete(context.Background(), somePolicy) + Expect(err).ToNot(HaveOccurred()) + }) + + controller = newConfigController() + + }) + + It("test webhook cleanup flow", func() { + controller.start() + defer controller.stop() + + cmd := &cobra.Command{} + namespace = testNamespace + // verify that finalizer has been added, by controller, upon object creation + config := &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + Expect(runCleanupCmd(cmd, []string{})).Should(Succeed()) + config = &sriovnetworkv1.SriovOperatorConfig{} + err := util.WaitForNamespacedObjectDeleted(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) + Expect(err).NotTo(HaveOccurred()) + + }) + + It("test 'default' config cleanup timeout", func() { + // in this test case sriov-operator controller has been scaled down. 
+ // we are testing returned ctx timeout error, for not being able to delete 'default' config object + config := getDefaultSriovOperatorConfig() + config.Finalizers = []string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME} + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + + cmd := &cobra.Command{} + namespace = testNamespace + // verify that finalizer has been added, by controller, upon object creation + config = &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + watchTO = 1 + err := runCleanupCmd(cmd, []string{}) + Expect(err.Error()).To(ContainSubstring("context deadline exceeded")) + }) +}) + +func getDefaultSriovOperatorConfig() *sriovnetworkv1.SriovOperatorConfig { + return &sriovnetworkv1.SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + Namespace: testNamespace, + }, + Spec: defaultSriovOperatorSpec, + } +} + +func newConfigController() *configController { + // setup controller manager + By("Setup controller manager") + k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + }) + Expect(err).ToNot(HaveOccurred()) + + t := GinkgoT() + mockCtrl := gomock.NewController(t) + platformHelper := mock_platforms.NewMockInterface(mockCtrl) + platformHelper.EXPECT().GetFlavor().Return(openshift.OpenshiftFlavorDefault).AnyTimes() + platformHelper.EXPECT().IsOpenshiftCluster().Return(false).AnyTimes() + platformHelper.EXPECT().IsHypershift().Return(false).AnyTimes() + + err = (&controllers.SriovOperatorConfigReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + PlatformHelper: platformHelper, + FeatureGate: 
featuregate.New(), + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + + ctx, cancel := context.WithCancel(context.Background()) + wg := sync.WaitGroup{} + controller = &configController{ + k8sManager: k8sManager, + ctx: ctx, + cancel: cancel, + wg: &wg, + } + + return controller +} + +func (c *configController) start() { + c.wg.Add(1) + go func() { + defer c.wg.Done() + defer GinkgoRecover() + By("Start controller manager") + err := c.k8sManager.Start(c.ctx) + Expect(err).ToNot(HaveOccurred()) + }() +} + +func (c *configController) stop() { + c.cancel() + c.wg.Wait() +} diff --git a/cmd/sriov-network-operator-config-cleanup/main.go b/cmd/sriov-network-operator-config-cleanup/main.go new file mode 100644 index 000000000..51874e54e --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/main.go @@ -0,0 +1,38 @@ +package main + +import ( + "flag" + "os" + + "github.com/spf13/cobra" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/log" + + snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" +) + +const ( + componentName = "sriov-network-operator-config-cleanup" +) + +var ( + rootCmd = &cobra.Command{ + Use: componentName, + Short: "Removes 'default' SriovOperatorConfig", + Long: `Removes 'default' SriovOperatorConfig in order to cleanup non-namespaced objects e.g clusterroles/clusterrolebinding/validating/mutating webhooks + +Example: sriov-network-operator-config-cleanup -n `, + RunE: runCleanupCmd, + } +) + +func main() { + klog.InitFlags(nil) + snolog.BindFlags(flag.CommandLine) + rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine) + + if err := rootCmd.Execute(); err != nil { + log.Log.Error(err, "Error executing sriov-network-operator-config-cleanup") + os.Exit(1) + } +} diff --git a/cmd/sriov-network-operator-config-cleanup/suite_test.go b/cmd/sriov-network-operator-config-cleanup/suite_test.go new file mode 100644 index 000000000..ee1815ff7 --- /dev/null +++ 
b/cmd/sriov-network-operator-config-cleanup/suite_test.go @@ -0,0 +1,121 @@ +package main + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "go.uber.org/zap/zapcore" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + + //+kubebuilder:scaffold:imports + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +var ( + k8sClient client.Client + testEnv *envtest.Environment + cfg *rest.Config + kubecfgPath string +) + +var _ = BeforeSuite(func() { + + logf.SetLogger(zap.New( + zap.WriteTo(GinkgoWriter), + zap.UseDevMode(true), + func(o *zap.Options) { + o.TimeEncoder = zapcore.RFC3339NanoTimeEncoder + })) + + // Go to project root directory + err := os.Chdir("../..") + Expect(err).NotTo(HaveOccurred()) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("config", "crd", "bases"), filepath.Join("test", "util", "crds")}, + ErrorIfCRDPathMissing: true, + } + + testEnv.ControlPlane.GetAPIServer().Configure().Set("disable-admission-plugins", "MutatingAdmissionWebhook", "ValidatingAdmissionWebhook") + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + apiserverDir := testEnv.ControlPlane.GetAPIServer().CertDir + kubecfgPath = findKubecfg(apiserverDir, ".kubecfg") + err = os.Setenv("KUBECONFIG", kubecfgPath) + Expect(err).NotTo(HaveOccurred()) + + By("registering schemes") + err = sriovnetworkv1.AddToScheme(scheme.Scheme) + 
Expect(err).NotTo(HaveOccurred()) + vars.Config = cfg + vars.Scheme = scheme.Scheme + vars.Namespace = testNamespace + + By("creating K8s client") + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + By("creating default/common k8s objects for tests") + // Create test namespace + ns := &corev1.Namespace{ + TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: testNamespace, + }, + Spec: corev1.NamespaceSpec{}, + Status: corev1.NamespaceStatus{}, + } + ctx := context.Background() + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + if testEnv != nil { + Eventually(func() error { + return testEnv.Stop() + }, util.APITimeout, time.Second).ShouldNot(HaveOccurred()) + } +}) + +func findKubecfg(path, ext string) string { + var cfg string + filepath.WalkDir(path, func(s string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if filepath.Ext(d.Name()) == ext { + cfg = s + } + return nil + }) + return cfg +} + +func TestAPIs(t *testing.T) { + _, reporterConfig := GinkgoConfiguration() + + RegisterFailHandler(Fail) + + RunSpecs(t, "operator-webhook Suite", reporterConfig) +} diff --git a/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml new file mode 100644 index 000000000..8fc7fa06b --- /dev/null +++ b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml @@ -0,0 +1,27 @@ +# The following job will be used as Helm pre-delete hook. It executes a small go-client binary +# which intent to delete 'default' SriovOperatorConfig, that triggers operator removal of generated cluster objects +# e.g. 
mutating/validating webhooks, within operator's recoinciling loop and +# preventing operator cluster object remainings while using helm uninstall +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "sriov-network-operator.fullname" . }}-pre-delete-hook + namespace: {{ .Release.Namespace }} + annotations: + "helm.sh/hook": pre-delete + "helm.sh/hook-delete-policy": hook-succeeded,hook-failed +spec: + template: + spec: + serviceAccountName: {{ include "sriov-network-operator.fullname" . }} + containers: + - name: cleanup + image: {{ .Values.images.operator }} + command: + - sriov-network-operator-config-cleanup + args: + - --namespace + - {{ .Release.Namespace }} + restartPolicy: Never + backoffLimit: 2 + From 6d32ec0745d31821eddfcf77a2a314ddb146c0e8 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 25 Oct 2024 09:14:08 +0200 Subject: [PATCH 28/38] kernel: Set arguments based on CPU architecture Kernel arguments like `intel_iommu=on` does not have sense on AMD or ARM systems and some user might complain about their presence, though they are likely to be harmless. Also, on ARM systems the `iommu.passthrough` parameter is the one to use [1]. Improve `GHWLib` to bridge CPU information from the library. Add `CpuInfoProviderInterface` and inject it into the GenericPlugin to implement the per CPU vendor logic. 
[1] https://github.com/torvalds/linux/blob/master/Documentation/admin-guide/kernel-parameters.txt#L2343 Signed-off-by: Andrea Panattoni --- pkg/consts/constants.go | 7 ++-- pkg/helper/mock/mock_helper.go | 15 +++++++ pkg/host/internal/cpu/cpu.go | 40 ++++++++++++++++++ pkg/host/internal/lib/ghw/ghw.go | 8 ++++ pkg/host/internal/lib/ghw/mock/mock_ghw.go | 16 ++++++++ pkg/host/manager.go | 5 +++ pkg/host/mock/mock_host.go | 15 +++++++ pkg/host/types/interfaces.go | 13 ++++++ pkg/plugins/generic/generic_plugin.go | 27 ++++++++++++- pkg/plugins/generic/generic_plugin_test.go | 47 +++++++++++++++++----- 10 files changed, 178 insertions(+), 15 deletions(-) create mode 100644 pkg/host/internal/cpu/cpu.go diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f3c076111..f7025c90d 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -121,9 +121,10 @@ const ( `IMPORT{program}="/etc/udev/switchdev-vf-link-name.sh $attr{phys_port_name}", ` + `NAME="%s_$env{NUMBER}"` - KernelArgPciRealloc = "pci=realloc" - KernelArgIntelIommu = "intel_iommu=on" - KernelArgIommuPt = "iommu=pt" + KernelArgPciRealloc = "pci=realloc" + KernelArgIntelIommu = "intel_iommu=on" + KernelArgIommuPt = "iommu=pt" + KernelArgIommuPassthrough = "iommu.passthrough=1" // Feature gates // ParallelNicConfigFeatureGate: allow to configure nics in parallel diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index cfca2a768..432d741be 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -351,6 +351,21 @@ func (mr *MockHostHelpersInterfaceMockRecorder) EnableService(service interface{ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnableService", reflect.TypeOf((*MockHostHelpersInterface)(nil).EnableService), service) } +// GetCPUVendor mocks base method. 
+func (m *MockHostHelpersInterface) GetCPUVendor() (types.CPUVendor, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetCPUVendor") + ret0, _ := ret[0].(types.CPUVendor) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetCPUVendor indicates an expected call of GetCPUVendor. +func (mr *MockHostHelpersInterfaceMockRecorder) GetCPUVendor() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCPUVendor", reflect.TypeOf((*MockHostHelpersInterface)(nil).GetCPUVendor)) +} + // GetCheckPointNodeState mocks base method. func (m *MockHostHelpersInterface) GetCheckPointNodeState() (*v1.SriovNetworkNodeState, error) { m.ctrl.T.Helper() diff --git a/pkg/host/internal/cpu/cpu.go b/pkg/host/internal/cpu/cpu.go new file mode 100644 index 000000000..fd02157e6 --- /dev/null +++ b/pkg/host/internal/cpu/cpu.go @@ -0,0 +1,40 @@ +package cpu + +import ( + "fmt" + + ghwPkg "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/ghw" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" +) + +type cpuInfoProvider struct { + ghwLib ghwPkg.GHWLib +} + +func New(ghwLib ghwPkg.GHWLib) *cpuInfoProvider { + return &cpuInfoProvider{ + ghwLib: ghwLib, + } +} + +func (c *cpuInfoProvider) GetCPUVendor() (types.CPUVendor, error) { + cpuInfo, err := c.ghwLib.CPU() + if err != nil { + return -1, fmt.Errorf("can't retrieve the CPU vendor: %w", err) + } + + if len(cpuInfo.Processors) == 0 { + return -1, fmt.Errorf("wrong CPU information retrieved: %v", cpuInfo) + } + + switch cpuInfo.Processors[0].Vendor { + case "GenuineIntel": + return types.CPUVendorIntel, nil + case "AuthenticAMD": + return types.CPUVendorAMD, nil + case "ARM": + return types.CPUVendorARM, nil + } + + return -1, fmt.Errorf("unknown CPU vendor: %s", cpuInfo.Processors[0].Vendor) +} diff --git a/pkg/host/internal/lib/ghw/ghw.go b/pkg/host/internal/lib/ghw/ghw.go index 6a6829604..d518977e4 100644 --- 
a/pkg/host/internal/lib/ghw/ghw.go +++ b/pkg/host/internal/lib/ghw/ghw.go @@ -2,6 +2,7 @@ package ghw import ( "github.com/jaypipes/ghw" + "github.com/jaypipes/ghw/pkg/cpu" ) func New() GHWLib { @@ -12,6 +13,9 @@ func New() GHWLib { type GHWLib interface { // PCI returns a pointer to an Info that provide methods to access info about devices PCI() (Info, error) + + // CPU returns a pointer to an Info that provide methods to access info about devices + CPU() (*cpu.Info, error) } // Info interface provide methods to access info about devices @@ -27,3 +31,7 @@ type libWrapper struct{} func (w *libWrapper) PCI() (Info, error) { return ghw.PCI() } + +func (w *libWrapper) CPU() (*cpu.Info, error) { + return ghw.CPU() +} diff --git a/pkg/host/internal/lib/ghw/mock/mock_ghw.go b/pkg/host/internal/lib/ghw/mock/mock_ghw.go index 2e2b4b5c5..9d6092362 100644 --- a/pkg/host/internal/lib/ghw/mock/mock_ghw.go +++ b/pkg/host/internal/lib/ghw/mock/mock_ghw.go @@ -9,6 +9,7 @@ import ( gomock "github.com/golang/mock/gomock" ghw "github.com/jaypipes/ghw" + cpu "github.com/jaypipes/ghw/pkg/cpu" ghw0 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/ghw" ) @@ -35,6 +36,21 @@ func (m *MockGHWLib) EXPECT() *MockGHWLibMockRecorder { return m.recorder } +// CPU mocks base method. +func (m *MockGHWLib) CPU() (*cpu.Info, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CPU") + ret0, _ := ret[0].(*cpu.Info) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CPU indicates an expected call of CPU. +func (mr *MockGHWLibMockRecorder) CPU() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CPU", reflect.TypeOf((*MockGHWLib)(nil).CPU)) +} + // PCI mocks base method. 
func (m *MockGHWLib) PCI() (ghw0.Info, error) { m.ctrl.T.Helper() diff --git a/pkg/host/manager.go b/pkg/host/manager.go index 02a77a659..44bd45807 100644 --- a/pkg/host/manager.go +++ b/pkg/host/manager.go @@ -2,6 +2,7 @@ package host import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/bridge" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/cpu" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/infiniband" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/kernel" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/dputils" @@ -30,6 +31,7 @@ type HostManagerInterface interface { types.VdpaInterface types.InfinibandInterface types.BridgeInterface + types.CPUInfoProviderInterface } type hostManager struct { @@ -42,6 +44,7 @@ type hostManager struct { types.VdpaInterface types.InfinibandInterface types.BridgeInterface + types.CPUInfoProviderInterface } func NewHostManager(utilsInterface utils.CmdInterface) (HostManagerInterface, error) { @@ -61,6 +64,7 @@ func NewHostManager(utilsInterface utils.CmdInterface) (HostManagerInterface, er } br := bridge.New() sr := sriov.New(utilsInterface, k, n, u, v, ib, netlinkLib, dpUtils, sriovnetLib, ghwLib, br) + cpuInfoProvider := cpu.New(ghwLib) return &hostManager{ utilsInterface, k, @@ -71,5 +75,6 @@ func NewHostManager(utilsInterface utils.CmdInterface) (HostManagerInterface, er v, ib, br, + cpuInfoProvider, }, nil } diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index cb4d1480a..5ebed46aa 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -321,6 +321,21 @@ func (mr *MockHostManagerInterfaceMockRecorder) EnableService(service interface{ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnableService", reflect.TypeOf((*MockHostManagerInterface)(nil).EnableService), service) } +// GetCPUVendor mocks base method. 
+func (m *MockHostManagerInterface) GetCPUVendor() (types.CPUVendor, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetCPUVendor") + ret0, _ := ret[0].(types.CPUVendor) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetCPUVendor indicates an expected call of GetCPUVendor. +func (mr *MockHostManagerInterfaceMockRecorder) GetCPUVendor() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCPUVendor", reflect.TypeOf((*MockHostManagerInterface)(nil).GetCPUVendor)) +} + // GetCurrentKernelArgs mocks base method. func (m *MockHostManagerInterface) GetCurrentKernelArgs() (string, error) { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index 5918dca34..c6e0c8faf 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -187,3 +187,16 @@ type InfinibandInterface interface { // ConfigureVfGUID configures and sets a GUID for an IB VF device ConfigureVfGUID(vfAddr string, pfAddr string, vfID int, pfLink netlink.Link) error } + +type CPUVendor int + +const ( + CPUVendorIntel CPUVendor = iota + CPUVendorAMD + CPUVendorARM +) + +type CPUInfoProviderInterface interface { + // Retrieve the CPU vendor of the current system + GetCPUVendor() (CPUVendor, error) +} diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 14b1903e5..552f8142a 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -13,6 +13,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" + hostTypes "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" 
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" @@ -419,9 +420,31 @@ func (p *GenericPlugin) shouldConfigureBridges() bool { func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetworkNodeState) { driverState := p.DriverStateMap[Vfio] + + kernelArgFnByCPUVendor := map[hostTypes.CPUVendor]func(){ + hostTypes.CPUVendorIntel: func() { + p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) + p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + }, + hostTypes.CPUVendorAMD: func() { + p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + }, + hostTypes.CPUVendorARM: func() { + p.addToDesiredKernelArgs(consts.KernelArgIommuPassthrough) + }, + } + if !driverState.DriverLoaded && driverState.NeedDriverFunc(state, driverState) { - p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) - p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + cpuVendor, err := p.helpers.GetCPUVendor() + if err != nil { + log.Log.Error(err, "can't get CPU vendor, falling back to Intel") + cpuVendor = hostTypes.CPUVendorIntel + } + + addKernelArgFn := kernelArgFnByCPUVendor[cpuVendor] + if addKernelArgFn != nil { + addKernelArgFn() + } } } diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 0d6701a64..0a6674712 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -10,6 +10,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" + hostTypes "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -850,8 +851,9 @@ var _ = Describe("Generic plugin", func() { Expect(changed).To(BeTrue()) }) - It("should detect 
changes on status due to missing kernel args", func() { - networkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ + Context("Kernel Args", func() { + + vfioNetworkNodeState := &sriovnetworkv1.SriovNetworkNodeState{ Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{ Interfaces: sriovnetworkv1.Interfaces{{ PciAddress: "0000:00:00.0", @@ -896,16 +898,41 @@ var _ = Describe("Generic plugin", func() { }, } - // Load required kernel args. - genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(networkNodeState) + It("should detect changes on status due to missing kernel args", func() { + hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorIntel, nil) - hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + // Load required kernel args. + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - changed, err := genericPlugin.CheckStatusChanges(networkNodeState) - Expect(err).ToNot(HaveOccurred()) - Expect(changed).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ + consts.KernelArgIntelIommu: false, + consts.KernelArgIommuPt: false, + })) + + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + + changed, err := genericPlugin.CheckStatusChanges(vfioNetworkNodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + + It("should set the correct kernel args on AMD CPUs", func() { + hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorAMD, nil) + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ + 
consts.KernelArgIommuPt: false, + })) + }) + + It("should set the correct kernel args on ARM CPUs", func() { + hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorARM, nil) + genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ + consts.KernelArgIommuPassthrough: false, + })) + }) }) It("should load vfio_pci driver", func() { From 5522c96101c673ad15efbc5a7acd1596283bc19c Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Fri, 25 Oct 2024 09:38:44 +0200 Subject: [PATCH 29/38] Update `github.com/jaypipes/ghw` To include - https://github.com/jaypipes/ghw/pull/387 Signed-off-by: Andrea Panattoni --- go.mod | 21 +-- go.sum | 50 +++--- pkg/host/internal/lib/ghw/ghw.go | 12 +- pkg/host/internal/lib/ghw/mock/mock_ghw.go | 44 +----- pkg/host/internal/sriov/sriov.go | 2 +- pkg/host/internal/sriov/sriov_test.go | 176 ++++++++++----------- pkg/platforms/openstack/openstack.go | 4 +- 7 files changed, 130 insertions(+), 179 deletions(-) diff --git a/go.mod b/go.mod index 0353c7ec1..350dbb82d 100644 --- a/go.mod +++ b/go.mod @@ -15,8 +15,8 @@ require ( github.com/google/renameio/v2 v2.0.0 github.com/google/uuid v1.3.1 github.com/hashicorp/go-retryablehttp v0.7.7 - github.com/jaypipes/ghw v0.9.0 - github.com/jaypipes/pcidb v1.0.0 + github.com/jaypipes/ghw v0.13.1-0.20241024164530-c1bfc6e6cd6a + github.com/jaypipes/pcidb v1.0.1 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 github.com/k8snetworkplumbingwg/sriov-network-device-plugin v0.0.0-20221127172732-a5a7395122e3 github.com/k8snetworkplumbingwg/sriovnet v1.2.0 @@ -33,7 +33,7 @@ require ( github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 github.com/safchain/ethtool v0.3.0 - github.com/spf13/cobra v1.7.0 + github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.8.4 github.com/vishvananda/netlink v1.2.1-beta.2.0.20240221172127-ec7bcb248e94 
github.com/vishvananda/netns v0.0.4 @@ -131,6 +131,7 @@ require ( github.com/robfig/cron v1.2.0 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/samber/lo v1.47.0 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/spf13/afero v1.9.4 // indirect github.com/spf13/cast v1.5.0 // indirect @@ -141,16 +142,16 @@ require ( go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect go.uber.org/multierr v1.11.0 // indirect go4.org v0.0.0-20200104003542-c7e774b10ea0 // indirect - golang.org/x/crypto v0.21.0 // indirect + golang.org/x/crypto v0.23.0 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect - golang.org/x/mod v0.13.0 // indirect - golang.org/x/net v0.23.0 // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.13.0 // indirect - golang.org/x/sync v0.4.0 // indirect + golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect - golang.org/x/term v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.14.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect diff --git a/go.sum b/go.sum index 6f90a1a94..4d8d4c171 100644 --- a/go.sum +++ b/go.sum @@ -119,7 +119,7 @@ github.com/coreos/vcontext v0.0.0-20190529201340-22b159166068/go.mod h1:E+6hug9b github.com/coreos/vcontext v0.0.0-20191017033345-260217907eb5/go.mod h1:E+6hug9bFSe0KZ2ZAzr8M9F5JlArJjv5D1JS7KSkPKE= github.com/coreos/vcontext v0.0.0-20230201181013-d72178a18687 h1:uSmlDgJGbUB0bwQBcZomBTottKwEDF5fF8UjSwKSzWM= github.com/coreos/vcontext v0.0.0-20230201181013-d72178a18687/go.mod 
h1:Salmysdw7DAVuobBW/LwsKKgpyCPHUhjyJoMJD+ZJiI= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= @@ -147,7 +147,6 @@ github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0X github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 h1:Mn26/9ZMNWSw9C9ERFA1PUxfmGpolnw2v0bKOREu5ew= github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= @@ -285,13 +284,12 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1: github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jaypipes/ghw v0.9.0 h1:TWF4wNIGtZcgDJaiNcFgby5BR8s2ixcUe0ydxNO2McY= 
-github.com/jaypipes/ghw v0.9.0/go.mod h1:dXMo19735vXOjpIBDyDYSp31sB2u4hrtRCMxInqQ64k= -github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8= -github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk= +github.com/jaypipes/ghw v0.13.1-0.20241024164530-c1bfc6e6cd6a h1:orxBMCkYww7RFCk3iCDP9DC3l+yKtp4VdWtctCTyjPQ= +github.com/jaypipes/ghw v0.13.1-0.20241024164530-c1bfc6e6cd6a/go.mod h1:F4UM7Ix55ONYwD3Lck2S4BI+hKezOwtizuJxXDFsioo= +github.com/jaypipes/pcidb v1.0.1 h1:WB2zh27T3nwg8AE8ei81sNRb9yWBii3JGNJtT7K9Oic= +github.com/jaypipes/pcidb v1.0.1/go.mod h1:6xYUz/yYEyOkIkUt2t2J2folIuZ4Yg6uByCGFXMCeE4= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= @@ -336,7 +334,6 @@ github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQth github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= @@ -411,6 +408,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/safchain/ethtool v0.3.0 h1:gimQJpsI6sc1yIqP/y8GYgiXn/NjgvpM0RNoWLVVmP0= 
github.com/safchain/ethtool v0.3.0/go.mod h1:SA9BwrgyAqNo7M+uaL6IYbxpm5wk3L7Mm6ocLW+CJUs= +github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc= +github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= @@ -423,10 +422,8 @@ github.com/spf13/afero v1.9.4/go.mod h1:iUV7ddyEEZPO5gA3zD4fJt6iStLlL+Lg4m2cihcD github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= -github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.6-0.20210604193023-d5e0c0615ace h1:9PNP1jnUjRhfmGMlkXHjYPishpcw4jpSt/V/xYY3FMA= github.com/spf13/pflag v1.0.6-0.20210604193023-d5e0c0615ace/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -494,8 +491,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 
golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -533,8 +530,8 @@ golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= -golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -570,8 +567,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -595,8 +592,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= -golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -638,7 +635,6 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys 
v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -651,8 +647,8 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= -golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -662,8 +658,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod 
h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -721,8 +717,8 @@ golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/host/internal/lib/ghw/ghw.go b/pkg/host/internal/lib/ghw/ghw.go index d518977e4..2a4ba609d 100644 --- a/pkg/host/internal/lib/ghw/ghw.go +++ 
b/pkg/host/internal/lib/ghw/ghw.go @@ -3,6 +3,7 @@ package ghw import ( "github.com/jaypipes/ghw" "github.com/jaypipes/ghw/pkg/cpu" + "github.com/jaypipes/ghw/pkg/pci" ) func New() GHWLib { @@ -12,23 +13,16 @@ func New() GHWLib { //go:generate ../../../../../bin/mockgen -destination mock/mock_ghw.go -source ghw.go type GHWLib interface { // PCI returns a pointer to an Info that provide methods to access info about devices - PCI() (Info, error) + PCI() (*pci.Info, error) // CPU returns a pointer to an Info that provide methods to access info about devices CPU() (*cpu.Info, error) } -// Info interface provide methods to access info about devices -type Info interface { - // ListDevices returns a list of pointers to Device structs present on the - // host system - ListDevices() []*ghw.PCIDevice -} - type libWrapper struct{} // PCI returns a pointer to an Info that provide methods to access info about devices -func (w *libWrapper) PCI() (Info, error) { +func (w *libWrapper) PCI() (*pci.Info, error) { return ghw.PCI() } diff --git a/pkg/host/internal/lib/ghw/mock/mock_ghw.go b/pkg/host/internal/lib/ghw/mock/mock_ghw.go index 9d6092362..ded8784bf 100644 --- a/pkg/host/internal/lib/ghw/mock/mock_ghw.go +++ b/pkg/host/internal/lib/ghw/mock/mock_ghw.go @@ -8,9 +8,8 @@ import ( reflect "reflect" gomock "github.com/golang/mock/gomock" - ghw "github.com/jaypipes/ghw" cpu "github.com/jaypipes/ghw/pkg/cpu" - ghw0 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/ghw" + pci "github.com/jaypipes/ghw/pkg/pci" ) // MockGHWLib is a mock of GHWLib interface. @@ -52,10 +51,10 @@ func (mr *MockGHWLibMockRecorder) CPU() *gomock.Call { } // PCI mocks base method. 
-func (m *MockGHWLib) PCI() (ghw0.Info, error) { +func (m *MockGHWLib) PCI() (*pci.Info, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "PCI") - ret0, _ := ret[0].(ghw0.Info) + ret0, _ := ret[0].(*pci.Info) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -65,40 +64,3 @@ func (mr *MockGHWLibMockRecorder) PCI() *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PCI", reflect.TypeOf((*MockGHWLib)(nil).PCI)) } - -// MockInfo is a mock of Info interface. -type MockInfo struct { - ctrl *gomock.Controller - recorder *MockInfoMockRecorder -} - -// MockInfoMockRecorder is the mock recorder for MockInfo. -type MockInfoMockRecorder struct { - mock *MockInfo -} - -// NewMockInfo creates a new mock instance. -func NewMockInfo(ctrl *gomock.Controller) *MockInfo { - mock := &MockInfo{ctrl: ctrl} - mock.recorder = &MockInfoMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MockInfo) EXPECT() *MockInfoMockRecorder { - return m.recorder -} - -// ListDevices mocks base method. -func (m *MockInfo) ListDevices() []*ghw.PCIDevice { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ListDevices") - ret0, _ := ret[0].([]*ghw.PCIDevice) - return ret0 -} - -// ListDevices indicates an expected call of ListDevices. 
-func (mr *MockInfoMockRecorder) ListDevices() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListDevices", reflect.TypeOf((*MockInfo)(nil).ListDevices)) -} diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index bf9919a7e..3e5989bae 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -217,7 +217,7 @@ func (s *sriov) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]sri return nil, fmt.Errorf("DiscoverSriovDevices(): error getting PCI info: %v", err) } - devices := pci.ListDevices() + devices := pci.Devices if len(devices) == 0 { return nil, fmt.Errorf("DiscoverSriovDevices(): could not retrieve PCI devices") } diff --git a/pkg/host/internal/sriov/sriov_test.go b/pkg/host/internal/sriov/sriov_test.go index f30e93773..319bacf54 100644 --- a/pkg/host/internal/sriov/sriov_test.go +++ b/pkg/host/internal/sriov/sriov_test.go @@ -7,7 +7,7 @@ import ( "syscall" "github.com/golang/mock/gomock" - "github.com/jaypipes/ghw" + "github.com/jaypipes/ghw/pkg/pci" "github.com/jaypipes/pcidb" "github.com/vishvananda/netlink" @@ -57,12 +57,7 @@ var _ = Describe("SRIOV", func() { }) Context("DiscoverSriovDevices", func() { - var ( - ghwInfoMock *ghwMockPkg.MockInfo - ) BeforeEach(func() { - ghwInfoMock = ghwMockPkg.NewMockInfo(testCtrl) - ghwLibMock.EXPECT().PCI().Return(ghwInfoMock, nil) origNicMap := sriovnetworkv1.NicIDMap sriovnetworkv1.InitNicIDMapFromList([]string{ "15b3 101d 101e", @@ -73,7 +68,7 @@ var _ = Describe("SRIOV", func() { }) It("discovered", func() { - ghwInfoMock.EXPECT().ListDevices().Return(getTestPCIDevices()) + ghwLibMock.EXPECT().PCI().Return(getTestPCIDevices(), nil) dputilsLibMock.EXPECT().IsSriovVF("0000:d8:00.0").Return(false) dputilsLibMock.EXPECT().IsSriovVF("0000:d8:00.2").Return(true) dputilsLibMock.EXPECT().IsSriovVF("0000:3b:00.0").Return(false) @@ -628,91 +623,94 @@ var _ = Describe("SRIOV", func() { }) }) -func 
getTestPCIDevices() []*ghw.PCIDevice { - return []*ghw.PCIDevice{{ - Driver: "mlx5_core", - Address: "0000:d8:00.0", - Vendor: &pcidb.Vendor{ - ID: "15b3", - Name: "Mellanox Technologies", - }, - Product: &pcidb.Product{ - ID: "101d", - Name: "MT2892 Family [ConnectX-6 Dx]", - }, - Revision: "0x00", - Subsystem: &pcidb.Product{ - ID: "0083", - Name: "unknown", - }, - Class: &pcidb.Class{ - ID: "02", - Name: "Network controller", - }, - Subclass: &pcidb.Subclass{ - ID: "00", - Name: "Ethernet controller", - }, - ProgrammingInterface: &pcidb.ProgrammingInterface{ - ID: "00", - Name: "unknonw", - }, - }, - { - Driver: "mlx5_core", - Address: "0000:d8:00.2", - Vendor: &pcidb.Vendor{ - ID: "15b3", - Name: "Mellanox Technologies", - }, - Product: &pcidb.Product{ - ID: "101e", - Name: "ConnectX Family mlx5Gen Virtual Function", - }, - Revision: "0x00", - Subsystem: &pcidb.Product{ - ID: "0083", - Name: "unknown", +func getTestPCIDevices() *pci.Info { + return &pci.Info{ + Devices: []*pci.Device{ + { + Driver: "mlx5_core", + Address: "0000:d8:00.0", + Vendor: &pcidb.Vendor{ + ID: "15b3", + Name: "Mellanox Technologies", + }, + Product: &pcidb.Product{ + ID: "101d", + Name: "MT2892 Family [ConnectX-6 Dx]", + }, + Revision: "0x00", + Subsystem: &pcidb.Product{ + ID: "0083", + Name: "unknown", + }, + Class: &pcidb.Class{ + ID: "02", + Name: "Network controller", + }, + Subclass: &pcidb.Subclass{ + ID: "00", + Name: "Ethernet controller", + }, + ProgrammingInterface: &pcidb.ProgrammingInterface{ + ID: "00", + Name: "unknonw", + }, }, - Class: &pcidb.Class{ - ID: "02", - Name: "Network controller", + { + Driver: "mlx5_core", + Address: "0000:d8:00.2", + Vendor: &pcidb.Vendor{ + ID: "15b3", + Name: "Mellanox Technologies", + }, + Product: &pcidb.Product{ + ID: "101e", + Name: "ConnectX Family mlx5Gen Virtual Function", + }, + Revision: "0x00", + Subsystem: &pcidb.Product{ + ID: "0083", + Name: "unknown", + }, + Class: &pcidb.Class{ + ID: "02", + Name: "Network controller", + }, 
+ Subclass: &pcidb.Subclass{ + ID: "00", + Name: "Ethernet controller", + }, + ProgrammingInterface: &pcidb.ProgrammingInterface{ + ID: "00", + Name: "unknonw", + }, }, - Subclass: &pcidb.Subclass{ - ID: "00", - Name: "Ethernet controller", - }, - ProgrammingInterface: &pcidb.ProgrammingInterface{ - ID: "00", - Name: "unknonw", - }, - }, - { - Driver: "mlx5_core", - Address: "0000:3b:00.0", - Vendor: &pcidb.Vendor{ - ID: "15b3", - Name: "Mellanox Technologies", - }, - Product: &pcidb.Product{ - ID: "aaaa", // not supported - Name: "not supported", - }, - Class: &pcidb.Class{ - ID: "02", - Name: "Network controller", - }, - }, - { - Driver: "test", - Address: "0000:d7:16.5", - Vendor: &pcidb.Vendor{ - ID: "8086", - Name: "Intel Corporation", + { + Driver: "mlx5_core", + Address: "0000:3b:00.0", + Vendor: &pcidb.Vendor{ + ID: "15b3", + Name: "Mellanox Technologies", + }, + Product: &pcidb.Product{ + ID: "aaaa", // not supported + Name: "not supported", + }, + Class: &pcidb.Class{ + ID: "02", + Name: "Network controller", + }, }, - Class: &pcidb.Class{ - ID: "11", // not network device - Name: "Signal processing controller", + { + Driver: "test", + Address: "0000:d7:16.5", + Vendor: &pcidb.Vendor{ + ID: "8086", + Name: "Intel Corporation", + }, + Class: &pcidb.Class{ + ID: "11", // not network device + Name: "Signal processing controller", + }, }, }, } diff --git a/pkg/platforms/openstack/openstack.go b/pkg/platforms/openstack/openstack.go index 8968c96be..608ba6f87 100644 --- a/pkg/platforms/openstack/openstack.go +++ b/pkg/platforms/openstack/openstack.go @@ -362,7 +362,7 @@ func (o *openstackContext) CreateOpenstackDevicesInfo() error { return fmt.Errorf("CreateOpenstackDevicesInfo(): error getting PCI info: %v", err) } - devices := pci.ListDevices() + devices := pci.Devices if len(devices) == 0 { return fmt.Errorf("CreateOpenstackDevicesInfo(): could not retrieve PCI devices") } @@ -421,7 +421,7 @@ func (o *openstackContext) DiscoverSriovDevicesVirtual() 
([]sriovnetworkv1.Inter return nil, fmt.Errorf("DiscoverSriovDevicesVirtual(): error getting PCI info: %v", err) } - devices := pci.ListDevices() + devices := pci.Devices if len(devices) == 0 { return nil, fmt.Errorf("DiscoverSriovDevicesVirtual(): could not retrieve PCI devices") } From 73c1f81fa81c790246d111b1da99de4c7b17106d Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Sat, 23 Mar 2024 18:34:28 +0200 Subject: [PATCH 30/38] RDMA subsystem is implemented via ib_core module config. --- api/v1/sriovnetworknodestate_types.go | 8 ++ api/v1/sriovnetworkpoolconfig_types.go | 4 + api/v1/zz_generated.deepcopy.go | 17 +++ ...k.openshift.io_sriovnetworknodestates.yaml | 18 +++ ....openshift.io_sriovnetworkpoolconfigs.yaml | 6 + controllers/drain_controller.go | 99 +--------------- controllers/helper.go | 109 +++++++++++++++++- .../sriovnetworknodepolicy_controller.go | 7 ++ ...k.openshift.io_sriovnetworknodestates.yaml | 18 +++ ....openshift.io_sriovnetworkpoolconfigs.yaml | 6 + pkg/consts/constants.go | 3 + pkg/daemon/daemon.go | 10 ++ pkg/daemon/writer.go | 7 ++ pkg/helper/mock/mock_helper.go | 29 +++++ .../internal/lib/netlink/mock/mock_netlink.go | 15 +++ pkg/host/internal/lib/netlink/netlink.go | 7 ++ pkg/host/internal/network/network.go | 31 +++++ pkg/host/internal/network/network_test.go | 31 +++++ pkg/host/mock/mock_host.go | 29 +++++ pkg/host/types/interfaces.go | 4 + pkg/utils/cluster.go | 3 +- 21 files changed, 357 insertions(+), 104 deletions(-) diff --git a/api/v1/sriovnetworknodestate_types.go b/api/v1/sriovnetworknodestate_types.go index 4b90d61d2..e5f59d71c 100644 --- a/api/v1/sriovnetworknodestate_types.go +++ b/api/v1/sriovnetworknodestate_types.go @@ -27,6 +27,7 @@ import ( type SriovNetworkNodeStateSpec struct { Interfaces Interfaces `json:"interfaces,omitempty"` Bridges Bridges `json:"bridges,omitempty"` + System System `json:"system,omitempty"` } type Interfaces []Interface @@ -114,10 +115,17 @@ type OVSUplinkConfigExt struct { Interface 
OVSInterfaceConfig `json:"interface,omitempty"` } +type System struct { + // +kubebuilder:validation:Enum=shared;exclusive + //RDMA subsystem. Allowed value "shared", "exclusive". + RdmaMode string `json:"rdmaMode,omitempty"` +} + // SriovNetworkNodeStateStatus defines the observed state of SriovNetworkNodeState type SriovNetworkNodeStateStatus struct { Interfaces InterfaceExts `json:"interfaces,omitempty"` Bridges Bridges `json:"bridges,omitempty"` + System System `json:"system,omitempty"` SyncStatus string `json:"syncStatus,omitempty"` LastSyncError string `json:"lastSyncError,omitempty"` } diff --git a/api/v1/sriovnetworkpoolconfig_types.go b/api/v1/sriovnetworkpoolconfig_types.go index c6e710a99..011ffc7d9 100644 --- a/api/v1/sriovnetworkpoolconfig_types.go +++ b/api/v1/sriovnetworkpoolconfig_types.go @@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct { // Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards, // even if maxUnavailable is greater than one. MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` + + // +kubebuilder:validation:Enum=shared;exclusive + // RDMA subsystem. Allowed value "shared", "exclusive". + RdmaMode string `json:"rdmaMode,omitempty"` } type OvsHardwareOffloadConfig struct { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index fc9477593..0209c0573 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -783,6 +783,7 @@ func (in *SriovNetworkNodeStateSpec) DeepCopyInto(out *SriovNetworkNodeStateSpec } } in.Bridges.DeepCopyInto(&out.Bridges) + out.System = in.System } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateSpec. 
@@ -806,6 +807,7 @@ func (in *SriovNetworkNodeStateStatus) DeepCopyInto(out *SriovNetworkNodeStateSt } } in.Bridges.DeepCopyInto(&out.Bridges) + out.System = in.System } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovNetworkNodeStateStatus. @@ -1066,6 +1068,21 @@ func (in *SriovOperatorConfigStatus) DeepCopy() *SriovOperatorConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *System) DeepCopyInto(out *System) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new System. +func (in *System) DeepCopy() *System { + if in == nil { + return nil + } + out := new(System) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TrunkConfig) DeepCopyInto(out *TrunkConfig) { *out = *in diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index c5bf230c3..31ddf3bf1 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". 
+ enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/controllers/drain_controller.go b/controllers/drain_controller.go index 86da909d8..b96458fa7 100644 --- a/controllers/drain_controller.go +++ b/controllers/drain_controller.go @@ -24,11 +24,8 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" @@ -48,13 +45,6 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var ( - oneNode = intstr.FromInt32(1) - defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ - MaxUnavailable: &oneNode, - NodeSelector: &metav1.LabelSelector{}}} -) - type DrainReconcile struct { client.Client Scheme *runtime.Scheme @@ -346,94 +336,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) ( } func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, 
[]corev1.Node, error) { - logger := log.FromContext(ctx) - logger.Info("findNodePoolConfig():") - // get all the sriov network pool configs - npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} - err := dr.List(ctx, npcl) - if err != nil { - logger.Error(err, "failed to list sriovNetworkPoolConfig") - return nil, nil, err - } - - selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} - nodesInPools := map[string]interface{}{} - - for _, npc := range npcl.Items { - // we skip hw offload objects - if npc.Spec.OvsHardwareOffloadConfig.Name != "" { - continue - } - - if npc.Spec.NodeSelector == nil { - npc.Spec.NodeSelector = &metav1.LabelSelector{} - } - - selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - if selector.Matches(labels.Set(node.Labels)) { - selectedNpcl = append(selectedNpcl, npc.DeepCopy()) - } - - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", - "machineConfigPoolName", npc, - "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - for _, nodeName := range nodeList.Items { - nodesInPools[nodeName.Name] = nil - } - } - - if len(selectedNpcl) > 1 { - // don't allow the node to be part of multiple pools - err = fmt.Errorf("node is part of more then one pool") - logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) - return nil, nil, err - } else if len(selectedNpcl) == 1 { - // found one pool for our node - logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) - selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to 
create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) - return nil, nil, err - } - - // list all the nodes that are also part of this pool and return them - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) - return nil, nil, err - } - - return selectedNpcl[0], nodeList.Items, nil - } else { - // in this case we get all the nodes and remove the ones that already part of any pool - logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl) - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList) - if err != nil { - logger.Error(err, "failed to list all the nodes") - return nil, nil, err - } - - defaultNodeLists := []corev1.Node{} - for _, nodeObj := range nodeList.Items { - if _, exist := nodesInPools[nodeObj.Name]; !exist { - defaultNodeLists = append(defaultNodeLists, nodeObj) - } - } - return defaultNpcl, defaultNodeLists, nil - } + return findNodePoolConfig(ctx, node, dr.Client) } // SetupWithManager sets up the controller with the Manager. 
diff --git a/controllers/helper.go b/controllers/helper.go index 9ff735473..b90ad44f8 100644 --- a/controllers/helper.go +++ b/controllers/helper.go @@ -30,9 +30,12 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" kscheme "k8s.io/client-go/kubernetes/scheme" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -47,10 +50,17 @@ import ( "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var webhooks = map[string](string){ - constants.InjectorWebHookName: constants.InjectorWebHookPath, - constants.OperatorWebHookName: constants.OperatorWebHookPath, -} +var ( + webhooks = map[string](string){ + constants.InjectorWebHookName: constants.InjectorWebHookPath, + constants.OperatorWebHookName: constants.OperatorWebHookPath, + } + oneNode = intstr.FromInt32(1) + defaultPoolConfig = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ + MaxUnavailable: &oneNode, + NodeSelector: &metav1.LabelSelector{}, + RdmaMode: ""}} +) const ( clusterRoleResourceName = "ClusterRole" @@ -397,3 +407,94 @@ func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string] } return nil } + +func findNodePoolConfig(ctx context.Context, node *corev1.Node, c k8sclient.Client) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { + logger := log.FromContext(ctx) + logger.Info("FindNodePoolConfig():") + // get all the sriov network pool configs + npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} + err := c.List(ctx, npcl) + if err != nil { + logger.Error(err, "failed to list sriovNetworkPoolConfig") + return nil, nil, err + } + + selectedNpcl := 
[]*sriovnetworkv1.SriovNetworkPoolConfig{} + nodesInPools := map[string]interface{}{} + + for _, npc := range npcl.Items { + // we skip hw offload objects + if npc.Spec.OvsHardwareOffloadConfig.Name != "" { + continue + } + + if npc.Spec.NodeSelector == nil { + npc.Spec.NodeSelector = &metav1.LabelSelector{} + } + + selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + if selector.Matches(labels.Set(node.Labels)) { + selectedNpcl = append(selectedNpcl, npc.DeepCopy()) + } + + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", + "machineConfigPoolName", npc, + "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + for _, nodeName := range nodeList.Items { + nodesInPools[nodeName.Name] = nil + } + } + + if len(selectedNpcl) > 1 { + // don't allow the node to be part of multiple pools + err = fmt.Errorf("node is part of more then one pool") + logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) + return nil, nil, err + } else if len(selectedNpcl) == 1 { + // found one pool for our node + logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) + selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) + return nil, nil, err + } + + // list all the nodes that are also part of this pool and return them + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, 
"failed to list nodes using with label selector", "labelSelector", selector) + return nil, nil, err + } + + return selectedNpcl[0], nodeList.Items, nil + } else { + // in this case we get all the nodes and remove the ones that already part of any pool + logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig) + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList) + if err != nil { + logger.Error(err, "failed to list all the nodes") + return nil, nil, err + } + + defaultNodeLists := []corev1.Node{} + for _, nodeObj := range nodeList.Items { + if _, exist := nodesInPools[nodeObj.Name]; !exist { + defaultNodeLists = append(defaultNodeLists, nodeObj) + } + } + return defaultPoolConfig, defaultNodeLists, nil + } +} diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index be46880b7..1d2811fac 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -272,6 +272,13 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con ns.Name = node.Name ns.Namespace = vars.Namespace j, _ := json.Marshal(ns) + netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node") + } + if netPoolConfig != nil { + ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode + } logger.V(2).Info("SriovNetworkNodeState CR", "content", j) if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil { logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name) diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index c5bf230c3..31ddf3bf1 100644 --- 
a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -174,6 +174,15 @@ spec: - pciAddress type: object type: array + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object status: description: SriovNetworkNodeStateStatus defines the observed state of @@ -335,6 +344,15 @@ spec: type: string syncStatus: type: string + system: + properties: + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string + type: object type: object type: object served: true diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index 2cb2ece31..3d8a6a105 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -111,6 +111,12 @@ spec: Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". 
+ enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f7025c90d..66a5ad2b5 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -54,6 +54,9 @@ const ( VdpaTypeVirtio = "virtio" VdpaTypeVhost = "vhost" + RdmaSubsystemModeShared = "shared" + RdmaSubsystemModeExclusive = "exclusive" + ClusterTypeOpenshift = "openshift" ClusterTypeKubernetes = "kubernetes" diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index ff7f326dc..0867685dc 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -429,6 +429,16 @@ func (dn *Daemon) nodeStateSyncHandler() error { reqReboot = reqReboot || r } + if dn.currentNodeState.Status.System.RdmaMode != dn.desiredNodeState.Spec.System.RdmaMode { + err = dn.HostHelpers.SetRDMASubsystem(dn.desiredNodeState.Spec.System.RdmaMode) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to set RDMA subsystem") + return err + } + reqReboot = true + reqDrain = true + } + // When running using systemd check if the applied configuration is the latest one // or there is a new config we need to apply // When using systemd configuration we write the file diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 09d06d8f9..60d4e8d91 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -118,6 +118,7 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { log.Log.V(2).Info("pollNicStatus()") var iface []sriovnetworkv1.InterfaceExt var bridges sriovnetworkv1.Bridges + var rdmaMode string var err error if vars.PlatformType == consts.VirtualOpenStack { @@ -138,8 +139,14 @@ func (w *NodeStateStatusWriter) pollNicStatus() error { } } + rdmaMode, err = w.hostHelper.DiscoverRDMASubsystem() + if err != nil { + return err + } + w.status.Interfaces = iface w.status.Bridges = bridges + w.status.System.RdmaMode = rdmaMode return nil } diff --git 
a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index 432d741be..b413ecdee 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -294,6 +294,21 @@ func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostHelpersInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostHelpersInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -1044,6 +1059,20 @@ func (mr *MockHostHelpersInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostHelpersInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. 
+func (mr *MockHostHelpersInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostHelpersInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. func (m *MockHostHelpersInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/mock/mock_netlink.go b/pkg/host/internal/lib/netlink/mock/mock_netlink.go index 5b3bcc790..758346a3f 100644 --- a/pkg/host/internal/lib/netlink/mock/mock_netlink.go +++ b/pkg/host/internal/lib/netlink/mock/mock_netlink.go @@ -145,6 +145,21 @@ func (mr *MockNetlinkLibMockRecorder) DevlinkSetDeviceParam(bus, device, param, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DevlinkSetDeviceParam", reflect.TypeOf((*MockNetlinkLib)(nil).DevlinkSetDeviceParam), bus, device, param, cmode, value) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockNetlinkLib) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockNetlinkLibMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockNetlinkLib)(nil).DiscoverRDMASubsystem)) +} + // IsLinkAdminStateUp mocks base method. 
func (m *MockNetlinkLib) IsLinkAdminStateUp(link netlink.Link) bool { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/netlink.go b/pkg/host/internal/lib/netlink/netlink.go index ed063834e..7d857921d 100644 --- a/pkg/host/internal/lib/netlink/netlink.go +++ b/pkg/host/internal/lib/netlink/netlink.go @@ -68,6 +68,8 @@ type NetlinkLib interface { RdmaLinkByName(name string) (*netlink.RdmaLink, error) // IsLinkAdminStateUp checks if the admin state of a link is up IsLinkAdminStateUp(link Link) bool + // DiscoverRDMASubsystem returns RDMA subsystem mode + DiscoverRDMASubsystem() (string, error) } type libWrapper struct{} @@ -185,3 +187,8 @@ func (w *libWrapper) RdmaLinkByName(name string) (*netlink.RdmaLink, error) { func (w *libWrapper) IsLinkAdminStateUp(link Link) bool { return link.Attrs().Flags&net.FlagUp == 1 } + +// DiscoverRDMASubsystem returns RDMA subsystem mode +func (w *libWrapper) DiscoverRDMASubsystem() (string, error) { + return netlink.RdmaSystemGetNetnsMode() +} diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index ef85ad24a..940c4b248 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -429,3 +429,34 @@ func (n *network) GetPciAddressFromInterfaceName(interfaceName string) (string, log.Log.V(2).Info("GetPciAddressFromInterfaceName(): result", "interface", interfaceName, "pci address", pciAddress) return pciAddress, nil } + +func (n *network) DiscoverRDMASubsystem() (string, error) { + log.Log.Info("DiscoverRDMASubsystem(): retrieving RDMA subsystem mode") + subsystem, err := n.netlinkLib.DiscoverRDMASubsystem() + + if err != nil { + log.Log.Error(err, "DiscoverRDMASubsystem(): failed to get RDMA subsystem mode") + return "", err + } + + return subsystem, nil +} + +func (n *network) SetRDMASubsystem(mode string) error { + log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode") + + modeValue := 1 + if mode == "exclusive" { + modeValue = 0 + } + 
config := fmt.Sprintf("options ib_core netns_mode=%d\n", modeValue) + path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "ib_core.conf") + err := os.WriteFile(path, []byte(config), 0644) + + if err != nil { + log.Log.Error(err, "SetRDMASubsystem(): failed to write ib_core config") + return fmt.Errorf("failed to write ib_core config: %v", err) + } + + return nil +} diff --git a/pkg/host/internal/network/network_test.go b/pkg/host/internal/network/network_test.go index 19eb3f438..51c56b875 100644 --- a/pkg/host/internal/network/network_test.go +++ b/pkg/host/internal/network/network_test.go @@ -283,4 +283,35 @@ var _ = Describe("Network", func() { Expect(pci).To(Equal("0000:3b:00.0")) }) }) + Context("DiscoverRDMASubsystem", func() { + It("Should get RDMA Subsystem using netlink", func() { + netlinkLibMock.EXPECT().DiscoverRDMASubsystem().Return("shared", nil) + + pci, err := n.DiscoverRDMASubsystem() + Expect(err).NotTo(HaveOccurred()) + Expect(pci).To(Equal("shared")) + }) + }) + Context("SetRDMASubsystem", func() { + It("Should set RDMA Subsystem shared mode", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/ib_core.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("shared")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=1\n") + }) + It("Should set RDMA Subsystem exclusive mode", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/host/etc/modprobe.d"}, + Files: map[string][]byte{ + "/host/etc/modprobe.d/ib_core.conf": {}, + }, + }) + Expect(n.SetRDMASubsystem("exclusive")).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=0\n") + }) + }) }) diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index 5ebed46aa..095d270a9 100644 --- 
a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -264,6 +264,21 @@ func (mr *MockHostManagerInterfaceMockRecorder) DiscoverBridges() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverBridges", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverBridges)) } +// DiscoverRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) DiscoverRDMASubsystem() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) DiscoverRDMASubsystem() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).DiscoverRDMASubsystem)) +} + // DiscoverSriovDevices mocks base method. func (m *MockHostManagerInterface) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]v1.InterfaceExt, error) { m.ctrl.T.Helper() @@ -859,6 +874,20 @@ func (mr *MockHostManagerInterfaceMockRecorder) SetNicSriovMode(pciAddr, mode in return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetNicSriovMode", reflect.TypeOf((*MockHostManagerInterface)(nil).SetNicSriovMode), pciAddr, mode) } +// SetRDMASubsystem mocks base method. +func (m *MockHostManagerInterface) SetRDMASubsystem(mode string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetRDMASubsystem", mode) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetRDMASubsystem indicates an expected call of SetRDMASubsystem. +func (mr *MockHostManagerInterfaceMockRecorder) SetRDMASubsystem(mode interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetRDMASubsystem", reflect.TypeOf((*MockHostManagerInterface)(nil).SetRDMASubsystem), mode) +} + // SetSriovNumVfs mocks base method. 
func (m *MockHostManagerInterface) SetSriovNumVfs(pciAddr string, numVfs int) error { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index c6e0c8faf..6844ee5ae 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -90,6 +90,10 @@ type NetworkInterface interface { GetNetDevLinkAdminState(ifaceName string) string // GetPciAddressFromInterfaceName parses sysfs to get pci address of an interface by name GetPciAddressFromInterfaceName(interfaceName string) (string, error) + // DiscoverRDMASubsystem returns RDMA subsystem mode + DiscoverRDMASubsystem() (string, error) + // SetRDMASubsystem changes RDMA subsystem mode + SetRDMASubsystem(mode string) error } type ServiceInterface interface { diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index 6f8d72e07..c5f1f333a 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -5,13 +5,12 @@ import ( "fmt" "os" - "sigs.k8s.io/controller-runtime/pkg/log" - configv1 "github.com/openshift/api/config/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" ) From 02c6b009c3c4b0bf0c1345ebc2a16bb490e68000 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Mon, 28 Oct 2024 15:28:41 +0200 Subject: [PATCH 31/38] Add kernel args for rdma mode to complement the modprobe file Signed-off-by: Sebastian Sch --- Makefile | 2 +- bindata/scripts/enable-kargs.sh | 33 -- bindata/scripts/kargs.sh | 55 +++ .../sriovnetworknodepolicy_controller.go | 13 +- go.mod | 2 +- pkg/consts/constants.go | 2 + pkg/daemon/daemon.go | 17 +- pkg/daemon/plugin_test.go | 8 + pkg/daemon/writer.go | 1 + .../internal/lib/netlink/mock/mock_netlink.go | 30 +- pkg/host/internal/lib/netlink/netlink.go | 8 +- pkg/host/internal/network/network.go | 24 +- 
pkg/host/internal/network/network_test.go | 10 +- pkg/plugins/generic/generic_plugin.go | 200 +++++----- pkg/plugins/generic/generic_plugin_test.go | 74 +++- test/conformance/tests/test_networkpool.go | 345 ++++++++++++++++++ .../{enable-kargs_test.sh => kargs_test.sh} | 29 +- test/scripts/rpm-ostree_mock | 6 + 18 files changed, 667 insertions(+), 192 deletions(-) delete mode 100755 bindata/scripts/enable-kargs.sh create mode 100755 bindata/scripts/kargs.sh create mode 100644 test/conformance/tests/test_networkpool.go rename test/scripts/{enable-kargs_test.sh => kargs_test.sh} (61%) diff --git a/Makefile b/Makefile index 310f1dc52..f5ca7edc8 100644 --- a/Makefile +++ b/Makefile @@ -226,7 +226,7 @@ test-e2e-k8s: export NAMESPACE=sriov-network-operator test-e2e-k8s: test-e2e test-bindata-scripts: fakechroot - fakechroot ./test/scripts/enable-kargs_test.sh + fakechroot ./test/scripts/kargs_test.sh test-%: generate manifests envtest KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir=/tmp -p path)" HOME="$(shell pwd)" go test ./$*/... -coverprofile cover-$*.out -coverpkg ./... -v diff --git a/bindata/scripts/enable-kargs.sh b/bindata/scripts/enable-kargs.sh deleted file mode 100755 index 0dc18c784..000000000 --- a/bindata/scripts/enable-kargs.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -set -x - -declare -a kargs=( "$@" ) -ret=0 -args=$(chroot /host/ cat /proc/cmdline) - -if chroot /host/ test -f /run/ostree-booted ; then - for t in "${kargs[@]}";do - if [[ $args != *${t}* ]];then - if chroot /host/ rpm-ostree kargs | grep -vq ${t}; then - chroot /host/ rpm-ostree kargs --append ${t} > /dev/null 2>&1 - fi - let ret++ - fi - done -else - chroot /host/ which grubby > /dev/null 2>&1 - # if grubby is not there, let's tell it - if [ $? 
-ne 0 ]; then - exit 127 - fi - for t in "${kargs[@]}";do - if [[ $args != *${t}* ]];then - if chroot /host/ grubby --info=DEFAULT | grep args | grep -vq ${t}; then - chroot /host/ grubby --update-kernel=DEFAULT --args=${t} > /dev/null 2>&1 - fi - let ret++ - fi - done -fi - -echo $ret diff --git a/bindata/scripts/kargs.sh b/bindata/scripts/kargs.sh new file mode 100755 index 000000000..8d118456e --- /dev/null +++ b/bindata/scripts/kargs.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -x + +command=$1 +shift +declare -a kargs=( "$@" ) +ret=0 +args=$(chroot /host/ cat /proc/cmdline) + +if chroot /host/ test -f /run/ostree-booted ; then + for t in "${kargs[@]}";do + if [[ $command == "add" ]];then + if [[ $args != *${t}* ]];then + if chroot /host/ rpm-ostree kargs | grep -vq ${t}; then + chroot /host/ rpm-ostree kargs --append ${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + if [[ $command == "remove" ]];then + if [[ $args == *${t}* ]];then + if chroot /host/ rpm-ostree kargs | grep -q ${t}; then + chroot /host/ rpm-ostree kargs --delete ${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + done +else + chroot /host/ which grubby > /dev/null 2>&1 + # if grubby is not there, let's tell it + if [ $? 
-ne 0 ]; then + exit 127 + fi + for t in "${kargs[@]}";do + if [[ $command == "add" ]];then + if [[ $args != *${t}* ]];then + if chroot /host/ grubby --info=DEFAULT | grep args | grep -vq ${t}; then + chroot /host/ grubby --update-kernel=DEFAULT --args=${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + if [[ $command == "remove" ]];then + if [[ $args == *${t}* ]];then + if chroot /host/ grubby --info=DEFAULT | grep args | grep -q ${t}; then + chroot /host/ grubby --update-kernel=DEFAULT --remove-args=${t} > /dev/null 2>&1 + fi + let ret++ + fi + fi + done +fi + +echo $ret diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 1d2811fac..62218436f 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -155,22 +155,22 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er delayedEventHandler := handler.Funcs{ CreateFunc: func(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for create event", "resource", e.Object.GetName()) + Info("Enqueuing sync for create event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName()) + Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName(), "type", e.ObjectNew.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, DeleteFunc: func(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). 
- Info("Enqueuing sync for delete event", "resource", e.Object.GetName()) + Info("Enqueuing sync for delete event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, GenericFunc: func(ctx context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). - Info("Enqueuing sync for generic event", "resource", e.Object.GetName()) + Info("Enqueuing sync for generic event", "resource", e.Object.GetName(), "type", e.Object.GetObjectKind().GroupVersionKind().String()) qHandler(q) }, } @@ -199,6 +199,7 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er For(&sriovnetworkv1.SriovNetworkNodePolicy{}). Watches(&corev1.Node{}, nodeEvenHandler). Watches(&sriovnetworkv1.SriovNetworkNodePolicy{}, delayedEventHandler). + Watches(&sriovnetworkv1.SriovNetworkPoolConfig{}, delayedEventHandler). WatchesRawSource(&source.Channel{Source: eventChan}, delayedEventHandler). 
Complete(r) } @@ -271,14 +272,14 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con ns := &sriovnetworkv1.SriovNetworkNodeState{} ns.Name = node.Name ns.Namespace = vars.Namespace - j, _ := json.Marshal(ns) netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client) if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node") + logger.Error(err, "failed to get SriovNetworkPoolConfig for the current node") } if netPoolConfig != nil { ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode } + j, _ := json.Marshal(ns) logger.V(2).Info("SriovNetworkNodeState CR", "content", j) if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil { logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name) diff --git a/go.mod b/go.mod index 350dbb82d..31d70d572 100644 --- a/go.mod +++ b/go.mod @@ -38,6 +38,7 @@ require ( github.com/vishvananda/netlink v1.2.1-beta.2.0.20240221172127-ec7bcb248e94 github.com/vishvananda/netns v0.0.4 go.uber.org/zap v1.25.0 + golang.org/x/net v0.25.0 golang.org/x/time v0.3.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/yaml.v3 v3.0.1 @@ -145,7 +146,6 @@ require ( golang.org/x/crypto v0.23.0 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.13.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 66a5ad2b5..ba1830f5b 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -128,6 +128,8 @@ const ( KernelArgIntelIommu = "intel_iommu=on" KernelArgIommuPt = "iommu=pt" KernelArgIommuPassthrough = "iommu.passthrough=1" + KernelArgRdmaShared = "ib_core.netns_mode=1" + KernelArgRdmaExclusive = "ib_core.netns_mode=0" // Feature gates // ParallelNicConfigFeatureGate: allow to configure 
nics in parallel diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 0867685dc..53fe82b8b 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "math/rand" - "os/exec" "reflect" "sync" "time" @@ -429,16 +428,6 @@ func (dn *Daemon) nodeStateSyncHandler() error { reqReboot = reqReboot || r } - if dn.currentNodeState.Status.System.RdmaMode != dn.desiredNodeState.Spec.System.RdmaMode { - err = dn.HostHelpers.SetRDMASubsystem(dn.desiredNodeState.Spec.System.RdmaMode) - if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): failed to set RDMA subsystem") - return err - } - reqReboot = true - reqDrain = true - } - // When running using systemd check if the applied configuration is the latest one // or there is a new config we need to apply // When using systemd configuration we write the file @@ -761,11 +750,11 @@ func (dn *Daemon) rebootNode() { // However note we use `;` instead of `&&` so we keep rebooting even // if kubelet failed to shutdown - that way the machine will still eventually reboot // as systemd will time out the stop invocation. 
- cmd := exec.Command("systemd-run", "--unit", "sriov-network-config-daemon-reboot", + stdOut, StdErr, err := dn.HostHelpers.RunCommand("systemd-run", "--unit", "sriov-network-config-daemon-reboot", "--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl stop kubelet.service; reboot") - if err := cmd.Run(); err != nil { - log.Log.Error(err, "failed to reboot node") + if err != nil { + log.Log.Error(err, "failed to reboot node", "stdOut", stdOut, "StdErr", StdErr) } } diff --git a/pkg/daemon/plugin_test.go b/pkg/daemon/plugin_test.go index a13fc1f8b..7b14a4504 100644 --- a/pkg/daemon/plugin_test.go +++ b/pkg/daemon/plugin_test.go @@ -41,6 +41,14 @@ var _ = Describe("config daemon plugin loading tests", func() { vars.ClusterType = consts.ClusterTypeKubernetes gmockController = gomock.NewController(GinkgoT()) helperMock = helperMocks.NewMockHostHelpersInterface(gmockController) + helperMock.EXPECT().GetCurrentKernelArgs().Return("", nil).AnyTimes() + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false) + helperMock.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPassthrough).Return(false) + // k8s plugin is ATM the only plugin which require mocking/faking, as its New method performs additional logic // other than simple plugin struct initialization K8sPlugin = func(_ helper.HostHelpersInterface) (plugin.VendorPlugin, error) { diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 60d4e8d91..42eeb2928 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -189,6 +189,7 @@ func (w *NodeStateStatusWriter) setNodeStateStatus(msg Message) (*sriovnetworkv1 
nodeState, err := w.updateNodeStateStatusRetry(func(nodeState *sriovnetworkv1.SriovNetworkNodeState) { nodeState.Status.Interfaces = w.status.Interfaces nodeState.Status.Bridges = w.status.Bridges + nodeState.Status.System = w.status.System if msg.lastSyncError != "" || msg.syncStatus == consts.SyncStatusSucceeded { // clear lastSyncError when sync Succeeded nodeState.Status.LastSyncError = msg.lastSyncError diff --git a/pkg/host/internal/lib/netlink/mock/mock_netlink.go b/pkg/host/internal/lib/netlink/mock/mock_netlink.go index 758346a3f..ec136bf29 100644 --- a/pkg/host/internal/lib/netlink/mock/mock_netlink.go +++ b/pkg/host/internal/lib/netlink/mock/mock_netlink.go @@ -145,21 +145,6 @@ func (mr *MockNetlinkLibMockRecorder) DevlinkSetDeviceParam(bus, device, param, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DevlinkSetDeviceParam", reflect.TypeOf((*MockNetlinkLib)(nil).DevlinkSetDeviceParam), bus, device, param, cmode, value) } -// DiscoverRDMASubsystem mocks base method. -func (m *MockNetlinkLib) DiscoverRDMASubsystem() (string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DiscoverRDMASubsystem") - ret0, _ := ret[0].(string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// DiscoverRDMASubsystem indicates an expected call of DiscoverRDMASubsystem. -func (mr *MockNetlinkLibMockRecorder) DiscoverRDMASubsystem() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DiscoverRDMASubsystem", reflect.TypeOf((*MockNetlinkLib)(nil).DiscoverRDMASubsystem)) -} - // IsLinkAdminStateUp mocks base method. func (m *MockNetlinkLib) IsLinkAdminStateUp(link netlink.Link) bool { m.ctrl.T.Helper() @@ -304,6 +289,21 @@ func (mr *MockNetlinkLibMockRecorder) RdmaLinkByName(name interface{}) *gomock.C return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RdmaLinkByName", reflect.TypeOf((*MockNetlinkLib)(nil).RdmaLinkByName), name) } +// RdmaSystemGetNetnsMode mocks base method. 
+func (m *MockNetlinkLib) RdmaSystemGetNetnsMode() (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RdmaSystemGetNetnsMode") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// RdmaSystemGetNetnsMode indicates an expected call of RdmaSystemGetNetnsMode. +func (mr *MockNetlinkLibMockRecorder) RdmaSystemGetNetnsMode() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RdmaSystemGetNetnsMode", reflect.TypeOf((*MockNetlinkLib)(nil).RdmaSystemGetNetnsMode)) +} + // VDPADelDev mocks base method. func (m *MockNetlinkLib) VDPADelDev(name string) error { m.ctrl.T.Helper() diff --git a/pkg/host/internal/lib/netlink/netlink.go b/pkg/host/internal/lib/netlink/netlink.go index 7d857921d..ad6056710 100644 --- a/pkg/host/internal/lib/netlink/netlink.go +++ b/pkg/host/internal/lib/netlink/netlink.go @@ -68,8 +68,8 @@ type NetlinkLib interface { RdmaLinkByName(name string) (*netlink.RdmaLink, error) // IsLinkAdminStateUp checks if the admin state of a link is up IsLinkAdminStateUp(link Link) bool - // DiscoverRDMASubsystem returns RDMA subsystem mode - DiscoverRDMASubsystem() (string, error) + // RdmaSystemGetNetnsMode returns RDMA subsystem mode + RdmaSystemGetNetnsMode() (string, error) } type libWrapper struct{} @@ -188,7 +188,7 @@ func (w *libWrapper) IsLinkAdminStateUp(link Link) bool { return link.Attrs().Flags&net.FlagUp == 1 } -// DiscoverRDMASubsystem returns RDMA subsystem mode -func (w *libWrapper) DiscoverRDMASubsystem() (string, error) { +// RdmaSystemGetNetnsMode returns RDMA subsystem mode +func (w *libWrapper) RdmaSystemGetNetnsMode() (string, error) { return netlink.RdmaSystemGetNetnsMode() } diff --git a/pkg/host/internal/network/network.go b/pkg/host/internal/network/network.go index 940c4b248..3ac17cf8f 100644 --- a/pkg/host/internal/network/network.go +++ b/pkg/host/internal/network/network.go @@ -431,8 +431,7 @@ func (n *network) 
GetPciAddressFromInterfaceName(interfaceName string) (string, } func (n *network) DiscoverRDMASubsystem() (string, error) { - log.Log.Info("DiscoverRDMASubsystem(): retrieving RDMA subsystem mode") - subsystem, err := n.netlinkLib.DiscoverRDMASubsystem() + subsystem, err := n.netlinkLib.RdmaSystemGetNetnsMode() if err != nil { log.Log.Error(err, "DiscoverRDMASubsystem(): failed to get RDMA subsystem mode") @@ -443,19 +442,28 @@ func (n *network) DiscoverRDMASubsystem() (string, error) { } func (n *network) SetRDMASubsystem(mode string) error { - log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode") + log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode", "mode", mode) + path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "sriov_network_operator_modules_config.conf") + + if mode == "" { + err := os.Remove(path) + if err != nil && !errors.Is(err, os.ErrNotExist) { + log.Log.Error(err, "failed to remove ib_core config file") + return err + } + return nil + } modeValue := 1 if mode == "exclusive" { modeValue = 0 } - config := fmt.Sprintf("options ib_core netns_mode=%d\n", modeValue) - path := filepath.Join(vars.FilesystemRoot, consts.Host, "etc", "modprobe.d", "ib_core.conf") - err := os.WriteFile(path, []byte(config), 0644) + config := fmt.Sprintf("# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=%d\n", modeValue) + err := os.WriteFile(path, []byte(config), 0644) if err != nil { - log.Log.Error(err, "SetRDMASubsystem(): failed to write ib_core config") - return fmt.Errorf("failed to write ib_core config: %v", err) + log.Log.Error(err, "SetRDMASubsystem(): failed to write sriov_network_operator_modules_config.conf") + return fmt.Errorf("failed to write sriov_network_operator_modules_config.conf: %v", err) } return nil diff --git a/pkg/host/internal/network/network_test.go b/pkg/host/internal/network/network_test.go index 51c56b875..3e197c3f8 100644 --- 
a/pkg/host/internal/network/network_test.go +++ b/pkg/host/internal/network/network_test.go @@ -285,7 +285,7 @@ var _ = Describe("Network", func() { }) Context("DiscoverRDMASubsystem", func() { It("Should get RDMA Subsystem using netlink", func() { - netlinkLibMock.EXPECT().DiscoverRDMASubsystem().Return("shared", nil) + netlinkLibMock.EXPECT().RdmaSystemGetNetnsMode().Return("shared", nil) pci, err := n.DiscoverRDMASubsystem() Expect(err).NotTo(HaveOccurred()) @@ -297,21 +297,21 @@ var _ = Describe("Network", func() { helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ Dirs: []string{"/host/etc/modprobe.d"}, Files: map[string][]byte{ - "/host/etc/modprobe.d/ib_core.conf": {}, + "/host/etc/modprobe.d/sriov_network_operator_modules_config.conf": {}, }, }) Expect(n.SetRDMASubsystem("shared")).NotTo(HaveOccurred()) - helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=1\n") + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/sriov_network_operator_modules_config.conf", "# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=1\n") }) It("Should set RDMA Subsystem exclusive mode", func() { helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ Dirs: []string{"/host/etc/modprobe.d"}, Files: map[string][]byte{ - "/host/etc/modprobe.d/ib_core.conf": {}, + "/host/etc/modprobe.d/sriov_network_operator_modules_config.conf": {}, }, }) Expect(n.SetRDMASubsystem("exclusive")).NotTo(HaveOccurred()) - helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/ib_core.conf", "options ib_core netns_mode=0\n") + helpers.GinkgoAssertFileContentsEquals("/host/etc/modprobe.d/sriov_network_operator_modules_config.conf", "# This file is managed by sriov-network-operator do not edit.\noptions ib_core netns_mode=0\n") }) }) }) diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 552f8142a..948459a7f 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ 
b/pkg/plugins/generic/generic_plugin.go @@ -1,11 +1,8 @@ package generic import ( - "bytes" "errors" - "os/exec" - "strconv" - "strings" + "fmt" "syscall" "sigs.k8s.io/controller-runtime/pkg/log" @@ -48,12 +45,14 @@ type DriverState struct { type DriverStateMapType map[uint]*DriverState +type KargStateMapType map[string]bool + type GenericPlugin struct { PluginName string SpecVersion string DesireState *sriovnetworkv1.SriovNetworkNodeState DriverStateMap DriverStateMapType - DesiredKernelArgs map[string]bool + DesiredKernelArgs KargStateMapType helpers helper.HostHelpersInterface skipVFConfiguration bool skipBridgeConfiguration bool @@ -82,7 +81,7 @@ type genericPluginOptions struct { skipBridgeConfiguration bool } -const scriptsPath = "bindata/scripts/enable-kargs.sh" +const scriptsPath = "bindata/scripts/kargs.sh" // Initialize our plugin and set up initial values func NewGenericPlugin(helpers helper.HostHelpersInterface, options ...Option) (plugin.VendorPlugin, error) { @@ -112,11 +111,27 @@ func NewGenericPlugin(helpers helper.HostHelpersInterface, options ...Option) (p NeedDriverFunc: needDriverCheckVdpaType, DriverLoaded: false, } + + // To maintain backward compatibility we don't remove the intel_iommu, iommu and pcirealloc + // kernel args if they are configured + kargs, err := helpers.GetCurrentKernelArgs() + if err != nil { + return nil, err + } + desiredKernelArgs := KargStateMapType{ + consts.KernelArgPciRealloc: helpers.IsKernelArgsSet(kargs, consts.KernelArgPciRealloc), + consts.KernelArgIntelIommu: helpers.IsKernelArgsSet(kargs, consts.KernelArgIntelIommu), + consts.KernelArgIommuPt: helpers.IsKernelArgsSet(kargs, consts.KernelArgIommuPt), + consts.KernelArgIommuPassthrough: helpers.IsKernelArgsSet(kargs, consts.KernelArgIommuPassthrough), + consts.KernelArgRdmaShared: false, + consts.KernelArgRdmaExclusive: false, + } + return &GenericPlugin{ PluginName: PluginName, SpecVersion: "1.0", DriverStateMap: driverStateMap, - DesiredKernelArgs: 
make(map[string]bool), + DesiredKernelArgs: desiredKernelArgs, helpers: helpers, skipVFConfiguration: cfg.skipVFConfiguration, skipBridgeConfiguration: cfg.skipBridgeConfiguration, @@ -179,18 +194,13 @@ func (p *GenericPlugin) CheckStatusChanges(current *sriovnetworkv1.SriovNetworkN } } - missingKernelArgs, err := p.getMissingKernelArgs() + shouldUpdate, err := p.shouldUpdateKernelArgs() if err != nil { log.Log.Error(err, "generic-plugin CheckStatusChanges(): failed to verify missing kernel arguments") return false, err } - if len(missingKernelArgs) != 0 { - log.Log.V(0).Info("generic-plugin CheckStatusChanges(): kernel args missing", - "kernelArgs", missingKernelArgs) - } - - return len(missingKernelArgs) != 0, nil + return shouldUpdate, nil } func (p *GenericPlugin) syncDriverState() error { @@ -228,7 +238,7 @@ func (p *GenericPlugin) Apply() error { p.DesireState.Status.Interfaces, p.skipVFConfiguration); err != nil { // Catch the "cannot allocate memory" error and try to use PCI realloc if errors.Is(err, syscall.ENOMEM) { - p.addToDesiredKernelArgs(consts.KernelArgPciRealloc) + p.enableDesiredKernelArgs(consts.KernelArgPciRealloc) } return err } @@ -264,85 +274,84 @@ func needDriverCheckVdpaType(state *sriovnetworkv1.SriovNetworkNodeState, driver return false } -// setKernelArg Tries to add the kernel args via ostree or grubby. -func setKernelArg(karg string) (bool, error) { - log.Log.Info("generic plugin setKernelArg()") - var stdout, stderr bytes.Buffer - cmd := exec.Command("/bin/sh", scriptsPath, karg) - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { +// editKernelArg Tries to add the kernel args via ostree or grubby. 
+func editKernelArg(helper helper.HostHelpersInterface, mode, karg string) error { + log.Log.Info("generic plugin editKernelArg()", "mode", mode, "karg", karg) + _, _, err := helper.RunCommand("/bin/sh", scriptsPath, mode, karg) + if err != nil { // if grubby is not there log and assume kernel args are set correctly. if utils.IsCommandNotFound(err) { - log.Log.Error(err, "generic plugin setKernelArg(): grubby or ostree command not found. Please ensure that kernel arg are set", + log.Log.Error(err, "generic plugin editKernelArg(): grubby or ostree command not found. Please ensure that kernel arg are correct", "kargs", karg) - return false, nil - } - log.Log.Error(err, "generic plugin setKernelArg(): fail to enable kernel arg", "karg", karg) - return false, err - } - - i, err := strconv.Atoi(strings.TrimSpace(stdout.String())) - if err == nil { - if i > 0 { - log.Log.Info("generic plugin setKernelArg(): need to reboot node for kernel arg", "karg", karg) - return true, nil + return nil } + log.Log.Error(err, "generic plugin editKernelArg(): fail to edit kernel arg", "karg", karg) + return err } - return false, err + return nil } -// addToDesiredKernelArgs Should be called to queue a kernel arg to be added to the node. -func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { - if _, ok := p.DesiredKernelArgs[karg]; !ok { - log.Log.Info("generic plugin addToDesiredKernelArgs(): Adding to desired kernel arg", "karg", karg) - p.DesiredKernelArgs[karg] = false - } +// enableDesiredKernelArgs Should be called to mark a kernel arg as enabled. +func (p *GenericPlugin) enableDesiredKernelArgs(karg string) { + log.Log.Info("generic plugin enableDesiredKernelArgs(): enable kernel arg", "karg", karg) + p.DesiredKernelArgs[karg] = true } -// getMissingKernelArgs gets Kernel arguments that have not been set. 
-func (p *GenericPlugin) getMissingKernelArgs() ([]string, error) { - missingArgs := make([]string, 0, len(p.DesiredKernelArgs)) - if len(p.DesiredKernelArgs) == 0 { - return nil, nil - } +// disableDesiredKernelArgs Should be called to mark a kernel arg as disabled. +func (p *GenericPlugin) disableDesiredKernelArgs(karg string) { + log.Log.Info("generic plugin disableDesiredKernelArgs(): disable kernel arg", "karg", karg) + p.DesiredKernelArgs[karg] = false +} +// shouldUpdateKernelArgs returns true if the DesiredKernelArgs state is not equal to the running kernel args in the system +func (p *GenericPlugin) shouldUpdateKernelArgs() (bool, error) { kargs, err := p.helpers.GetCurrentKernelArgs() if err != nil { - return nil, err + return false, err } - for desiredKarg := range p.DesiredKernelArgs { - if !p.helpers.IsKernelArgsSet(kargs, desiredKarg) { - missingArgs = append(missingArgs, desiredKarg) + for karg, kargState := range p.DesiredKernelArgs { + if kargState && !p.helpers.IsKernelArgsSet(kargs, karg) { + return true, nil + } + + if !kargState && p.helpers.IsKernelArgsSet(kargs, karg) { + return true, nil } } - return missingArgs, nil + return false, nil } // syncDesiredKernelArgs should be called to set all the kernel arguments. Returns bool if node update is needed. 
-func (p *GenericPlugin) syncDesiredKernelArgs(kargs []string) (bool, error) { +func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { + kargs, err := p.helpers.GetCurrentKernelArgs() + if err != nil { + return false, err + } + needReboot := false + for karg, kargState := range p.DesiredKernelArgs { + if kargState { + err = editKernelArg(p.helpers, "add", karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) + return false, err + } - for _, karg := range kargs { - if p.DesiredKernelArgs[karg] { - log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", - "karg", karg) - } - // There is a case when we try to set the kernel argument here, the daemon could decide to not reboot because - // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel - // argument is set once the daemon goes through node state sync again. - update, err := setKernelArg(karg) - if err != nil { - log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to set kernel arg", "karg", karg) - return false, err - } - if update { - needReboot = true - log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): need reboot for setting kernel arg", "karg", karg) + if !p.helpers.IsKernelArgsSet(kargs, karg) { + needReboot = true + } + } else { + err = editKernelArg(p.helpers, "remove", karg) + if err != nil { + log.Log.Error(err, "generic-plugin syncDesiredKernelArgs(): fail to remove kernel arg", "karg", karg) + return false, err + } + + if p.helpers.IsKernelArgsSet(kargs, karg) { + needReboot = true + } } - p.DesiredKernelArgs[karg] = true } return needReboot, nil } @@ -423,14 +432,14 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo kernelArgFnByCPUVendor := map[hostTypes.CPUVendor]func(){ hostTypes.CPUVendorIntel: func() { - p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) - 
p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + p.enableDesiredKernelArgs(consts.KernelArgIntelIommu) + p.enableDesiredKernelArgs(consts.KernelArgIommuPt) }, hostTypes.CPUVendorAMD: func() { - p.addToDesiredKernelArgs(consts.KernelArgIommuPt) + p.enableDesiredKernelArgs(consts.KernelArgIommuPt) }, hostTypes.CPUVendorARM: func() { - p.addToDesiredKernelArgs(consts.KernelArgIommuPassthrough) + p.enableDesiredKernelArgs(consts.KernelArgIommuPassthrough) }, } @@ -448,26 +457,41 @@ func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetwo } } +func (p *GenericPlugin) configRdmaKernelArg(state *sriovnetworkv1.SriovNetworkNodeState) error { + if state.Spec.System.RdmaMode == "" { + p.disableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + p.disableDesiredKernelArgs(consts.KernelArgRdmaShared) + } else if state.Spec.System.RdmaMode == "shared" { + p.enableDesiredKernelArgs(consts.KernelArgRdmaShared) + p.disableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + } else if state.Spec.System.RdmaMode == "exclusive" { + p.enableDesiredKernelArgs(consts.KernelArgRdmaExclusive) + p.disableDesiredKernelArgs(consts.KernelArgRdmaShared) + } else { + err := fmt.Errorf("unexpected rdma mode: %s", state.Spec.System.RdmaMode) + log.Log.Error(err, "generic-plugin configRdmaKernelArg(): failed to configure kernel arguments for rdma") + return err + } + + return p.helpers.SetRDMASubsystem(state.Spec.System.RdmaMode) +} + func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { needReboot := false p.addVfioDesiredKernelArg(state) - - missingKernelArgs, err := p.getMissingKernelArgs() + err := p.configRdmaKernelArg(state) if err != nil { - log.Log.Error(err, "generic-plugin needRebootNode(): failed to verify missing kernel arguments") return false, err } - if len(missingKernelArgs) != 0 { - needReboot, err = p.syncDesiredKernelArgs(missingKernelArgs) - if err != nil { - log.Log.Error(err, "generic-plugin 
needRebootNode(): failed to set the desired kernel arguments") - return false, err - } - if needReboot { - log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") - } + needReboot, err = p.syncDesiredKernelArgs() + if err != nil { + log.Log.Error(err, "generic-plugin needRebootNode(): failed to set the desired kernel arguments") + return false, err + } + if needReboot { + log.Log.V(2).Info("generic-plugin needRebootNode(): need reboot for updating kernel arguments") } return needReboot, nil diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index 0a6674712..2e2aed326 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -34,6 +34,16 @@ var _ = Describe("Generic plugin", func() { ctrl = gomock.NewController(t) hostHelper = mock_helper.NewMockHostHelpersInterface(ctrl) + hostHelper.EXPECT().SetRDMASubsystem("").Return(nil).AnyTimes() + hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgPciRealloc).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaExclusive).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgRdmaShared).Return(false).AnyTimes() + hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPassthrough).Return(false).AnyTimes() + + hostHelper.EXPECT().RunCommand(gomock.Any(), gomock.Any()).Return("", "", nil).AnyTimes() genericPlugin, err = NewGenericPlugin(hostHelper) Expect(err).ToNot(HaveOccurred()) @@ -898,20 +908,21 @@ var _ = Describe("Generic plugin", func() { }, } + rdmaState := &sriovnetworkv1.SriovNetworkNodeState{ + Spec: sriovnetworkv1.SriovNetworkNodeStateSpec{System: 
sriovnetworkv1.System{ + RdmaMode: consts.RdmaSubsystemModeShared, + }}, + Status: sriovnetworkv1.SriovNetworkNodeStateStatus{}, + } + It("should detect changes on status due to missing kernel args", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorIntel, nil) // Load required kernel args. genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIntelIommu: false, - consts.KernelArgIommuPt: false, - })) - - hostHelper.EXPECT().GetCurrentKernelArgs().Return("", nil) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIntelIommu).Return(false) - hostHelper.EXPECT().IsKernelArgsSet("", consts.KernelArgIommuPt).Return(false) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIntelIommu]).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPt]).To(BeTrue()) changed, err := genericPlugin.CheckStatusChanges(vfioNetworkNodeState) Expect(err).ToNot(HaveOccurred()) @@ -921,17 +932,52 @@ var _ = Describe("Generic plugin", func() { It("should set the correct kernel args on AMD CPUs", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorAMD, nil) genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIommuPt: false, - })) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPt]).To(BeTrue()) }) It("should set the correct kernel args on ARM CPUs", func() { hostHelper.EXPECT().GetCPUVendor().Return(hostTypes.CPUVendorARM, nil) genericPlugin.(*GenericPlugin).addVfioDesiredKernelArg(vfioNetworkNodeState) - Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs).To(Equal(map[string]bool{ - consts.KernelArgIommuPassthrough: false, - })) + 
Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgIommuPassthrough]).To(BeTrue()) + }) + + It("should enable rdma shared mode", func() { + hostHelper.EXPECT().SetRDMASubsystem(consts.RdmaSubsystemModeShared).Return(nil) + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeTrue()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeFalse()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + It("should enable rdma exclusive mode", func() { + hostHelper.EXPECT().SetRDMASubsystem(consts.RdmaSubsystemModeExclusive).Return(nil) + rdmaState.Spec.System.RdmaMode = consts.RdmaSubsystemModeExclusive + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeFalse()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeTrue()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeTrue()) + }) + It("should not configure RDMA kernel args", func() { + hostHelper.EXPECT().SetRDMASubsystem("").Return(nil) + rdmaState.Spec.System = sriovnetworkv1.System{} + err := genericPlugin.(*GenericPlugin).configRdmaKernelArg(rdmaState) + Expect(err).ToNot(HaveOccurred()) + + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaShared]).To(BeFalse()) + Expect(genericPlugin.(*GenericPlugin).DesiredKernelArgs[consts.KernelArgRdmaExclusive]).To(BeFalse()) + + changed, err := genericPlugin.CheckStatusChanges(rdmaState) + Expect(err).ToNot(HaveOccurred()) + Expect(changed).To(BeFalse()) }) }) diff --git 
a/test/conformance/tests/test_networkpool.go b/test/conformance/tests/test_networkpool.go new file mode 100644 index 000000000..47d929013 --- /dev/null +++ b/test/conformance/tests/test_networkpool.go @@ -0,0 +1,345 @@ +package tests + +import ( + "fmt" + "strconv" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "golang.org/x/net/context" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/pod" +) + +var _ = Describe("[sriov] NetworkPool", Ordered, func() { + var testNode string + var interfaces []*sriovv1.InterfaceExt + + BeforeAll(func() { + err := namespaces.Create(namespaces.Test, clients) + Expect(err).ToNot(HaveOccurred()) + err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + Expect(err).ToNot(HaveOccurred()) + + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + testNode, interfaces, err = sriovInfos.FindSriovDevicesAndNode() + Expect(err).ToNot(HaveOccurred()) + + By(fmt.Sprintf("Testing on node %s, %d devices found", testNode, len(interfaces))) + WaitForSRIOVStable() + }) + + AfterEach(func() { + err := namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) + Expect(err).ToNot(HaveOccurred()) + + err = 
clients.DeleteAllOf(context.Background(), &sriovv1.SriovNetworkPoolConfig{}, client.InNamespace(operatorNamespace)) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + }) + + Context("Configure rdma namespace mode", func() { + It("should switch rdma mode", func() { + By("create a pool with only that node") + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + By("configure rdma mode to exclusive") + err := clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + nodeState := &sriovv1.SriovNetworkNodeState{} + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeExclusive)) + + By("Checking rdma mode and kernel args") + output, _, err := runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + 
By("configure rdma mode to shared") + networkPool.Spec.RdmaMode = consts.RdmaSubsystemModeShared + err = clients.Update(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + + By("Checking rdma mode and kernel args") + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/etc/modprobe.d/sriov_network_operator_modules_config.conf | grep mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "1")).To(BeTrue()) + + By("removing rdma mode configuration") + err = clients.Delete(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode, Namespace: operatorNamespace}, nodeState) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeState.Spec.System.RdmaMode).To(Equal("")) + Expect(nodeState.Status.System.RdmaMode).To(Equal(consts.RdmaSubsystemModeShared)) + + By("Checking rdma mode and kernel args") + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=0 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, 
"0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "cat /host/proc/cmdline | grep ib_core.netns_mode=1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + + output, _, err = runCommandOnConfigDaemon(testNode, "/bin/bash", "-c", "ls /host/etc/modprobe.d | grep sriov_network_operator_modules_config.conf | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(output, "0")).To(BeTrue()) + }) + }) + + Context("Check rdma metrics inside a pod in exclusive mode", func() { + var iface *sriovv1.InterfaceExt + + BeforeAll(func() { + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + for _, node := range sriovInfos.Nodes { + iface, err = sriovInfos.FindOneMellanoxSriovDevice(node) + if err == nil { + testNode = node + break + } + } + + if iface == nil { + Skip("no mellanox card available to test rdma") + } + + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeExclusive, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + err = clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + }) + + It("should run pod with RDMA cni and expose nic metrics and another one without rdma info", func() { + By("creating a policy") + resourceName := "testrdma" + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", + func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + By("Creating sriov network to use 
the rdma device") + sriovNetwork := &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rdmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + MetaPluginsConfig: `{"type": "rdma"}`, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-rdmanetwork", namespaces.Test) + + sriovNetwork = &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nordmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: `{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-nordmanetwork", namespaces.Test) + + podDefinition := pod.DefineWithNetworks([]string{"test-rdmanetwork"}) + firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + podDefinition = pod.DefineWithNetworks([]string{"test-nordmanetwork"}) + secondPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + firstPod = waitForPodRunning(firstPod) + secondPod = waitForPodRunning(secondPod) + + testedNode := &corev1.Node{} + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode}, testedNode) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + 
allocatable, _ := resNum.AsInt64() + Expect(allocatable).ToNot(Equal(5)) + + By("restart device plugin") + pods, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=sriov-device-plugin", + FieldSelector: "spec.nodeName=" + testNode, + }) + Expect(err).ToNot(HaveOccurred()) + + for _, podObj := range pods.Items { + err = clients.Delete(context.Background(), &podObj) + Expect(err).ToNot(HaveOccurred()) + Eventually(func() bool { + searchPod := &corev1.Pod{} + err = clients.Get(context.Background(), client.ObjectKey{Name: podObj.Name, Namespace: podObj.Namespace}, searchPod) + if err != nil && errors.IsNotFound(err) { + return true + } + return false + }, 2*time.Minute, time.Second).Should(BeTrue()) + } + + By("checking the amount of allocatable devices remains after device plugin reset") + Consistently(func() int64 { + err = clients.Get(context.Background(), client.ObjectKey{Name: testNode}, testedNode) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + newAllocatable, _ := resNum.AsInt64() + return newAllocatable + }, 1*time.Minute, 5*time.Second).Should(Equal(allocatable)) + + By("checking counters inside the pods") + strOut, _, err := pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ip link show net1 | grep net1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(strOut, "1")).To(BeTrue()) + strOut, _, err = pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/*/ports/*/hw_counters | wc -l") + strOut = strings.TrimSpace(strOut) + Expect(err).ToNot(HaveOccurred()) + num, err := strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically(">", 0)) + + strOut, _, err = pod.ExecCommand(clients, secondPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/ | wc -l") + 
Expect(err).ToNot(HaveOccurred()) + strOut = strings.TrimSpace(strOut) + num, err = strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically("==", 0)) + }) + }) + + Context("Check rdma metrics inside a pod in shared mode not exist", func() { + var iface *sriovv1.InterfaceExt + BeforeAll(func() { + sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace) + Expect(err).ToNot(HaveOccurred()) + Expect(len(sriovInfos.Nodes)).ToNot(BeZero()) + + for _, node := range sriovInfos.Nodes { + iface, err = sriovInfos.FindOneMellanoxSriovDevice(node) + if err == nil { + testNode = node + break + } + } + + if iface == nil { + Skip("no mellanox card available to test rdma") + } + + networkPool := &sriovv1.SriovNetworkPoolConfig{ + ObjectMeta: metav1.ObjectMeta{Name: testNode, Namespace: operatorNamespace}, + Spec: sriovv1.SriovNetworkPoolConfigSpec{RdmaMode: consts.RdmaSubsystemModeShared, + NodeSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"kubernetes.io/hostname": testNode}}}} + + err = clients.Create(context.Background(), networkPool) + Expect(err).ToNot(HaveOccurred()) + By("waiting for operator to finish the configuration") + WaitForSRIOVStable() + }) + + It("should run pod without RDMA cni and not expose nic metrics", func() { + By("creating a policy") + resourceName := "testrdma" + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, iface.Name, testNode, 5, resourceName, "netdevice", + func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.IsRdma = true }) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + By("Creating sriov network to use the rdma device") + sriovNetwork := &sriovv1.SriovNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rdmanetwork", + Namespace: operatorNamespace, + }, + Spec: sriovv1.SriovNetworkSpec{ + ResourceName: resourceName, + IPAM: 
`{"type":"host-local","subnet":"10.10.10.0/24","rangeStart":"10.10.10.171","rangeEnd":"10.10.10.181","routes":[{"dst":"0.0.0.0/0"}],"gateway":"10.10.10.1"}`, + NetworkNamespace: namespaces.Test, + }} + + err = clients.Create(context.Background(), sriovNetwork) + Expect(err).ToNot(HaveOccurred()) + waitForNetAttachDef("test-rdmanetwork", namespaces.Test) + + podDefinition := pod.DefineWithNetworks([]string{"test-rdmanetwork"}) + firstPod, err := clients.Pods(namespaces.Test).Create(context.Background(), podDefinition, metav1.CreateOptions{}) + Expect(err).ToNot(HaveOccurred()) + firstPod = waitForPodRunning(firstPod) + + strOut, _, err := pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ip link show net1 | grep net1 | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(strings.HasPrefix(strOut, "1")).To(BeTrue()) + strOut, _, err = pod.ExecCommand(clients, firstPod, "/bin/bash", "-c", "ls /sys/bus/pci/devices/${PCIDEVICE_OPENSHIFT_IO_TESTRDMA}/infiniband/*/ports/* | grep hw_counters | wc -l") + strOut = strings.TrimSpace(strOut) + Expect(err).ToNot(HaveOccurred()) + num, err := strconv.Atoi(strOut) + Expect(err).ToNot(HaveOccurred()) + Expect(num).To(BeNumerically("==", 0)) + }) + }) +}) diff --git a/test/scripts/enable-kargs_test.sh b/test/scripts/kargs_test.sh similarity index 61% rename from test/scripts/enable-kargs_test.sh rename to test/scripts/kargs_test.sh index 93a985700..053bd5200 100755 --- a/test/scripts/enable-kargs_test.sh +++ b/test/scripts/kargs_test.sh @@ -2,14 +2,14 @@ SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/enable-kargs.sh" +SUT_SCRIPT="${SCRIPTPATH}/../../bindata/scripts/kargs.sh" test_RpmOstree_Add_All_Arguments() { echo "a b c=d eee=fff" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted - output=`$SUT_SCRIPT X=Y W=Z` + output=`$SUT_SCRIPT add X=Y W=Z` assertEquals 0 $? 
assertEquals "2" $output @@ -22,7 +22,7 @@ test_RpmOstree_Add_Only_Missing_Arguments() { echo "a b c=d eee=fff K=L" > ${FAKE_HOST}/proc/cmdline touch ${FAKE_HOST}/run/ostree-booted - output=`$SUT_SCRIPT K=L X=Y` + output=`$SUT_SCRIPT add K=L X=Y` assertEquals 0 $? assertEquals "1" $output @@ -30,6 +30,29 @@ test_RpmOstree_Add_Only_Missing_Arguments() { assertNotContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--append K=L" } +test_RpmOstree_Delete_All_Arguments() { + echo "a b c=d eee=fff X=Y W=Z" > ${FAKE_HOST}/proc/cmdline + touch ${FAKE_HOST}/run/ostree-booted + + output=`$SUT_SCRIPT remove X=Y W=Z` + assertEquals 0 $? + assertEquals "2" $output + + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete X=Y" + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete W=Z" +} + +test_RpmOstree_Delete_Only_Exist_Arguments() { + echo "a b c=d eee=fff X=Y" > ${FAKE_HOST}/proc/cmdline + touch ${FAKE_HOST}/run/ostree-booted + + output=`$SUT_SCRIPT remove X=Y W=Z` + assertEquals 0 $? + assertEquals "1" $output + + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete X=Y" + assertContains "`cat ${FAKE_HOST}/rpm-ostree_calls`" "--delete W=Z" +} ###### Mock /host directory ###### export FAKE_HOST="$(mktemp -d)" diff --git a/test/scripts/rpm-ostree_mock b/test/scripts/rpm-ostree_mock index db6f66040..06e6b1905 100755 --- a/test/scripts/rpm-ostree_mock +++ b/test/scripts/rpm-ostree_mock @@ -10,3 +10,9 @@ then # Caller is trying to read kernel arguments. cat /proc/cmdline fi + +if ! echo "$*" | grep -q "\--delete" +then + # Caller is trying to read kernel arguments. 
+ cat /proc/cmdline +fi From baa41c97adeb9249f30c5707f4bc8deee5e30c31 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 7 Nov 2024 11:55:28 +0200 Subject: [PATCH 32/38] redesign device plugin always deploy sriov network device plugin and use a label to enable or disable it on the nodes Signed-off-by: Sebastian Sch --- .../plugins/sriov-device-plugin.yaml | 2 +- controllers/helper.go | 130 ++----- controllers/helper_test.go | 330 ------------------ .../sriovnetworknodepolicy_controller.go | 50 ++- .../sriovnetworknodepolicy_controller_test.go | 137 +++++++- controllers/sriovoperatorconfig_controller.go | 8 +- .../sriovoperatorconfig_controller_test.go | 110 +++--- controllers/suite_test.go | 7 + deploy/clusterrole.yaml | 6 - deploy/role.yaml | 12 +- .../templates/clusterrole.yaml | 6 - .../templates/role.yaml | 11 +- pkg/consts/constants.go | 4 + pkg/utils/cluster.go | 82 ++++- 14 files changed, 353 insertions(+), 542 deletions(-) delete mode 100644 controllers/helper_test.go diff --git a/bindata/manifests/plugins/sriov-device-plugin.yaml b/bindata/manifests/plugins/sriov-device-plugin.yaml index a0f433a06..3660ebf79 100644 --- a/bindata/manifests/plugins/sriov-device-plugin.yaml +++ b/bindata/manifests/plugins/sriov-device-plugin.yaml @@ -27,7 +27,7 @@ spec: hostNetwork: true nodeSelector: {{- range $key, $value := .NodeSelectorField }} - {{ $key }}: {{ $value }} + {{ $key }}: "{{ $value }}" {{- end }} tolerations: - operator: Exists diff --git a/controllers/helper.go b/controllers/helper.go index b90ad44f8..58c3ae697 100644 --- a/controllers/helper.go +++ b/controllers/helper.go @@ -22,7 +22,6 @@ import ( "encoding/json" "fmt" "os" - "sort" "strings" errs "github.com/pkg/errors" @@ -51,7 +50,7 @@ import ( ) var ( - webhooks = map[string](string){ + webhooks = map[string]string{ constants.InjectorWebHookName: constants.InjectorWebHookPath, constants.OperatorWebHookName: constants.OperatorWebHookPath, } @@ -162,29 +161,33 @@ func formatJSON(str string) 
(string, error) { return prettyJSON.String(), nil } +// GetDefaultNodeSelector return a nodeSelector with worker and linux os func GetDefaultNodeSelector() map[string]string { - return map[string]string{"node-role.kubernetes.io/worker": "", - "kubernetes.io/os": "linux"} + return map[string]string{ + "node-role.kubernetes.io/worker": "", + "kubernetes.io/os": "linux", + } } -// hasNoValidPolicy returns true if no SriovNetworkNodePolicy -// or only the (deprecated) "default" policy is present -func hasNoValidPolicy(pl []sriovnetworkv1.SriovNetworkNodePolicy) bool { - switch len(pl) { - case 0: - return true - case 1: - return pl[0].Name == constants.DefaultPolicyName - default: - return false +// GetDefaultNodeSelectorForDevicePlugin return a nodeSelector with worker linux os +// and the enabled sriov device plugin +func GetNodeSelectorForDevicePlugin(dc *sriovnetworkv1.SriovOperatorConfig) map[string]string { + if len(dc.Spec.ConfigDaemonNodeSelector) == 0 { + return map[string]string{ + "kubernetes.io/os": "linux", + constants.SriovDevicePluginLabel: constants.SriovDevicePluginLabelEnabled, + } } + + tmp := dc.Spec.DeepCopy() + tmp.ConfigDaemonNodeSelector[constants.SriovDevicePluginLabel] = constants.SriovDevicePluginLabelEnabled + return tmp.ConfigDaemonNodeSelector } func syncPluginDaemonObjs(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, - dc *sriovnetworkv1.SriovOperatorConfig, - pl *sriovnetworkv1.SriovNetworkNodePolicyList) error { + dc *sriovnetworkv1.SriovOperatorConfig) error { logger := log.Log.WithName("syncPluginDaemonObjs") logger.V(1).Info("Start to sync sriov daemons objects") @@ -195,7 +198,7 @@ func syncPluginDaemonObjs(ctx context.Context, data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") data.Data["ResourcePrefix"] = vars.ResourcePrefix data.Data["ImagePullSecrets"] = GetImagePullSecrets() - data.Data["NodeSelectorField"] = GetDefaultNodeSelector() + data.Data["NodeSelectorField"] = 
GetNodeSelectorForDevicePlugin(dc) data.Data["UseCDI"] = dc.Spec.UseCDI objs, err := renderDsForCR(constants.PluginPath, &data) if err != nil { @@ -203,34 +206,9 @@ func syncPluginDaemonObjs(ctx context.Context, return err } - if hasNoValidPolicy(pl.Items) { - for _, obj := range objs { - err := deleteK8sResource(ctx, client, obj) - if err != nil { - return err - } - } - return nil - } - // Sync DaemonSets for _, obj := range objs { - if obj.GetKind() == constants.DaemonSet && len(dc.Spec.ConfigDaemonNodeSelector) > 0 { - scheme := kscheme.Scheme - ds := &appsv1.DaemonSet{} - err = scheme.Convert(obj, ds, nil) - if err != nil { - logger.Error(err, "Fail to convert to DaemonSet") - return err - } - ds.Spec.Template.Spec.NodeSelector = dc.Spec.ConfigDaemonNodeSelector - err = scheme.Convert(ds, obj, nil) - if err != nil { - logger.Error(err, "Fail to convert to Unstructured") - return err - } - } - err = syncDsObject(ctx, client, scheme, dc, pl, obj) + err = syncDsObject(ctx, client, scheme, dc, obj) if err != nil { logger.Error(err, "Couldn't sync SR-IoV daemons objects") return err @@ -240,14 +218,7 @@ func syncPluginDaemonObjs(ctx context.Context, return nil } -func deleteK8sResource(ctx context.Context, client k8sclient.Client, in *uns.Unstructured) error { - if err := apply.DeleteObject(ctx, client, in); err != nil { - return fmt.Errorf("failed to delete object %v with err: %v", in, err) - } - return nil -} - -func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, pl *sriovnetworkv1.SriovNetworkNodePolicyList, obj *uns.Unstructured) error { +func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, obj *uns.Unstructured) error { logger := log.Log.WithName("syncDsObject") kind := obj.GetKind() logger.V(1).Info("Start to sync Objects", "Kind", kind) @@ -267,7 +238,7 @@ func syncDsObject(ctx context.Context, client 
k8sclient.Client, scheme *runtime. logger.Error(err, "Fail to convert to DaemonSet") return err } - err = syncDaemonSet(ctx, client, scheme, dc, pl, ds) + err = syncDaemonSet(ctx, client, scheme, dc, ds) if err != nil { logger.Error(err, "Fail to sync DaemonSet", "Namespace", ds.Namespace, "Name", ds.Name) return err @@ -276,54 +247,6 @@ func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime. return nil } -func setDsNodeAffinity(pl *sriovnetworkv1.SriovNetworkNodePolicyList, ds *appsv1.DaemonSet) error { - terms := nodeSelectorTermsForPolicyList(pl.Items) - if len(terms) > 0 { - ds.Spec.Template.Spec.Affinity = &corev1.Affinity{ - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: terms, - }, - }, - } - } - return nil -} - -func nodeSelectorTermsForPolicyList(policies []sriovnetworkv1.SriovNetworkNodePolicy) []corev1.NodeSelectorTerm { - terms := []corev1.NodeSelectorTerm{} - for _, p := range policies { - // Note(adrianc): default policy is deprecated and ignored. - if p.Name == constants.DefaultPolicyName { - continue - } - - if len(p.Spec.NodeSelector) == 0 { - continue - } - expressions := []corev1.NodeSelectorRequirement{} - for k, v := range p.Spec.NodeSelector { - exp := corev1.NodeSelectorRequirement{ - Operator: corev1.NodeSelectorOpIn, - Key: k, - Values: []string{v}, - } - expressions = append(expressions, exp) - } - // sorting is needed to keep the daemon spec stable. 
- // the items are popped in a random order from the map - sort.Slice(expressions, func(i, j int) bool { - return expressions[i].Key < expressions[j].Key - }) - nodeSelector := corev1.NodeSelectorTerm{ - MatchExpressions: expressions, - } - terms = append(terms, nodeSelector) - } - - return terms -} - // renderDsForCR returns a busybox pod with the same name/namespace as the cr func renderDsForCR(path string, data *render.RenderData) ([]*uns.Unstructured, error) { logger := log.Log.WithName("renderDsForCR") @@ -336,16 +259,11 @@ func renderDsForCR(path string, data *render.RenderData) ([]*uns.Unstructured, e return objs, nil } -func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, pl *sriovnetworkv1.SriovNetworkNodePolicyList, in *appsv1.DaemonSet) error { +func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, in *appsv1.DaemonSet) error { logger := log.Log.WithName("syncDaemonSet") logger.V(1).Info("Start to sync DaemonSet", "Namespace", in.Namespace, "Name", in.Name) var err error - if pl != nil { - if err = setDsNodeAffinity(pl, in); err != nil { - return err - } - } if err = controllerutil.SetControllerReference(dc, in, scheme); err != nil { return err } diff --git a/controllers/helper_test.go b/controllers/helper_test.go deleted file mode 100644 index d998cf0da..000000000 --- a/controllers/helper_test.go +++ /dev/null @@ -1,330 +0,0 @@ -/* - - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package controllers - -import ( - "context" - "sync" - "testing" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - - "github.com/google/go-cmp/cmp" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - controllerruntime "sigs.k8s.io/controller-runtime" - - sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" -) - -func TestNodeSelectorMerge(t *testing.T) { - table := []struct { - tname string - policies []sriovnetworkv1.SriovNetworkNodePolicy - expected []corev1.NodeSelectorTerm - }{ - { - tname: "testoneselector", - policies: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "foo": "bar", - }, - }, - }, - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "bb": "cc", - }, - }, - }, - }, - expected: []corev1.NodeSelectorTerm{ - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: corev1.NodeSelectorOpIn, - Key: "foo", - Values: []string{"bar"}, - }, - }, - }, - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb", - Values: []string{"cc"}, - }, - }, - }, - }, - }, - { - tname: "testtwoselectors", - policies: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "foo": "bar", - "foo1": "bar1", - }, - }, - }, - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{ - "bb": "cc", - "bb1": "cc1", - "bb2": "cc2", - }, - }, - }, - }, - expected: []corev1.NodeSelectorTerm{ - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: 
corev1.NodeSelectorOpIn, - Key: "foo", - Values: []string{"bar"}, - }, - { - Operator: corev1.NodeSelectorOpIn, - Key: "foo1", - Values: []string{"bar1"}, - }, - }, - }, - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb", - Values: []string{"cc"}, - }, - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb1", - Values: []string{"cc1"}, - }, - { - Operator: corev1.NodeSelectorOpIn, - Key: "bb2", - Values: []string{"cc2"}, - }, - }, - }, - }, - }, - { - tname: "testemptyselector", - policies: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{}, - }, - }, - }, - expected: []corev1.NodeSelectorTerm{}, - }, - } - - for _, tc := range table { - t.Run(tc.tname, func(t *testing.T) { - selectors := nodeSelectorTermsForPolicyList(tc.policies) - if !cmp.Equal(selectors, tc.expected) { - t.Error(tc.tname, "Selectors not as expected", cmp.Diff(selectors, tc.expected)) - } - }) - } -} - -var _ = Describe("Helper Validation", Ordered, func() { - - var cancel context.CancelFunc - var ctx context.Context - var dc *sriovnetworkv1.SriovOperatorConfig - var in *appsv1.DaemonSet - - BeforeAll(func() { - By("Setup controller manager") - k8sManager, err := setupK8sManagerForTest() - Expect(err).ToNot(HaveOccurred()) - - ctx, cancel = context.WithCancel(context.Background()) - - wg := sync.WaitGroup{} - wg.Add(1) - go func() { - defer wg.Done() - defer GinkgoRecover() - By("Start controller manager") - err := k8sManager.Start(ctx) - Expect(err).ToNot(HaveOccurred()) - }() - - DeferCleanup(func() { - By("Shutdown controller manager") - cancel() - wg.Wait() - }) - }) - - BeforeEach(func() { - dc = &sriovnetworkv1.SriovOperatorConfig{ - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "default", - Namespace: vars.Namespace, - UID: "12312312"}} - in = &appsv1.DaemonSet{ - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "sriov-device-plugin", - Namespace: 
vars.Namespace}, - Spec: appsv1.DaemonSetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "sriov-device-plugin"}}, - Template: corev1.PodTemplateSpec{ - ObjectMeta: controllerruntime.ObjectMeta{ - Labels: map[string]string{"app": "sriov-device-plugin"}}, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Image: "test:latest", - Name: "test", - }, - }, - }, - }}} - - err := k8sClient.Delete(ctx, in) - if err != nil { - Expect(errors.IsNotFound(err)).To(BeTrue()) - } - }) - - Context("syncDaemonSet", func() { - It("should create a new daemon", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: []sriovnetworkv1.SriovNetworkNodePolicy{ - {ObjectMeta: controllerruntime.ObjectMeta{Name: "test", Namespace: vars.Namespace}}, - }} - err := syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).To(BeNil()) - }) - It("should update affinity", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test": "test"}, - }, - }, - }} - - err := k8sClient.Create(ctx, in) - Expect(err).ToNot(HaveOccurred()) - - err = syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution).ToNot(BeNil()) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)).To(Equal(1)) - }) - It("should update affinity with multiple", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: 
[]sriovnetworkv1.SriovNetworkNodePolicy{ - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test": "test"}, - }, - }, - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test1", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test1": "test"}, - }, - }, - }} - - err := k8sClient.Create(ctx, in) - Expect(err).ToNot(HaveOccurred()) - - err = syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution).ToNot(BeNil()) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)).To(Equal(2)) - }) - It("should switch affinity", func() { - pl := &sriovnetworkv1.SriovNetworkNodePolicyList{Items: []sriovnetworkv1.SriovNetworkNodePolicy{ - { - ObjectMeta: controllerruntime.ObjectMeta{ - Name: "test1", - Namespace: vars.Namespace, - }, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - NodeSelector: map[string]string{"test1": "test"}, - }, - }, - }} - - in.Spec.Template.Spec.Affinity = &corev1.Affinity{ - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: []corev1.NodeSelectorTerm{{ - MatchExpressions: []corev1.NodeSelectorRequirement{{ - Operator: corev1.NodeSelectorOpIn, - Key: "test", - Values: []string{"test"}, - }}, - }}, - }, - }, - } - - err := k8sClient.Create(ctx, in) - Expect(err).ToNot(HaveOccurred()) - - err = syncDaemonSet(ctx, k8sClient, vars.Scheme, dc, pl, in) - Expect(err).ToNot(HaveOccurred()) - Expect(in.Spec.Template.Spec.Affinity).ToNot(BeNil()) - 
Expect(in.Spec.Template.Spec.Affinity.NodeAffinity).ToNot(BeNil()) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution).ToNot(BeNil()) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms)).To(Equal(1)) - Expect(len(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions)).To(Equal(1)) - Expect(in.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions[0].Key).To(Equal("test1")) - }) - }) -}) diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 62218436f..29438b176 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -46,6 +46,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -133,10 +134,6 @@ func (r *SriovNetworkNodePolicyReconciler) Reconcile(ctx context.Context, req ct if err = r.syncDevicePluginConfigMap(ctx, defaultOpConf, policyList, nodeList); err != nil { return reconcile.Result{}, err } - // Render and sync Daemon objects - if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultOpConf, policyList); err != nil { - return reconcile.Result{}, err - } // All was successful. Request that this be re-triggered after ResyncPeriod, // so we can reconcile state again. 
@@ -182,6 +179,12 @@ func (r *SriovNetworkNodePolicyReconciler) SetupWithManager(mgr ctrl.Manager) er Info("Enqueuing sync for create event", "resource", e.Object.GetName()) qHandler(q) }, + UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { + if !reflect.DeepEqual(e.ObjectOld.GetLabels(), e.ObjectNew.GetLabels()) { + log.Log.WithName("SriovNetworkNodePolicy").Info("Enqueuing sync for update event", "resource", e.ObjectNew.GetName()) + qHandler(q) + } + }, DeleteFunc: func(ctx context.Context, e event.DeleteEvent, q workqueue.RateLimitingInterface) { log.Log.WithName("SriovNetworkNodePolicy"). Info("Enqueuing sync for delete event", "resource", e.Object.GetName()) @@ -220,6 +223,30 @@ func (r *SriovNetworkNodePolicyReconciler) syncDevicePluginConfigMap(ctx context return err } configData[node.Name] = string(config) + + if data.ResourceList == nil || len(data.ResourceList) == 0 { + // if we don't have policies we should add the disabled label for the device plugin + err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelDisabled, r.Client) + if err != nil { + logger.Error(err, "failed to label node for device plugin label", + "labelKey", + constants.SriovDevicePluginLabel, + "labelValue", + constants.SriovDevicePluginLabelDisabled) + return err + } + } else { + // if we have policies we should add the enabled label for the device plugin + err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginLabel, constants.SriovDevicePluginLabelEnabled, r.Client) + if err != nil { + logger.Error(err, "failed to label node for device plugin label", + "labelKey", + constants.SriovDevicePluginLabel, + "labelValue", + constants.SriovDevicePluginLabelEnabled) + return err + } + } } cm := &corev1.ConfigMap{ @@ -304,8 +331,15 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con } } if !found { + // remove device plugin labels + logger.Info("removing device
plugin label from node as SriovNetworkNodeState doesn't exist", "nodeStateName", ns.Name) + err = utils.RemoveLabelFromNode(ctx, ns.Name, constants.SriovDevicePluginLabel, r.Client) + if err != nil { + logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name) + return err + } logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) - err := r.Delete(ctx, &ns, &client.DeleteOptions{}) + err = r.Delete(ctx, &ns, &client.DeleteOptions{}) if err != nil { logger.Error(err, "Fail to Delete", "SriovNetworkNodeState CR:", ns.GetName()) return err @@ -423,13 +457,13 @@ func (r *SriovNetworkNodePolicyReconciler) renderDevicePluginConfigData(ctx cont found, i := resourceNameInList(p.Spec.ResourceName, &rcl) if found { - err := updateDevicePluginResource(ctx, &rcl.ResourceList[i], &p, nodeState) + err := updateDevicePluginResource(&rcl.ResourceList[i], &p, nodeState) if err != nil { return rcl, err } logger.V(1).Info("Update resource", "Resource", rcl.ResourceList[i]) } else { - rc, err := createDevicePluginResource(ctx, &p, nodeState) + rc, err := createDevicePluginResource(&p, nodeState) if err != nil { return rcl, err } @@ -450,7 +484,6 @@ func resourceNameInList(name string, rcl *dptypes.ResourceConfList) (bool, int) } func createDevicePluginResource( - ctx context.Context, p *sriovnetworkv1.SriovNetworkNodePolicy, nodeState *sriovnetworkv1.SriovNetworkNodeState) (*dptypes.ResourceConfig, error) { netDeviceSelectors := dptypes.NetDeviceSelectors{} @@ -524,7 +557,6 @@ func createDevicePluginResource( } func updateDevicePluginResource( - ctx context.Context, rc *dptypes.ResourceConfig, p *sriovnetworkv1.SriovNetworkNodePolicy, nodeState *sriovnetworkv1.SriovNetworkNodeState) error { diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index a116efe87..abdddbc91 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ 
b/controllers/sriovnetworknodepolicy_controller_test.go @@ -3,14 +3,20 @@ package controllers import ( "context" "encoding/json" + "sync" "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" "github.com/google/go-cmp/cmp" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" - + k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" @@ -126,3 +132,132 @@ func TestRenderDevicePluginConfigData(t *testing.T) { }) } } + +var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { + var cancel context.CancelFunc + var ctx context.Context + + BeforeAll(func() { + By("Create SriovOperatorConfig controller k8s objs") + config := makeDefaultSriovOpConfig() + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + DeferCleanup(func() { + err := k8sClient.Delete(context.Background(), config) + Expect(err).ToNot(HaveOccurred()) + }) + + // setup controller manager + By("Setup controller manager") + k8sManager, err := setupK8sManagerForTest() + Expect(err).ToNot(HaveOccurred()) + + err = (&SriovNetworkNodePolicyReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + FeatureGate: featuregate.New(), + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + + ctx, cancel = context.WithCancel(context.Background()) + + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + defer GinkgoRecover() + By("Start controller manager") + err := k8sManager.Start(ctx) + Expect(err).ToNot(HaveOccurred()) + }() + + DeferCleanup(func() { + By("Shut down manager") + cancel() + wg.Wait() + }) + }) + AfterEach(func() { + err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{}) + 
Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, k8sclient.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, k8sclient.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + }) + Context("device plugin labels", func() { + It("Should add the right labels to the nodes", func() { + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + Labels: map[string]string{"kubernetes.io/os": "linux", + "node-role.kubernetes.io/worker": ""}, + }} + Expect(k8sClient.Create(ctx, node)).To(Succeed()) + + nodeState := &sriovnetworkv1.SriovNetworkNodeState{} + Eventually(func(g Gomega) { + err := k8sClient.Get(context.TODO(), k8sclient.ObjectKey{Name: "node0", Namespace: testNamespace}, nodeState) + g.Expect(err).ToNot(HaveOccurred()) + }, time.Minute, time.Second).Should(Succeed()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name}, node) + g.Expect(err).ToNot(HaveOccurred()) + value, exist := node.Labels[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + g.Expect(value).To(Equal(consts.SriovDevicePluginLabelDisabled)) + }, time.Minute, time.Second).Should(Succeed()) + + nodeState.Status.Interfaces = sriovnetworkv1.InterfaceExts{ + sriovnetworkv1.InterfaceExt{ + Vendor: "8086", + Driver: "i40e", + Mtu: 1500, + Name: "ens803f0", + PciAddress: "0000:86:00.0", + NumVfs: 0, + TotalVfs: 64, + }, + } + err := k8sClient.Status().Update(context.Background(), nodeState) + Expect(err).ToNot(HaveOccurred()) + + somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{} + somePolicy.SetNamespace(testNamespace) + somePolicy.SetName("some-policy") + somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{ + NumVfs: 5, + NodeSelector: map[string]string{"node-role.kubernetes.io/worker": ""}, + NicSelector: 
sriovnetworkv1.SriovNetworkNicSelector{Vendor: "8086"}, + Priority: 20, + } + Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name}, node) + g.Expect(err).ToNot(HaveOccurred()) + value, exist := node.Labels[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + g.Expect(value).To(Equal(consts.SriovDevicePluginLabelEnabled)) + }, time.Minute, time.Second).Should(Succeed()) + + delete(node.Labels, "node-role.kubernetes.io/worker") + err = k8sClient.Update(context.Background(), node) + Expect(err).ToNot(HaveOccurred()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name}, node) + g.Expect(err).ToNot(HaveOccurred()) + _, exist := node.Labels[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeFalse()) + }, time.Minute, time.Second).Should(Succeed()) + + Eventually(func(g Gomega) { + err := k8sClient.Get(context.Background(), k8sclient.ObjectKey{Name: node.Name, Namespace: testNamespace}, nodeState) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.IsNotFound(err)).To(BeTrue()) + }, time.Minute, time.Second).Should(Succeed()) + }) + }) +}) diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index c9f21f428..f79614c44 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -44,12 +44,12 @@ import ( machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - apply "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply" - consts "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply" +
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" - render "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) @@ -140,7 +140,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. return reconcile.Result{}, err } - if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig, policyList); err != nil { + if err = syncPluginDaemonObjs(ctx, r.Client, r.Scheme, defaultConfig); err != nil { return reconcile.Result{}, err } diff --git a/controllers/sriovoperatorconfig_controller_test.go b/controllers/sriovoperatorconfig_controller_test.go index 47e4fc09d..4674bd5b9 100644 --- a/controllers/sriovoperatorconfig_controller_test.go +++ b/controllers/sriovoperatorconfig_controller_test.go @@ -2,7 +2,6 @@ package controllers import ( "context" - "fmt" "os" "strings" "sync" @@ -30,7 +29,7 @@ import ( mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" - util "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" ) var _ = Describe("SriovOperatorConfig controller", Ordered, func() { @@ -41,10 +40,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { By("Create SriovOperatorConfig controller k8s objs") config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) - DeferCleanup(func() { - err := 
k8sClient.Delete(context.Background(), config) - Expect(err).ToNot(HaveOccurred()) - }) somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{} somePolicy.SetNamespace(testNamespace) @@ -56,10 +51,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { Priority: 20, } Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred()) - DeferCleanup(func() { - err := k8sClient.Delete(context.Background(), somePolicy) - Expect(err).ToNot(HaveOccurred()) - }) // setup controller manager By("Setup controller manager") @@ -101,6 +92,27 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { }) Context("When is up", func() { + AfterAll(func() { + err := k8sClient.DeleteAllOf(context.Background(), &corev1.Node{}) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodePolicy{}, client.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovNetworkNodeState{}, client.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + err = k8sClient.DeleteAllOf(context.Background(), &sriovnetworkv1.SriovOperatorConfig{}, client.InNamespace(vars.Namespace)) + Expect(err).ToNot(HaveOccurred()) + + operatorConfigList := &sriovnetworkv1.SriovOperatorConfigList{} + Eventually(func(g Gomega) { + err = k8sClient.List(context.Background(), operatorConfigList, &client.ListOptions{Namespace: vars.Namespace}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(len(operatorConfigList.Items)).To(Equal(0)) + }, time.Minute, time.Second).Should(Succeed()) + }) + BeforeEach(func() { var err error config := &sriovnetworkv1.SriovOperatorConfig{} @@ -286,7 +298,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { daemonSet := &appsv1.DaemonSet{} Eventually(func() map[string]string { - // By("wait for DaemonSet NodeSelector") err := k8sClient.Get(ctx, types.NamespacedName{Name: 
"sriov-network-config-daemon", Namespace: testNamespace}, daemonSet) if err != nil { return nil @@ -295,6 +306,32 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { }, util.APITimeout, util.RetryInterval).Should(Equal(nodeSelector)) }) + It("should be able to update the node selector of sriov-network-device-plugin", func() { + By("specify the configDaemonNodeSelector") + daemonSet := &appsv1.DaemonSet{} + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) + g.Expect(err).ToNot(HaveOccurred()) + _, exist := daemonSet.Spec.Template.Spec.NodeSelector["node-role.kubernetes.io/worker"] + g.Expect(exist).To(BeFalse()) + _, exist = daemonSet.Spec.Template.Spec.NodeSelector[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + + nodeSelector := map[string]string{"node-role.kubernetes.io/worker": ""} + restore := updateConfigDaemonNodeSelector(nodeSelector) + DeferCleanup(restore) + + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) + g.Expect(err).ToNot(HaveOccurred()) + _, exist := daemonSet.Spec.Template.Spec.NodeSelector["node-role.kubernetes.io/worker"] + g.Expect(exist).To(BeTrue()) + _, exist = daemonSet.Spec.Template.Spec.NodeSelector[consts.SriovDevicePluginLabel] + g.Expect(exist).To(BeTrue()) + }, util.APITimeout, util.RetryInterval).Should(Succeed()) + }) + It("should be able to do multiple updates to the node selector of sriov-network-config-daemon", func() { By("changing the configDaemonNodeSelector") firstNodeSelector := map[string]string{"labelA": "", "labelB": "", "labelC": ""} @@ -427,8 +464,8 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { metricsDaemonset := appsv1.DaemonSet{} err := util.WaitForNamespacedObject(&metricsDaemonset, k8sClient, testNamespace, 
"sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To((Equal(nodeSelector))) - }).Should(Succeed()) + g.Expect(metricsDaemonset.Spec.Template.Spec.NodeSelector).To(Equal(nodeSelector)) + }, time.Minute, time.Second).Should(Succeed()) }) It("should deploy extra configuration when the Prometheus operator is installed", func() { @@ -521,53 +558,6 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() { g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n"))) }, "1s").Should(Succeed()) }) - - It("should reconcile to a converging state when multiple node policies are set", func() { - By("Creating a consistent number of node policies") - for i := 0; i < 30; i++ { - p := &sriovnetworkv1.SriovNetworkNodePolicy{ - ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: fmt.Sprintf("p%d", i)}, - Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ - Priority: 99, - NodeSelector: map[string]string{"foo": fmt.Sprintf("v%d", i)}, - }, - } - err := k8sClient.Create(context.Background(), p) - Expect(err).NotTo(HaveOccurred()) - } - - By("Triggering a the reconcile loop") - config := &sriovnetworkv1.SriovOperatorConfig{} - err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) - Expect(err).NotTo(HaveOccurred()) - if config.ObjectMeta.Labels == nil { - config.ObjectMeta.Labels = make(map[string]string) - } - config.ObjectMeta.Labels["trigger-test"] = "test-reconcile-daemonset" - err = k8sClient.Update(context.Background(), config) - Expect(err).NotTo(HaveOccurred()) - - By("Wait until device-plugin Daemonset's affinity has been calculated") - var expectedAffinity *corev1.Affinity - - Eventually(func(g Gomega) { - daemonSet := &appsv1.DaemonSet{} - err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: 
testNamespace}, daemonSet) - g.Expect(err).NotTo(HaveOccurred()) - // Wait until the last policy (with NodeSelector foo=v29) has been considered at least one time - g.Expect(daemonSet.Spec.Template.Spec.Affinity.String()).To(ContainSubstring("v29")) - expectedAffinity = daemonSet.Spec.Template.Spec.Affinity - }, "3s", "1s").Should(Succeed()) - - By("Verify device-plugin Daemonset's affinity doesn't change over time") - Consistently(func(g Gomega) { - daemonSet := &appsv1.DaemonSet{} - err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(daemonSet.Spec.Template.Spec.Affinity). - To(Equal(expectedAffinity)) - }, "3s", "1s").Should(Succeed()) - }) }) }) diff --git a/controllers/suite_test.go b/controllers/suite_test.go index bc2f13b8e..9d5492e21 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -188,6 +188,13 @@ var _ = BeforeSuite(func() { } Expect(k8sClient.Create(context.Background(), ns)).Should(Succeed()) + sa := &corev1.ServiceAccount{TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + Namespace: testNamespace, + }} + Expect(k8sClient.Create(context.Background(), sa)).Should(Succeed()) + // Create openshift Infrastructure infra := &openshiftconfigv1.Infrastructure{ ObjectMeta: metav1.ObjectMeta{ diff --git a/deploy/clusterrole.yaml b/deploy/clusterrole.yaml index e7a596061..e7a84394e 100644 --- a/deploy/clusterrole.yaml +++ b/deploy/clusterrole.yaml @@ -45,12 +45,6 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch", "patch", "update"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["*"] -- apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["get"] - apiGroups: [ "config.openshift.io" ] resources: [ "infrastructures" ] verbs: [ "get", "list", "watch" ] diff --git a/deploy/role.yaml b/deploy/role.yaml index 0a6c27a21..3bdcdc145 100644 --- 
a/deploy/role.yaml +++ b/deploy/role.yaml @@ -1,7 +1,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - creationTimestamp: null name: sriov-network-operator rules: - apiGroups: @@ -76,13 +75,10 @@ rules: resources: - pods verbs: - - '*' -- apiGroups: - - apps - resources: - - daemonsets - verbs: - - '*' + - "get" + - "list" + - "watch" + - "delete" - apiGroups: - sriovnetwork.openshift.io resources: diff --git a/deployment/sriov-network-operator-chart/templates/clusterrole.yaml b/deployment/sriov-network-operator-chart/templates/clusterrole.yaml index 7cd8fd014..519d2c05c 100644 --- a/deployment/sriov-network-operator-chart/templates/clusterrole.yaml +++ b/deployment/sriov-network-operator-chart/templates/clusterrole.yaml @@ -49,12 +49,6 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch", "patch", "update"] - - apiGroups: [""] - resources: ["pods"] - verbs: ["*"] - - apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["get"] - apiGroups: [ "config.openshift.io" ] resources: [ "infrastructures" ] verbs: [ "get", "list", "watch" ] diff --git a/deployment/sriov-network-operator-chart/templates/role.yaml b/deployment/sriov-network-operator-chart/templates/role.yaml index 6551b5775..56e5a5487 100644 --- a/deployment/sriov-network-operator-chart/templates/role.yaml +++ b/deployment/sriov-network-operator-chart/templates/role.yaml @@ -82,13 +82,10 @@ rules: resources: - pods verbs: - - '*' - - apiGroups: - - apps - resources: - - daemonsets - verbs: - - '*' + - "get" + - "list" + - "watch" + - "delete" - apiGroups: - sriovnetwork.openshift.io resources: diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index ba1830f5b..4ce478730 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -70,6 +70,10 @@ const ( MachineConfigPoolPausedAnnotationIdle = "Idle" MachineConfigPoolPausedAnnotationPaused = "Paused" + SriovDevicePluginLabel = "sriovnetwork.openshift.io/device-plugin" + 
SriovDevicePluginLabelEnabled = "Enabled" + SriovDevicePluginLabelDisabled = "Disabled" + NodeDrainAnnotation = "sriovnetwork.openshift.io/state" NodeStateDrainAnnotation = "sriovnetwork.openshift.io/desired-state" NodeStateDrainAnnotationCurrent = "sriovnetwork.openshift.io/current-state" diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index c5f1f333a..5f9aa7065 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -127,16 +127,17 @@ func ObjectHasAnnotation(obj metav1.Object, annoKey string, value string) bool { // AnnotateObject adds annotation to a kubernetes object func AnnotateObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error { - log.Log.V(2).Info("AnnotateObject(): Annotate object", - "objectName", obj.GetName(), - "objectKind", obj.GetObjectKind(), - "annotation", value) newObj := obj.DeepCopyObject().(client.Object) if newObj.GetAnnotations() == nil { newObj.SetAnnotations(map[string]string{}) } if newObj.GetAnnotations()[key] != value { + log.Log.V(2).Info("AnnotateObject(): Annotate object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "annotationKey", key, + "annotationValue", value) newObj.GetAnnotations()[key] = value patch := client.MergeFrom(obj) err := c.Patch(ctx, @@ -160,3 +161,76 @@ func AnnotateNode(ctx context.Context, nodeName string, key, value string, c cli return AnnotateObject(ctx, node, key, value, c) } + +// labelObject adds label to a kubernetes object +func labelObject(ctx context.Context, obj client.Object, key, value string, c client.Client) error { + newObj := obj.DeepCopyObject().(client.Object) + if newObj.GetLabels() == nil { + newObj.SetLabels(map[string]string{}) + } + + if newObj.GetLabels()[key] != value { + log.Log.V(2).Info("labelObject(): label object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "labelKey", key, + "labelValue", value) + newObj.GetLabels()[key] = value + patch := client.MergeFrom(obj) + err := 
c.Patch(ctx, + newObj, patch) + if err != nil { + log.Log.Error(err, "labelObject(): Failed to patch object") + return err + } + } + + return nil +} + +// removeLabelObject removes a label from a kubernetes object +func removeLabelObject(ctx context.Context, obj client.Object, key string, c client.Client) error { + newObj := obj.DeepCopyObject().(client.Object) + if newObj.GetLabels() == nil { + newObj.SetLabels(map[string]string{}) + } + + _, exist := newObj.GetLabels()[key] + if exist { + log.Log.V(2).Info("removeLabelObject(): remove label from object", + "objectName", obj.GetName(), + "objectKind", obj.GetObjectKind(), + "labelKey", key) + delete(newObj.GetLabels(), key) + patch := client.MergeFrom(obj) + err := c.Patch(ctx, + newObj, patch) + if err != nil { + log.Log.Error(err, "removeLabelObject(): Failed to patch object") + return err + } + } + + return nil +} + +// LabelNode adds a label to a node +func LabelNode(ctx context.Context, nodeName string, key, value string, c client.Client) error { + node := &corev1.Node{} + err := c.Get(ctx, client.ObjectKey{Name: nodeName}, node) + if err != nil { + return err + } + + return labelObject(ctx, node, key, value, c) +} + +func RemoveLabelFromNode(ctx context.Context, nodeName string, key string, c client.Client) error { + node := &corev1.Node{} + err := c.Get(ctx, client.ObjectKey{Name: nodeName}, node) + if err != nil { + return err + } + + return removeLabelObject(ctx, node, key, c) +} From 8950f76a9c3b4661073385520ab0f0f3dc55ef2c Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Thu, 14 Nov 2024 09:02:53 -0500 Subject: [PATCH 33/38] deploy: relax Operator node affinity In the context of Hypershift (Hosted Clusters with OpenShift), where a Nodepool (terminology for a worker Node in HCP) is not a control-plane or a master Node but a worker, we can't force the Operator to be deployed on a master node that doesn't exist. Instead, we want to deploy it on a worker.
The proposal here is to relax the rule and use `preferredDuringSchedulingIgnoredDuringExecution` instead so the scheduler will try to find a master node or fallback on other nodes if not found. --- deploy/operator.yaml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/deploy/operator.yaml b/deploy/operator.yaml index e9fb25de3..f95d80c59 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -20,14 +20,22 @@ spec: spec: affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/master - operator: Exists - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: Exists + # In the context of Hypershift, the SR-IOV network + # Operator is deployed on Nodepools which are labeled + # as workers. So we relax the node affinity to prefer + # masters/control-plane when possible otherwise we + # schedule where it's possible. + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: "node-role.kubernetes.io/master" + operator: Exists + - weight: 1 + preference: + matchExpressions: + - key: "node-role.kubernetes.io/control-plane" + operator: Exists tolerations: - effect: NoSchedule key: node-role.kubernetes.io/master From 8a910047067548e26c1a543aa94c9e3d05fcd4e0 Mon Sep 17 00:00:00 2001 From: Clark Zinzow Date: Wed, 18 Sep 2024 12:20:14 -0700 Subject: [PATCH 34/38] Upgrade golangci-lint to work with Go 1.23 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f5ca7edc8..3196c8d46 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ GOLANGCI_LINT = $(BIN_DIR)/golangci-lint # golangci-lint version should be updated periodically # we keep it fixed to avoid it from unexpectedly failing on the project # in case of a version bump -GOLANGCI_LINT_VER = v1.55.2 +GOLANGCI_LINT_VER = v1.61.0 .PHONY: all build clean 
gendeepcopy test test-e2e test-e2e-k8s run image fmt sync-manifests test-e2e-conformance manifests update-codegen From 1d920646025bdafd006b4d4ca4cb0b39517e41a8 Mon Sep 17 00:00:00 2001 From: Clark Zinzow Date: Sun, 24 Nov 2024 12:06:31 -0800 Subject: [PATCH 35/38] Add platform build arg. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3196c8d46..1873c8d0e 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ export OPERATOR_EXEC?=oc BUILD_GOPATH=$(TARGET_DIR):$(TARGET_DIR)/vendor:$(CURPATH)/cmd IMAGE_BUILDER?=docker -IMAGE_BUILD_OPTS?= +IMAGE_BUILD_OPTS?=--platform linux/amd64 DOCKERFILE?=Dockerfile DOCKERFILE_CONFIG_DAEMON?=Dockerfile.sriov-network-config-daemon DOCKERFILE_WEBHOOK?=Dockerfile.webhook From b98d857ca64a199dcd0ec1826d28cf7650341505 Mon Sep 17 00:00:00 2001 From: Clark Zinzow Date: Thu, 30 Jan 2025 18:37:01 -0800 Subject: [PATCH 36/38] Comment out Mellanox plugin's draining + rebooting for totalVfs + SRIOV_EN configs, which is buggy. 
--- pkg/plugins/mellanox/mellanox_plugin.go | 98 +++++++++++++------------ 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index 10b0152bb..73e520709 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -92,54 +92,56 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS return } - for _, ifaceSpec := range mellanoxNicsSpec { - pciPrefix := mlx.GetPciAddressPrefix(ifaceSpec.PciAddress) - // skip processed nics, help not running the same logic 2 times for dual port NICs - if _, ok := processedNics[pciPrefix]; ok { - continue - } - processedNics[pciPrefix] = true - fwCurrent, fwNext, err := p.helpers.GetMlxNicFwData(ifaceSpec.PciAddress) - if err != nil { - return false, false, err - } - - isDualPort := mlx.IsDualPort(ifaceSpec.PciAddress, mellanoxNicsStatus) - // Attributes to change - attrs := &mlx.MlxNic{TotalVfs: -1} - var changeWithoutReboot bool - - totalVfs, totalVfsNeedReboot, totalVfsChangeWithoutReboot := mlx.HandleTotalVfs(fwCurrent, fwNext, attrs, ifaceSpec, isDualPort, mellanoxNicsSpec) - sriovEnNeedReboot, sriovEnChangeWithoutReboot := mlx.HandleEnableSriov(totalVfs, fwCurrent, fwNext, attrs) - needReboot = totalVfsNeedReboot || sriovEnNeedReboot - changeWithoutReboot = totalVfsChangeWithoutReboot || sriovEnChangeWithoutReboot - - needLinkChange, err := mlx.HandleLinkType(pciPrefix, fwCurrent, attrs, mellanoxNicsSpec, mellanoxNicsStatus) - if err != nil { - return false, false, err - } - needReboot = needReboot || needLinkChange - - // no FW changes allowed when NIC is externally managed - if ifaceSpec.ExternallyManaged { - if totalVfsNeedReboot || totalVfsChangeWithoutReboot { - return false, false, fmt.Errorf( - "interface %s required a change in the TotalVfs but the policy is externally managed failing: firmware TotalVf %d requested TotalVf %d", - ifaceSpec.PciAddress, 
fwCurrent.TotalVfs, totalVfs) - } - if needLinkChange { - return false, false, fmt.Errorf("change required for link type but the policy is externally managed, failing") - } - } - - if needReboot || changeWithoutReboot { - attributesToChange[ifaceSpec.PciAddress] = *attrs - } - - if needReboot { - pciAddressesToReset = append(pciAddressesToReset, ifaceSpec.PciAddress) - } - } + // for _, ifaceSpec := range mellanoxNicsSpec { + // pciPrefix := mlx.GetPciAddressPrefix(ifaceSpec.PciAddress) + // // skip processed nics, help not running the same logic 2 times for dual port NICs + // if _, ok := processedNics[pciPrefix]; ok { + // continue + // } + // processedNics[pciPrefix] = true + // fwCurrent, fwNext, err := p.helpers.GetMlxNicFwData(ifaceSpec.PciAddress) + // if err != nil { + // return false, false, err + // } + + // // isDualPort := mlx.IsDualPort(ifaceSpec.PciAddress, mellanoxNicsStatus) + // // Attributes to change + // attrs := &mlx.MlxNic{TotalVfs: -1} + // var changeWithoutReboot bool + + // // totalVfs, totalVfsNeedReboot, totalVfsChangeWithoutReboot := mlx.HandleTotalVfs(fwCurrent, fwNext, attrs, ifaceSpec, isDualPort, mellanoxNicsSpec) + // // sriovEnNeedReboot, sriovEnChangeWithoutReboot := mlx.HandleEnableSriov(totalVfs, fwCurrent, fwNext, attrs) + // // needReboot = totalVfsNeedReboot || sriovEnNeedReboot + // // changeWithoutReboot = totalVfsChangeWithoutReboot || sriovEnChangeWithoutReboot + // needReboot = false + // changeWithoutReboot = false + + // needLinkChange, err := mlx.HandleLinkType(pciPrefix, fwCurrent, attrs, mellanoxNicsSpec, mellanoxNicsStatus) + // if err != nil { + // return false, false, err + // } + // needReboot = needReboot || needLinkChange + + // // no FW changes allowed when NIC is externally managed + // if ifaceSpec.ExternallyManaged { + // // if totalVfsNeedReboot || totalVfsChangeWithoutReboot { + // // return false, false, fmt.Errorf( + // // "interface %s required a change in the TotalVfs but the policy is externally 
managed failing: firmware TotalVf %d requested TotalVf %d", + // // ifaceSpec.PciAddress, fwCurrent.TotalVfs, totalVfs) + // // } + // if needLinkChange { + // return false, false, fmt.Errorf("change required for link type but the policy is externally managed, failing") + // } + // } + + // if needReboot || changeWithoutReboot { + // attributesToChange[ifaceSpec.PciAddress] = *attrs + // } + + // if needReboot { + // pciAddressesToReset = append(pciAddressesToReset, ifaceSpec.PciAddress) + // } + // } // Set total VFs to 0 for mellanox interfaces with no spec for pciPrefix, portsMap := range mellanoxNicsStatus { From bfa0537d9796485cdd9e204d6e6ecf23b9bf72ae Mon Sep 17 00:00:00 2001 From: punker Date: Fri, 31 Jan 2025 12:49:42 +0000 Subject: [PATCH 37/38] Squash commits into one --- api/v1/helper.go | 138 +++++++++++++++++- api/v1/sriovibnetwork_types.go | 2 + .../cni-config/sriov/sriov-cni-config.yaml | 3 + ...vnetwork.openshift.io_sriovibnetworks.yaml | 4 + controllers/generic_network_controller.go | 23 ++- ...vnetwork.openshift.io_sriovibnetworks.yaml | 4 + pkg/helper/mock/mock_helper.go | 16 ++ pkg/host/internal/infiniband/infiniband.go | 15 ++ pkg/host/internal/sriov/sriov.go | 17 ++- pkg/host/internal/sriov/sriov_test.go | 1 + pkg/host/mock/mock_host.go | 15 ++ pkg/host/types/interfaces.go | 3 + 12 files changed, 233 insertions(+), 8 deletions(-) diff --git a/api/v1/helper.go b/api/v1/helper.go index bfdfbc473..08db95c47 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -39,6 +39,8 @@ const ( SriovCniStateOn = "on" SriovCniIpam = "\"ipam\"" SriovCniIpamEmpty = SriovCniIpam + ":{}" + + CniType = "ib-sriov" ) const invalidVfIndex = -1 @@ -49,7 +51,6 @@ var log = logf.Log.WithName("sriovnetwork") // NicIDMap contains supported mapping of IDs with each in the format of: // Vendor ID, Physical Function Device ID, Virtual Function Device ID var NicIDMap = []string{} - var InitialState SriovNetworkNodeState // NetFilterType Represents the NetFilter tags to be 
used @@ -220,6 +221,41 @@ func IsSwitchdevModeSpec(spec SriovNetworkNodeStateSpec) bool { return ContainsSwitchdevInterface(spec.Interfaces) } +func GetGUIDFromSriovNetworkNodeStateStatus(status SriovNetworkNodeStateStatus, interfaceIndex int) string { + // Check if we have enough interfaces + if interfaceIndex < 0 || interfaceIndex >= len(status.Interfaces) { + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): invalid interface index", + "index", interfaceIndex, + "total interfaces", len(status.Interfaces)) + return "" + } + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface index:", "index", interfaceIndex) + + // Get the specific interface by index + iface := status.Interfaces[interfaceIndex] + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface name:", "iface.Name", iface.Name) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface VFs len():", "len(iface.VFs)", len(iface.VFs)) + // Only process InfiniBand interfaces + if strings.EqualFold(iface.LinkType, consts.LinkTypeIB) { + // If interface has VFs and at least one VF has a GUID + if len(iface.VFs) > 0 { + for _, vf := range iface.VFs { + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface VF Name:", "vf.Name", vf.Name) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface VF GUID:", "vf.GUID", vf.GUID) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface MAC:", "vf.Mac", vf.Mac) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface MAC:", "vf.PciAddress", vf.PciAddress) + + // Return first valid GUID found + // Skip uninitialized GUIDs + if vf.GUID != "" && vf.GUID != consts.UninitializedNodeGUID { + return vf.GUID + } + } + } + } + return "" +} + // ContainsSwitchdevInterface returns true if provided interface list contains interface // with switchdev configuration func ContainsSwitchdevInterface(interfaces []Interface) bool { @@ -687,6 +723,102 @@ func (s *SriovNetworkNodeState) GetDriverByPciAddress(addr string) string { return "" } 
+// RenderNetAttDefWithGUID renders a net-att-def with GUID for ib-sriov CNI +func (cr *SriovIBNetwork) RenderNetAttDefWithGUID(status SriovNetworkNodeStateStatus) (*uns.Unstructured, error) { + logger := log.WithName("RenderNetAttDefWithGUID") + logger.Info("Start to render IB SRIOV CNI NetworkAttachmentDefinition") + + // Extract index from network name, it usually comes with a digit at the end + // We're using this digit to distinguish between them and map a proper VF GUID in the NetAttachDef + re := regexp.MustCompile(`(\d+)$`) + matches := re.FindStringSubmatch(cr.Name) + interfaceIndex := 0 // 0 (also when no number is found) becomes index -1 below, which yields no GUID + if len(matches) > 1 { + if idx, err := strconv.Atoi(matches[1]); err == nil { + interfaceIndex = idx + } + } + + // render RawCNIConfig manifests + + data := render.MakeRenderData() + data.Data["CniType"] = CniType + + data.Data["pKeyConfigured"] = true + data.Data["pKey"] = cr.Spec.PKey + data.Data["SriovNetworkName"] = cr.Name + if cr.Spec.NetworkNamespace == "" { + data.Data["SriovNetworkNamespace"] = cr.Namespace + } else { + data.Data["SriovNetworkNamespace"] = cr.Spec.NetworkNamespace + } + + data.Data["SriovCniResourceName"] = os.Getenv("RESOURCE_PREFIX") + "/" + cr.Spec.ResourceName + if cr.Spec.ScanGUIDs { + logger.Info("Getting GUID from SriovNetworkNodeState") + if guid := GetGUIDFromSriovNetworkNodeStateStatus(status, interfaceIndex-1); guid != "" { + data.Data["GUID"] = guid + logger.Info(fmt.Sprintf("Found GUID in SriovNetworkNodeState: %s", guid)) + } + } + + data.Data["StateConfigured"] = true + switch cr.Spec.LinkState { + case SriovCniStateEnable: + data.Data["SriovCniState"] = SriovCniStateEnable + case SriovCniStateDisable: + data.Data["SriovCniState"] = SriovCniStateDisable + case SriovCniStateAuto: + data.Data["SriovCniState"] = SriovCniStateAuto + default: + data.Data["StateConfigured"] = false + } + + if cr.Spec.Capabilities == "" { + data.Data["CapabilitiesConfigured"] = false + } else { + 
data.Data["CapabilitiesConfigured"] = true + data.Data["SriovCniCapabilities"] = cr.Spec.Capabilities + } + + if cr.Spec.IPAM != "" { + data.Data["SriovCniIpam"] = SriovCniIpam + ":" + strings.Join(strings.Fields(cr.Spec.IPAM), "") + } else { + data.Data["SriovCniIpam"] = SriovCniIpamEmpty + } + + // metaplugins for the infiniband cni + data.Data["MetaPluginsConfigured"] = false + if cr.Spec.MetaPluginsConfig != "" { + data.Data["MetaPluginsConfigured"] = true + data.Data["MetaPlugins"] = cr.Spec.MetaPluginsConfig + } + + // logLevel and logFile are currently not supported by the ib-sriov-cni + data.Data["LogLevelConfigured"] = false + data.Data["LogFileConfigured"] = false + + objs, err := render.RenderDir(filepath.Join(ManifestsPath, "sriov"), &data) + if err != nil { + return nil, err + } + for _, obj := range objs { + raw, _ := json.Marshal(obj) + logger.Info("render NetworkAttachmentDefinition output", "raw", string(raw)) + } + return objs[0], nil +} + +func (cr *SriovNetwork) RenderNetAttDefWithGUID(status SriovNetworkNodeStateStatus) (*uns.Unstructured, error) { + // Not implemented + return cr.RenderNetAttDef() +} + +func (cr *OVSNetwork) RenderNetAttDefWithGUID(status SriovNetworkNodeStateStatus) (*uns.Unstructured, error) { + // Not implemented + return cr.RenderNetAttDef() +} + // RenderNetAttDef renders a net-att-def for ib-sriov CNI func (cr *SriovIBNetwork) RenderNetAttDef() (*uns.Unstructured, error) { logger := log.WithName("RenderNetAttDef") @@ -694,7 +826,7 @@ func (cr *SriovIBNetwork) RenderNetAttDef() (*uns.Unstructured, error) { // render RawCNIConfig manifests data := render.MakeRenderData() - data.Data["CniType"] = "ib-sriov" + data.Data["CniType"] = CniType data.Data["SriovNetworkName"] = cr.Name if cr.Spec.NetworkNamespace == "" { data.Data["SriovNetworkNamespace"] = cr.Namespace @@ -762,6 +894,8 @@ func (cr *SriovNetwork) RenderNetAttDef() (*uns.Unstructured, error) { // render RawCNIConfig manifests data := render.MakeRenderData() + 
data.Data["pKeyConfigured"] = false + data.Data["CniType"] = "sriov" data.Data["SriovNetworkName"] = cr.Name if cr.Spec.NetworkNamespace == "" { diff --git a/api/v1/sriovibnetwork_types.go b/api/v1/sriovibnetwork_types.go index d8634d9ca..ec0c1ab63 100644 --- a/api/v1/sriovibnetwork_types.go +++ b/api/v1/sriovibnetwork_types.go @@ -43,6 +43,8 @@ type SriovIBNetworkSpec struct { // MetaPluginsConfig configuration to be used in order to chain metaplugins to the sriov interface returned // by the operator. MetaPluginsConfig string `json:"metaPlugins,omitempty"` + PKey string `json:"pKey,omitempty"` + ScanGUIDs bool `json:"scanGuids,omitempty"` } // SriovIBNetworkStatus defines the observed state of SriovIBNetwork diff --git a/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml b/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml index 749e326c7..5590ae451 100644 --- a/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml +++ b/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml @@ -35,6 +35,9 @@ spec: "max_tx_rate":{{.SriovCniMaxTxRate}}, {{- end -}} {{- end -}} +{{- if .pKeyConfigured -}} + "pkey":"{{.pKey}}", +{{- end -}} {{- if .CapabilitiesConfigured -}} "capabilities":{{.SriovCniCapabilities}}, {{- end -}} diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml index 4b4b44d92..be14886cb 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -62,9 +62,13 @@ spec: networkNamespace: description: Namespace of the NetworkAttachmentDefinition custom resource type: string + pKey: + type: string resourceName: description: SRIOV Network device plugin endpoint resource name type: string + scanGuids: + type: boolean required: - resourceName type: object diff --git a/controllers/generic_network_controller.go b/controllers/generic_network_controller.go index 
e6b84d3aa..3f84454b4 100644 --- a/controllers/generic_network_controller.go +++ b/controllers/generic_network_controller.go @@ -19,6 +19,7 @@ package controllers import ( "context" "reflect" + "time" netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" corev1 "k8s.io/api/core/v1" @@ -44,6 +45,8 @@ type networkCRInstance interface { client.Object // renders NetAttDef from the network instance RenderNetAttDef() (*uns.Unstructured, error) + // RenderNetAttDefWithGUID renders NetAttDef with GUID (if available) + RenderNetAttDefWithGUID(status sriovnetworkv1.SriovNetworkNodeStateStatus) (*uns.Unstructured, error) // return name of the target namespace for the network NetworkNamespace() string } @@ -124,8 +127,26 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque } return reconcile.Result{}, err } - raw, err := instance.RenderNetAttDef() + + // Get list of all SriovNetworkNodeStates + nodeStateList := &sriovnetworkv1.SriovNetworkNodeStateList{} + if err := r.List(ctx, nodeStateList, &client.ListOptions{ + Namespace: vars.Namespace, + }); err != nil { + return ctrl.Result{}, err + } + + // If no node states exist yet, requeue + if len(nodeStateList.Items) == 0 { + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + nodeState := nodeStateList.Items[0] + reqLogger.Info("Starting RenderNetAttDefWithGUID") + raw, err := instance.RenderNetAttDefWithGUID(nodeState.Status) + if err != nil { + reqLogger.Error(err, "Failed to render NetworkAttachmentDefinition") return reconcile.Result{}, err } netAttDef := &netattdefv1.NetworkAttachmentDefinition{} diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml index 4b4b44d92..be14886cb 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ 
b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -62,9 +62,13 @@ spec: networkNamespace: description: Namespace of the NetworkAttachmentDefinition custom resource type: string + pKey: + type: string resourceName: description: SRIOV Network device plugin endpoint resource name type: string + scanGuids: + type: boolean required: - resourceName type: object diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index cfca2a768..27213adbd 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -5,6 +5,7 @@ package mock_helper import ( + "net" reflect "reflect" gomock "github.com/golang/mock/gomock" @@ -1193,3 +1194,18 @@ func (mr *MockHostHelpersInterfaceMockRecorder) WriteCheckpointFile(arg0 interfa mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WriteCheckpointFile", reflect.TypeOf((*MockHostHelpersInterface)(nil).WriteCheckpointFile), arg0) } + +// GetVfGUID mocks base method +func (m *MockHostHelpersInterface) GetVfGUID(vfAddr string, pfAddr string, vfID int) (net.HardwareAddr, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVfGUID", vfAddr, pfAddr, vfID) + ret0, _ := ret[0].(net.HardwareAddr) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetVfGUID indicates an expected call of GetVfGUID +func (mr *MockHostHelpersInterfaceMockRecorder) GetVfGUID(vfAddr, pfAddr string, vfID int) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVfGUID", reflect.TypeOf((*MockHostHelpersInterface)(nil).GetVfGUID), vfAddr, pfAddr, vfID) +} diff --git a/pkg/host/internal/infiniband/infiniband.go b/pkg/host/internal/infiniband/infiniband.go index f54957bc3..0dc5d100d 100644 --- a/pkg/host/internal/infiniband/infiniband.go +++ b/pkg/host/internal/infiniband/infiniband.go @@ -55,6 +55,21 @@ func (i *infiniband) ConfigureVfGUID(vfAddr string, pfAddr string, vfID int, pfL return 
i.applyVfGUIDToInterface(guid, vfAddr, vfID, pfLink) } +// GetVfGUID gets a GUID from the pool for an IB VF device +func (i *infiniband) GetVfGUID(vfAddr string, pfAddr string, vfID int) (net.HardwareAddr, error) { + log.Log.Info("GetVfGUID(): configure vf guid", "vfAddr", vfAddr, "pfAddr", pfAddr, "vfID", vfID) + if i.guidPool == nil { + return nil, fmt.Errorf("no GUID pool available for VF %s", vfAddr) + } + guidFromPool, err := i.guidPool.GetVFGUID(pfAddr, vfID) + if err != nil { + log.Log.Info("GetVfGUID(): failed to get GUID from IB GUID pool", "address", vfAddr, "error", err) + return nil, err + } + log.Log.Info("GetVfGUID(): get vf guid", "address", vfAddr, "guid", guidFromPool) + return guidFromPool, nil +} + func (i *infiniband) applyVfGUIDToInterface(guid net.HardwareAddr, vfAddr string, vfID int, pfLink netlink.Link) error { if err := i.netlinkLib.LinkSetVfNodeGUID(pfLink, vfID, guid); err != nil { return err diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index 379cf6a70..12f96c878 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -122,24 +122,30 @@ func (s *sriov) ResetSriovDevice(ifaceStatus sriovnetworkv1.InterfaceExt) error return nil } -func (s *sriov) getVfInfo(vfAddr string, pfName string, eswitchMode string, devices []*ghw.PCIDevice) sriovnetworkv1.VirtualFunction { +func (s *sriov) getVfInfo(vfAddr string, pfAddr string, pfName string, eswitchMode string, devices []*ghw.PCIDevice) sriovnetworkv1.VirtualFunction { driver, err := s.dputilsLib.GetDriverName(vfAddr) if err != nil { log.Log.Error(err, "getVfInfo(): unable to parse device driver", "device", vfAddr) } - id, err := s.dputilsLib.GetVFID(vfAddr) + vfid, err := s.dputilsLib.GetVFID(vfAddr) if err != nil { log.Log.Error(err, "getVfInfo(): unable to get VF index", "device", vfAddr) } + guid, err := s.infinibandHelper.GetVfGUID(vfAddr, pfAddr, vfid) + if err != nil { + log.Log.Error(err, "GetVfGUID(): unable to get VF 
GUID", "device", vfAddr) + } + vf := sriovnetworkv1.VirtualFunction{ PciAddress: vfAddr, Driver: driver, - VfID: id, + VfID: vfid, VdpaType: s.vdpaHelper.DiscoverVDPAType(vfAddr), + GUID: guid.String(), } if eswitchMode == sriovnetworkv1.ESwithModeSwitchDev { - repName, err := s.sriovnetLib.GetVfRepresentor(pfName, id) + repName, err := s.sriovnetLib.GetVfRepresentor(pfName, vfid) if err != nil { log.Log.Error(err, "getVfInfo(): failed to get VF representor name", "device", vfAddr) } else { @@ -300,7 +306,7 @@ func (s *sriov) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]sri continue } for _, vf := range vfs { - instance := s.getVfInfo(vf, pfNetName, iface.EswitchMode, devices) + instance := s.getVfInfo(vf, pfNetName, iface.PciAddress, iface.EswitchMode, devices) iface.VFs = append(iface.VFs, instance) } } @@ -479,6 +485,7 @@ func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error { if err := s.infinibandHelper.ConfigureVfGUID(addr, iface.PciAddress, vfID, pfLink); err != nil { return err } + if err := s.kernelHelper.Unbind(iface.PciAddress); err != nil { return err } diff --git a/pkg/host/internal/sriov/sriov_test.go b/pkg/host/internal/sriov/sriov_test.go index f30e93773..232bf82fb 100644 --- a/pkg/host/internal/sriov/sriov_test.go +++ b/pkg/host/internal/sriov/sriov_test.go @@ -92,6 +92,7 @@ var _ = Describe("SRIOV", func() { hostMock.EXPECT().GetNetDevLinkSpeed("enp216s0f0np0").Return("100000 Mb/s") hostMock.EXPECT().GetNetDevLinkAdminState("enp216s0f0np0").Return("up") hostMock.EXPECT().GetNetDevNodeGUID("0000:d8:00.2").Return("guid1") + hostMock.EXPECT().GetVfGUID("0000:d8:00.2", "enp216s0f0np0", 0).Return(net.HardwareAddr{}, nil) storeManagerMode.EXPECT().LoadPfsStatus("0000:d8:00.0").Return(nil, false, nil) dputilsLibMock.EXPECT().IsSriovPF("0000:d8:00.0").Return(true) diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index cb4d1480a..0432ee479 100644 --- a/pkg/host/mock/mock_host.go +++ 
b/pkg/host/mock/mock_host.go @@ -12,6 +12,7 @@ import ( store "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/store" types "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" netlink "github.com/vishvananda/netlink" + "net" ) // MockHostManagerInterface is a mock of HostManagerInterface interface. @@ -121,6 +122,11 @@ func (mr *MockHostManagerInterfaceMockRecorder) BindDriverByBusAndDevice(bus, de return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BindDriverByBusAndDevice", reflect.TypeOf((*MockHostManagerInterface)(nil).BindDriverByBusAndDevice), bus, device, driver) } +func (mr *MockHostManagerInterfaceMockRecorder) GetVfGUID(vfAddr string, pfAddr string, vfID int) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVfGUID", reflect.TypeOf((*MockHostManagerInterface)(nil).GetVfGUID), vfAddr, pfAddr, vfID) +} + // CheckRDMAEnabled mocks base method. func (m *MockHostManagerInterface) CheckRDMAEnabled() (bool, error) { m.ctrl.T.Helper() @@ -207,6 +213,15 @@ func (mr *MockHostManagerInterfaceMockRecorder) ConfigureVfGUID(vfAddr, pfAddr, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ConfigureVfGUID", reflect.TypeOf((*MockHostManagerInterface)(nil).ConfigureVfGUID), vfAddr, pfAddr, vfID, pfLink) } +// GetVfGUID mocks base method. +func (m *MockHostManagerInterface) GetVfGUID(vfAddr, pfAddr string, vfID int) (net.HardwareAddr, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVfGUID", vfAddr, pfAddr, vfID) + ret0, _ := ret[0].(net.HardwareAddr) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + // CreateVDPADevice mocks base method. 
func (m *MockHostManagerInterface) CreateVDPADevice(pciAddr, vdpaType string) error { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index 5918dca34..44e87fef4 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -1,6 +1,8 @@ package types import ( + "net" + "github.com/vishvananda/netlink" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" @@ -186,4 +188,5 @@ type BridgeInterface interface { type InfinibandInterface interface { // ConfigureVfGUID configures and sets a GUID for an IB VF device ConfigureVfGUID(vfAddr string, pfAddr string, vfID int, pfLink netlink.Link) error + GetVfGUID(vfAddr string, pfAddr string, vfID int) (net.HardwareAddr, error) } From 758ce7ba3d0391aef1dc3da55f37337abee078af Mon Sep 17 00:00:00 2001 From: punker Date: Fri, 31 Jan 2025 12:49:42 +0000 Subject: [PATCH 38/38] Squash commits into one --- api/v1/helper.go | 138 +++++++++++++++++- api/v1/sriovibnetwork_types.go | 2 + .../cni-config/sriov/sriov-cni-config.yaml | 3 + ...vnetwork.openshift.io_sriovibnetworks.yaml | 4 + controllers/generic_network_controller.go | 23 ++- ...vnetwork.openshift.io_sriovibnetworks.yaml | 4 + pkg/helper/mock/mock_helper.go | 16 ++ pkg/host/internal/infiniband/infiniband.go | 15 ++ pkg/host/internal/sriov/sriov.go | 25 ++-- pkg/host/internal/sriov/sriov_test.go | 1 + pkg/host/mock/mock_host.go | 15 ++ pkg/host/types/interfaces.go | 3 + 12 files changed, 234 insertions(+), 15 deletions(-) diff --git a/api/v1/helper.go b/api/v1/helper.go index 62ea0d2a5..959ec0616 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -40,6 +40,8 @@ const ( SriovCniStateOn = "on" SriovCniIpam = "\"ipam\"" SriovCniIpamEmpty = SriovCniIpam + ":{}" + + CniType = "ib-sriov" ) const invalidVfIndex = -1 @@ -50,7 +52,6 @@ var log = logf.Log.WithName("sriovnetwork") // NicIDMap contains supported mapping of IDs with each in the format of: // Vendor ID, Physical Function Device ID, 
Virtual Function Device ID var NicIDMap = []string{} - var InitialState SriovNetworkNodeState // NetFilterType Represents the NetFilter tags to be used @@ -221,6 +222,41 @@ func IsSwitchdevModeSpec(spec SriovNetworkNodeStateSpec) bool { return ContainsSwitchdevInterface(spec.Interfaces) } +func GetGUIDFromSriovNetworkNodeStateStatus(status SriovNetworkNodeStateStatus, interfaceIndex int) string { + // Check if we have enough interfaces + if interfaceIndex < 0 || interfaceIndex >= len(status.Interfaces) { + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): invalid interface index", + "index", interfaceIndex, + "total interfaces", len(status.Interfaces)) + return "" + } + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface index:", "index", interfaceIndex) + + // Get the specific interface by index + iface := status.Interfaces[interfaceIndex] + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface name:", "iface.Name", iface.Name) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface VFs len():", "len(iface.VFs)", len(iface.VFs)) + // Only process InfiniBand interfaces + if strings.EqualFold(iface.LinkType, consts.LinkTypeIB) { + // If interface has VFs and at least one VF has a GUID + if len(iface.VFs) > 0 { + for _, vf := range iface.VFs { + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface VF Name:", "vf.Name", vf.Name) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface VF GUID:", "vf.GUID", vf.GUID) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface MAC:", "vf.Mac", vf.Mac) + log.Info("GetGUIDFromSriovNetworkNodeStateStatus(): interface MAC:", "vf.PciAddress", vf.PciAddress) + + // Return first valid GUID found + // Skip uninitialized GUIDs + if vf.GUID != "" && vf.GUID != consts.UninitializedNodeGUID { + return vf.GUID + } + } + } + } + return "" +} + // ContainsSwitchdevInterface returns true if provided interface list contains interface // with switchdev configuration func 
ContainsSwitchdevInterface(interfaces []Interface) bool { @@ -688,6 +724,102 @@ func (s *SriovNetworkNodeState) GetDriverByPciAddress(addr string) string { return "" } +// RenderNetAttDefWithGUID renders a net-att-def with GUID for ib-sriov CNI +func (cr *SriovIBNetwork) RenderNetAttDefWithGUID(status SriovNetworkNodeStateStatus) (*uns.Unstructured, error) { + logger := log.WithName("RenderNetAttDefWithGUID") + logger.Info("Start to render IB SRIOV CNI NetworkAttachmentDefinition") + + // Extract index from network name, it usually comes with a digit at the end + // We're using this digit to distinguish between them and map a proper VF GUID in the NetAttachDef + re := regexp.MustCompile(`(\d+)$`) + matches := re.FindStringSubmatch(cr.Name) + interfaceIndex := 0 // 0 (also when no number is found) becomes index -1 below, which yields no GUID + if len(matches) > 1 { + if idx, err := strconv.Atoi(matches[1]); err == nil { + interfaceIndex = idx + } + } + + // render RawCNIConfig manifests + + data := render.MakeRenderData() + data.Data["CniType"] = CniType + + data.Data["pKeyConfigured"] = true + data.Data["pKey"] = cr.Spec.PKey + data.Data["SriovNetworkName"] = cr.Name + if cr.Spec.NetworkNamespace == "" { + data.Data["SriovNetworkNamespace"] = cr.Namespace + } else { + data.Data["SriovNetworkNamespace"] = cr.Spec.NetworkNamespace + } + + data.Data["SriovCniResourceName"] = os.Getenv("RESOURCE_PREFIX") + "/" + cr.Spec.ResourceName + if cr.Spec.ScanGUIDs { + logger.Info("Getting GUID from SriovNetworkNodeState") + if guid := GetGUIDFromSriovNetworkNodeStateStatus(status, interfaceIndex-1); guid != "" { + data.Data["GUID"] = guid + logger.Info(fmt.Sprintf("Found GUID in SriovNetworkNodeState: %s", guid)) + } + } + + data.Data["StateConfigured"] = true + switch cr.Spec.LinkState { + case SriovCniStateEnable: + data.Data["SriovCniState"] = SriovCniStateEnable + case SriovCniStateDisable: + data.Data["SriovCniState"] = SriovCniStateDisable + case SriovCniStateAuto: + data.Data["SriovCniState"] = 
SriovCniStateAuto + default: + data.Data["StateConfigured"] = false + } + + if cr.Spec.Capabilities == "" { + data.Data["CapabilitiesConfigured"] = false + } else { + data.Data["CapabilitiesConfigured"] = true + data.Data["SriovCniCapabilities"] = cr.Spec.Capabilities + } + + if cr.Spec.IPAM != "" { + data.Data["SriovCniIpam"] = SriovCniIpam + ":" + strings.Join(strings.Fields(cr.Spec.IPAM), "") + } else { + data.Data["SriovCniIpam"] = SriovCniIpamEmpty + } + + // metaplugins for the infiniband cni + data.Data["MetaPluginsConfigured"] = false + if cr.Spec.MetaPluginsConfig != "" { + data.Data["MetaPluginsConfigured"] = true + data.Data["MetaPlugins"] = cr.Spec.MetaPluginsConfig + } + + // logLevel and logFile are currently not supported by the ib-sriov-cni + data.Data["LogLevelConfigured"] = false + data.Data["LogFileConfigured"] = false + + objs, err := render.RenderDir(filepath.Join(ManifestsPath, "sriov"), &data) + if err != nil { + return nil, err + } + for _, obj := range objs { + raw, _ := json.Marshal(obj) + logger.Info("render NetworkAttachmentDefinition output", "raw", string(raw)) + } + return objs[0], nil +} + +func (cr *SriovNetwork) RenderNetAttDefWithGUID(status SriovNetworkNodeStateStatus) (*uns.Unstructured, error) { + // Not implemented + return cr.RenderNetAttDef() +} + +func (cr *OVSNetwork) RenderNetAttDefWithGUID(status SriovNetworkNodeStateStatus) (*uns.Unstructured, error) { + // Not implemented + return cr.RenderNetAttDef() +} + // RenderNetAttDef renders a net-att-def for ib-sriov CNI func (cr *SriovIBNetwork) RenderNetAttDef() (*uns.Unstructured, error) { logger := log.WithName("RenderNetAttDef") @@ -695,7 +827,7 @@ func (cr *SriovIBNetwork) RenderNetAttDef() (*uns.Unstructured, error) { // render RawCNIConfig manifests data := render.MakeRenderData() - data.Data["CniType"] = "ib-sriov" + data.Data["CniType"] = CniType data.Data["SriovNetworkName"] = cr.Name if cr.Spec.NetworkNamespace == "" { data.Data["SriovNetworkNamespace"] = 
cr.Namespace @@ -763,6 +895,8 @@ func (cr *SriovNetwork) RenderNetAttDef() (*uns.Unstructured, error) { // render RawCNIConfig manifests data := render.MakeRenderData() + data.Data["pKeyConfigured"] = false + data.Data["CniType"] = "sriov" data.Data["SriovNetworkName"] = cr.Name if cr.Spec.NetworkNamespace == "" { diff --git a/api/v1/sriovibnetwork_types.go b/api/v1/sriovibnetwork_types.go index d8634d9ca..ec0c1ab63 100644 --- a/api/v1/sriovibnetwork_types.go +++ b/api/v1/sriovibnetwork_types.go @@ -43,6 +43,8 @@ type SriovIBNetworkSpec struct { // MetaPluginsConfig configuration to be used in order to chain metaplugins to the sriov interface returned // by the operator. MetaPluginsConfig string `json:"metaPlugins,omitempty"` + PKey string `json:"pKey,omitempty"` + ScanGUIDs bool `json:"scanGuids,omitempty"` } // SriovIBNetworkStatus defines the observed state of SriovIBNetwork diff --git a/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml b/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml index 749e326c7..5590ae451 100644 --- a/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml +++ b/bindata/manifests/cni-config/sriov/sriov-cni-config.yaml @@ -35,6 +35,9 @@ spec: "max_tx_rate":{{.SriovCniMaxTxRate}}, {{- end -}} {{- end -}} +{{- if .pKeyConfigured -}} + "pkey":"{{.pKey}}", +{{- end -}} {{- if .CapabilitiesConfigured -}} "capabilities":{{.SriovCniCapabilities}}, {{- end -}} diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml index 4b4b44d92..be14886cb 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -62,9 +62,13 @@ spec: networkNamespace: description: Namespace of the NetworkAttachmentDefinition custom resource type: string + pKey: + type: string resourceName: description: SRIOV Network device plugin endpoint resource name type: string + scanGuids: + type: 
boolean required: - resourceName type: object diff --git a/controllers/generic_network_controller.go b/controllers/generic_network_controller.go index e6b84d3aa..3f84454b4 100644 --- a/controllers/generic_network_controller.go +++ b/controllers/generic_network_controller.go @@ -19,6 +19,7 @@ package controllers import ( "context" "reflect" + "time" netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" corev1 "k8s.io/api/core/v1" @@ -44,6 +45,8 @@ type networkCRInstance interface { client.Object // renders NetAttDef from the network instance RenderNetAttDef() (*uns.Unstructured, error) + // RenderNetAttDefWithGUID renders NetAttDef with GUID (if available) + RenderNetAttDefWithGUID(status sriovnetworkv1.SriovNetworkNodeStateStatus) (*uns.Unstructured, error) // return name of the target namespace for the network NetworkNamespace() string } @@ -124,8 +127,26 @@ func (r *genericNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reque } return reconcile.Result{}, err } - raw, err := instance.RenderNetAttDef() + + // Get list of all SriovNetworkNodeStates + nodeStateList := &sriovnetworkv1.SriovNetworkNodeStateList{} + if err := r.List(ctx, nodeStateList, &client.ListOptions{ + Namespace: vars.Namespace, + }); err != nil { + return ctrl.Result{}, err + } + + // If no node states exist yet, requeue + if len(nodeStateList.Items) == 0 { + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + nodeState := nodeStateList.Items[0] + reqLogger.Info("Starting RenderNetAttDefWithGUID") + raw, err := instance.RenderNetAttDefWithGUID(nodeState.Status) + if err != nil { + reqLogger.Error(err, "Failed to render NetworkAttachmentDefinition") return reconcile.Result{}, err } netAttDef := &netattdefv1.NetworkAttachmentDefinition{} diff --git a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml 
b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml index 4b4b44d92..be14886cb 100644 --- a/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ b/deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -62,9 +62,13 @@ spec: networkNamespace: description: Namespace of the NetworkAttachmentDefinition custom resource type: string + pKey: + type: string resourceName: description: SRIOV Network device plugin endpoint resource name type: string + scanGuids: + type: boolean required: - resourceName type: object diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index b413ecdee..bb7839a9c 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -5,6 +5,7 @@ package mock_helper import ( + "net" reflect "reflect" gomock "github.com/golang/mock/gomock" @@ -1237,3 +1238,18 @@ func (mr *MockHostHelpersInterfaceMockRecorder) WriteCheckpointFile(arg0 interfa mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WriteCheckpointFile", reflect.TypeOf((*MockHostHelpersInterface)(nil).WriteCheckpointFile), arg0) } + +// GetVfGUID mocks base method +func (m *MockHostHelpersInterface) GetVfGUID(vfAddr string, pfAddr string, vfID int) (net.HardwareAddr, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVfGUID", vfAddr, pfAddr, vfID) + ret0, _ := ret[0].(net.HardwareAddr) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetVfGUID indicates an expected call of GetVfGUID +func (mr *MockHostHelpersInterfaceMockRecorder) GetVfGUID(vfAddr, pfAddr string, vfID int) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVfGUID", reflect.TypeOf((*MockHostHelpersInterface)(nil).GetVfGUID), vfAddr, pfAddr, vfID) +} diff --git a/pkg/host/internal/infiniband/infiniband.go b/pkg/host/internal/infiniband/infiniband.go index 
f54957bc3..0dc5d100d 100644 --- a/pkg/host/internal/infiniband/infiniband.go +++ b/pkg/host/internal/infiniband/infiniband.go @@ -55,6 +55,21 @@ func (i *infiniband) ConfigureVfGUID(vfAddr string, pfAddr string, vfID int, pfL return i.applyVfGUIDToInterface(guid, vfAddr, vfID, pfLink) } +// GetVfGUID gets a GUID from the pool for an IB VF device +func (i *infiniband) GetVfGUID(vfAddr string, pfAddr string, vfID int) (net.HardwareAddr, error) { + log.Log.Info("GetVfGUID(): configure vf guid", "vfAddr", vfAddr, "pfAddr", pfAddr, "vfID", vfID) + if i.guidPool == nil { + return nil, fmt.Errorf("no GUID pool available for VF %s", vfAddr) + } + guidFromPool, err := i.guidPool.GetVFGUID(pfAddr, vfID) + if err != nil { + log.Log.Info("GetVfGUID(): failed to get GUID from IB GUID pool", "address", vfAddr, "error", err) + return nil, err + } + log.Log.Info("GetVfGUID(): get vf guid", "address", vfAddr, "guid", guidFromPool) + return guidFromPool, nil +} + func (i *infiniband) applyVfGUIDToInterface(guid net.HardwareAddr, vfAddr string, vfID int, pfLink netlink.Link) error { if err := i.netlinkLib.LinkSetVfNodeGUID(pfLink, vfID, guid); err != nil { return err diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index 3e5989bae..f2c3987d0 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -122,24 +122,30 @@ func (s *sriov) ResetSriovDevice(ifaceStatus sriovnetworkv1.InterfaceExt) error return nil } -func (s *sriov) getVfInfo(vfAddr string, pfName string, eswitchMode string, devices []*ghw.PCIDevice) sriovnetworkv1.VirtualFunction { +func (s *sriov) getVfInfo(vfAddr string, pfAddr string, pfName string, eswitchMode string, devices []*ghw.PCIDevice) sriovnetworkv1.VirtualFunction { driver, err := s.dputilsLib.GetDriverName(vfAddr) if err != nil { log.Log.Error(err, "getVfInfo(): unable to parse device driver", "device", vfAddr) } - id, err := s.dputilsLib.GetVFID(vfAddr) + vfid, err := s.dputilsLib.GetVFID(vfAddr) 
if err != nil { log.Log.Error(err, "getVfInfo(): unable to get VF index", "device", vfAddr) } + guid, err := s.infinibandHelper.GetVfGUID(vfAddr, pfAddr, vfid) + if err != nil { + log.Log.Error(err, "GetVfGUID(): unable to get VF GUID", "device", vfAddr) + } + vf := sriovnetworkv1.VirtualFunction{ PciAddress: vfAddr, Driver: driver, - VfID: id, + VfID: vfid, VdpaType: s.vdpaHelper.DiscoverVDPAType(vfAddr), + GUID: guid.String(), } if eswitchMode == sriovnetworkv1.ESwithModeSwitchDev { - repName, err := s.sriovnetLib.GetVfRepresentor(pfName, id) + repName, err := s.sriovnetLib.GetVfRepresentor(pfName, vfid) if err != nil { log.Log.Error(err, "getVfInfo(): failed to get VF representor name", "device", vfAddr) } else { @@ -217,7 +223,7 @@ func (s *sriov) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]sri return nil, fmt.Errorf("DiscoverSriovDevices(): error getting PCI info: %v", err) } - devices := pci.Devices + devices := pci.ListDevices() if len(devices) == 0 { return nil, fmt.Errorf("DiscoverSriovDevices(): could not retrieve PCI devices") } @@ -300,7 +306,7 @@ func (s *sriov) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]sri continue } for _, vf := range vfs { - instance := s.getVfInfo(vf, pfNetName, iface.EswitchMode, devices) + instance := s.getVfInfo(vf, pfNetName, iface.PciAddress, iface.EswitchMode, devices) iface.VFs = append(iface.VFs, instance) } } @@ -376,11 +382,6 @@ func (s *sriov) configureHWOptionsForSwitchdev(iface *sriovnetworkv1.Interface) log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to read current flow steering mode for the device", "device", iface.PciAddress) return err } - if currentFlowSteeringMode == "" { - log.Log.V(2).Info("configureHWOptionsForSwitchdev(): can't detect current flow_steering_mode mode for the device, skip", - "device", iface.PciAddress) - return nil - } if currentFlowSteeringMode == desiredFlowSteeringMode { return nil } @@ -484,7 +485,7 @@ func (s *sriov) 
configSriovVFDevices(iface *sriovnetworkv1.Interface) error { if err := s.infinibandHelper.ConfigureVfGUID(addr, iface.PciAddress, vfID, pfLink); err != nil { return err } - if err := s.kernelHelper.Unbind(addr); err != nil { + if err := s.kernelHelper.Unbind(iface.PciAddress); err != nil { return err } } else { diff --git a/pkg/host/internal/sriov/sriov_test.go b/pkg/host/internal/sriov/sriov_test.go index 319bacf54..df99bf5aa 100644 --- a/pkg/host/internal/sriov/sriov_test.go +++ b/pkg/host/internal/sriov/sriov_test.go @@ -87,6 +87,7 @@ var _ = Describe("SRIOV", func() { hostMock.EXPECT().GetNetDevLinkSpeed("enp216s0f0np0").Return("100000 Mb/s") hostMock.EXPECT().GetNetDevLinkAdminState("enp216s0f0np0").Return("up") hostMock.EXPECT().GetNetDevNodeGUID("0000:d8:00.2").Return("guid1") + hostMock.EXPECT().GetVfGUID("0000:d8:00.2", "enp216s0f0np0", 0).Return(net.HardwareAddr{}, nil) storeManagerMode.EXPECT().LoadPfsStatus("0000:d8:00.0").Return(nil, false, nil) dputilsLibMock.EXPECT().IsSriovPF("0000:d8:00.0").Return(true) diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index 095d270a9..a929c5cb9 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -12,6 +12,7 @@ import ( store "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/store" types "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" netlink "github.com/vishvananda/netlink" + "net" ) // MockHostManagerInterface is a mock of HostManagerInterface interface. 
@@ -121,6 +122,11 @@ func (mr *MockHostManagerInterfaceMockRecorder) BindDriverByBusAndDevice(bus, de return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BindDriverByBusAndDevice", reflect.TypeOf((*MockHostManagerInterface)(nil).BindDriverByBusAndDevice), bus, device, driver) } +func (mr *MockHostManagerInterfaceMockRecorder) GetVfGUID(vfAddr string, pfAddr string, vfID int) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVfGUID", reflect.TypeOf((*MockHostManagerInterface)(nil).GetVfGUID), vfAddr, pfAddr, vfID) +} + // CheckRDMAEnabled mocks base method. func (m *MockHostManagerInterface) CheckRDMAEnabled() (bool, error) { m.ctrl.T.Helper() @@ -207,6 +213,15 @@ func (mr *MockHostManagerInterfaceMockRecorder) ConfigureVfGUID(vfAddr, pfAddr, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ConfigureVfGUID", reflect.TypeOf((*MockHostManagerInterface)(nil).ConfigureVfGUID), vfAddr, pfAddr, vfID, pfLink) } +// GetVfGUID mocks base method. +func (m *MockHostManagerInterface) GetVfGUID(vfAddr, pfAddr string, vfID int) (net.HardwareAddr, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVfGUID", vfAddr, pfAddr, vfID) + ret0, _ := ret[0].(net.HardwareAddr) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + // CreateVDPADevice mocks base method. 
func (m *MockHostManagerInterface) CreateVDPADevice(pciAddr, vdpaType string) error { m.ctrl.T.Helper() diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index 6844ee5ae..84be27426 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -1,6 +1,8 @@ package types import ( + "net" + "github.com/vishvananda/netlink" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" @@ -190,6 +192,7 @@ type BridgeInterface interface { type InfinibandInterface interface { // ConfigureVfGUID configures and sets a GUID for an IB VF device ConfigureVfGUID(vfAddr string, pfAddr string, vfID int, pfLink netlink.Link) error + GetVfGUID(vfAddr string, pfAddr string, vfID int) (net.HardwareAddr, error) } type CPUVendor int