From 9f5b110ae1bf0938d17783104ffa4f3ded099e80 Mon Sep 17 00:00:00 2001 From: Mansi Kulkarni Date: Thu, 17 Oct 2024 11:54:48 -0400 Subject: [PATCH] [controllers] Introduce metriccontroller This commit adds a new metriccontroller that watches the WMCO namespace and the node objects to create and sync metric endpoints. --- cmd/operator/main.go | 19 +- controllers/configmap_controller.go | 5 - controllers/metric_controller.go | 309 +++++++++++++++++++++++ controllers/node_controller.go | 5 + controllers/windowsmachine_controller.go | 9 - pkg/metrics/metrics.go | 219 +--------------- 6 files changed, 324 insertions(+), 242 deletions(-) create mode 100644 controllers/metric_controller.go diff --git a/cmd/operator/main.go b/cmd/operator/main.go index 7a50b1dd96..5c6a3c2ba3 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -27,7 +27,6 @@ import ( "github.com/openshift/windows-machine-config-operator/controllers" "github.com/openshift/windows-machine-config-operator/pkg/cluster" - "github.com/openshift/windows-machine-config-operator/pkg/metrics" "github.com/openshift/windows-machine-config-operator/pkg/nodeconfig/payload" "github.com/openshift/windows-machine-config-operator/pkg/servicescm" "github.com/openshift/windows-machine-config-operator/pkg/windows" @@ -266,22 +265,20 @@ func main() { os.Exit(1) } - //+kubebuilder:scaffold:builder - // The above marker tells kubebuilder that this is where the SetupWithManager function should be inserted when new - // controllers are generated by Operator SDK. 
- - metricsConfig, err := metrics.NewConfig(mgr, cfg, watchNamespace) + mReconciler, err := controllers.NewMetricReconciler(mgr, clusterConfig, cfg, watchNamespace) if err != nil { - setupLog.Error(err, "failed to create MetricsConfig object") + setupLog.Error(err, "unable to create metrics reconciler") os.Exit(1) } - - // Configure the metric resources - if err := metricsConfig.Configure(ctx); err != nil { - setupLog.Error(err, "error setting up metrics") + if err = mReconciler.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Metrics") os.Exit(1) } + //+kubebuilder:scaffold:builder + // The above marker tells kubebuilder that this is where the SetupWithManager function should be inserted when new + // controllers are generated by Operator SDK. + // Create the singleton Windows services ConfigMap if err := configMapReconciler.EnsureServicesConfigMapExists(); err != nil { setupLog.Error(err, "error ensuring object exists", "singleton", types.NamespacedName{Namespace: watchNamespace, diff --git a/controllers/configmap_controller.go b/controllers/configmap_controller.go index 1e5ab5ae43..b218b44f65 100644 --- a/controllers/configmap_controller.go +++ b/controllers/configmap_controller.go @@ -287,11 +287,6 @@ func (r *ConfigMapReconciler) reconcileNodes(ctx context.Context, windowsInstanc if err = r.deconfigureInstances(instances, nodes); err != nil { return fmt.Errorf("error removing undesired nodes from cluster: %w", err) } - - // Once all the proper Nodes are in the cluster, configure the prometheus endpoints. 
- if err := r.prometheusNodeConfig.Configure(); err != nil { - return fmt.Errorf("unable to configure Prometheus: %w", err) - } return nil } diff --git a/controllers/metric_controller.go b/controllers/metric_controller.go new file mode 100644 index 0000000000..6d9c5a7d27 --- /dev/null +++ b/controllers/metric_controller.go @@ -0,0 +1,309 @@ +package controllers + +import ( + "context" + "fmt" + "reflect" + "sigs.k8s.io/controller-runtime/pkg/handler" + "strconv" + + "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + "github.com/openshift/windows-machine-config-operator/pkg/cluster" + "github.com/openshift/windows-machine-config-operator/pkg/condition" + "github.com/openshift/windows-machine-config-operator/pkg/metrics" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + monclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1" +) + +//+kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch + +const ( + // MetricControllers is the name of this controller in logs and other outputs. 
+ MetricControllers = "metrics" +) + +type metricReconciler struct { + *monclient.MonitoringV1Client + instanceReconciler +} + +func NewMetricReconciler(mgr manager.Manager, clusterConfig cluster.Config, cfg *rest.Config, watchNamespace string) (*metricReconciler, error) { + clientset, err := kubernetes.NewForConfig(mgr.GetConfig()) + if err != nil { + return nil, fmt.Errorf("error creating kubernetes clientset: %w", err) + } + mclient, err := monclient.NewForConfig(cfg) + if err != nil { + return nil, fmt.Errorf("error creating monitoring client: %w", err) + } + // Initialize prometheus configuration + pc, err := metrics.NewPrometheusNodeConfig(clientset, watchNamespace) + if err != nil { + return nil, fmt.Errorf("unable to initialize Prometheus configuration: %w", err) + } + + return &metricReconciler{ + MonitoringV1Client: mclient, + instanceReconciler: instanceReconciler{ + client: mgr.GetClient(), + log: ctrl.Log.WithName("controllers").WithName(MetricControllers), + k8sclientset: clientset, + clusterServiceCIDR: clusterConfig.Network().GetServiceCIDR(), + watchNamespace: watchNamespace, + recorder: mgr.GetEventRecorderFor(MetricControllers), + prometheusNodeConfig: pc, + }, + }, nil +} + +// Reconcile is part of the main kubernetes reconciliation loop which reads that state of the cluster for a +// Node object and aims to move the current state of the cluster closer to the desired state. 
+func (r *metricReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) { + r.log = r.log.WithValues(NodeController, req.NamespacedName) + // Prevent WMCO upgrades while Node objects are being processed + if err := condition.MarkAsBusy(r.client, r.watchNamespace, r.recorder, NodeController); err != nil { + return ctrl.Result{}, err + } + defer func() { + err = markAsFreeOnSuccess(r.client, r.watchNamespace, r.recorder, NodeController, result.Requeue, err) + }() + // validate if cluster monitoring is enabled in the operator namespace + enabled, err := r.validate(ctx) + if err != nil { + return ctrl.Result{}, fmt.Errorf("error validating cluster monitoring label: %s", err) + } + // Proceed only if monitoring is enabled + if !enabled { + return ctrl.Result{}, nil + } + if err = r.Configure(ctx); err != nil { + return ctrl.Result{}, fmt.Errorf("error setting up metrics configurations: %w", err) + } + // configure Prometheus for Windows instances configured as nodes + if err := r.prometheusNodeConfig.Configure(); err != nil { + return ctrl.Result{}, fmt.Errorf("unable to configure Prometheus: %w", err) + } + + return ctrl.Result{}, nil +} + +// validate will verify if cluster monitoring is enabled in the operator namespace. If the label is set to false or not +// present, it will log and send warning events to the user. If the label holds a non-boolean value, returns an error. 
+func (r *metricReconciler) validate(ctx context.Context) (bool, error) { + // validate if metrics label is added to namespace + labelValue := false + var err error + wmcoNamespace, err := r.k8sclientset.CoreV1().Namespaces().Get(ctx, r.watchNamespace, metav1.GetOptions{}) + if err != nil { + return false, fmt.Errorf("error getting operator namespace: %w", err) + } + // if the label exists, update value from default of false + if value, ok := wmcoNamespace.Labels["openshift.io/cluster-monitoring"]; ok { + labelValue, err = strconv.ParseBool(value) + if err != nil { + return false, fmt.Errorf("monitoring label must have a boolean value: %w", err) + } + } + if !labelValue { + r.recorder.Eventf(wmcoNamespace, v1.EventTypeWarning, "labelValidationFailed", + "Cluster monitoring openshift.io/cluster-monitoring=true label is not enabled in %s namespace", r.watchNamespace) + } + metricsEnabled = labelValue + return metricsEnabled, nil +} + +// Configure takes care of all the required configuration steps +// for Prometheus monitoring like ensuring service monitor exists +// and creating metrics Endpoints object. 
+func (r *metricReconciler) Configure(ctx context.Context) error { + if err := r.ensureServiceMonitor(); err != nil { + return fmt.Errorf("error ensuring serviceMonitor exists: %w", err) + } + var subsets []v1.EndpointSubset + existingEndpoint, err := r.k8sclientset.CoreV1().Endpoints(r.watchNamespace).Get(ctx, metrics.WindowsMetricsResource, metav1.GetOptions{}) + if err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("error retrieving %s endpoint: %w", metrics.WindowsMetricsResource, err) + } + } else { + subsets = existingEndpoint.Subsets + err = r.k8sclientset.CoreV1().Endpoints(r.watchNamespace).Delete(ctx, metrics.WindowsMetricsResource, metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("error deleting %s endpoint: %w", metrics.WindowsMetricsResource, err) + } + } + if err := r.createEndpoint(subsets); err != nil { + return fmt.Errorf("error creating metrics Endpoint: %w", err) + } + return nil +} + +// ensureServiceMonitor creates a serviceMonitor object in the operator namespace if it does not exist. 
+func (r *metricReconciler) ensureServiceMonitor() error { + // get existing serviceMonitor object if it exists + existingSM, err := r.ServiceMonitors(r.watchNamespace).Get(context.TODO(), metrics.WindowsMetricsResource, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf( + "error retrieving %s serviceMonitor: %w", metrics.WindowsMetricsResource, err) + } + + serverName := fmt.Sprintf("%s.%s.svc", metrics.WindowsMetricsResource, r.watchNamespace) + replacement := "$1" + expectedSM := &monv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: metrics.WindowsMetricsResource, + Namespace: r.watchNamespace, + Labels: map[string]string{ + "name": metrics.WindowsMetricsResource, + }, + }, + Spec: monv1.ServiceMonitorSpec{ + Endpoints: []monv1.Endpoint{ + { + HonorLabels: true, + Interval: "30s", + Path: "/metrics", + Port: "metrics", + Scheme: "https", + BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", + TLSConfig: &monv1.TLSConfig{ + CAFile: "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt", + SafeTLSConfig: monv1.SafeTLSConfig{ + ServerName: &serverName, + }, + }, + RelabelConfigs: []monv1.RelabelConfig{ + { + Action: "replace", + Regex: "(.*)", + Replacement: &replacement, + TargetLabel: "instance", + SourceLabels: []monv1.LabelName{ + "__meta_kubernetes_endpoint_address_target_name", + }, + }, + }, + }, + }, + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": metrics.WindowsMetricsResource, + }, + }, + }, + } + + if err == nil { + // check if existing serviceMonitor's contents are as expected, delete it if not + if existingSM.Name == expectedSM.Name && existingSM.Namespace == expectedSM.Namespace && + reflect.DeepEqual(existingSM.Spec, expectedSM.Spec) { + return nil + } + err = r.ServiceMonitors(r.watchNamespace).Delete(context.TODO(), metrics.WindowsMetricsResource, + metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("unable to delete service 
monitor %s/%s: %w", r.watchNamespace, metrics.WindowsMetricsResource, + err) + } + r.log.Info("Deleted malformed resource", "serviceMonitor", metrics.WindowsMetricsResource, + "namespace", r.watchNamespace) + } + + _, err = r.ServiceMonitors(r.watchNamespace).Create(context.TODO(), expectedSM, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("error creating service monitor: %w", err) + } + return nil +} + +// createEndpoint creates an endpoint object in the operator namespace. +// WMCO is no longer creating a service with a selector therefore no Endpoint +// object is created and WMCO needs to create the Endpoint object. +// We cannot create endpoints as a part of manifests deployment as +// Endpoints resources are not currently OLM-supported for bundle creation. +func (r *metricReconciler) createEndpoint(subsets []v1.EndpointSubset) error { + // create new Endpoint + newEndpoint := &v1.Endpoints{ + TypeMeta: metav1.TypeMeta{ + Kind: "Endpoints", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: metrics.WindowsMetricsResource, + Namespace: r.watchNamespace, + Labels: map[string]string{"name": metrics.WindowsMetricsResource}, + }, + Subsets: subsets, + } + _, err := r.k8sclientset.CoreV1().Endpoints(r.watchNamespace).Create(context.TODO(), + newEndpoint, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("error creating metrics Endpoint: %w", err) + } + return nil +} + +// SetupWithManager sets up the controller with the Manager. 
+func (r *metricReconciler) SetupWithManager(mgr ctrl.Manager) error { + metricsPredicate := predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return isMonitoringEnabled(e.Object, r.watchNamespace) + }, + UpdateFunc: func(e event.UpdateEvent) bool { + return isMonitoringEnabled(e.ObjectNew, r.watchNamespace) + }, + GenericFunc: func(e event.GenericEvent) bool { + return false + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return false + }, + } + windowsNodePredicate := predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return isWindowsNode(e.Object) + }, + UpdateFunc: func(e event.UpdateEvent) bool { + return false + }, + GenericFunc: func(e event.GenericEvent) bool { + return false + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return isWindowsNode(e.Object) + }, + } + return ctrl.NewControllerManagedBy(mgr). + For(&v1.Namespace{}, builder.WithPredicates(metricsPredicate)). + Watches(&v1.Node{}, &handler.EnqueueRequestForObject{}, builder.WithPredicates(windowsNodePredicate)). 
+ Complete(r) +} + +// isMonitoringEnabled returns true if the given object namespace has monitoring label set to true +func isMonitoringEnabled(obj runtime.Object, watchNamespace string) bool { + namespace, ok := obj.(*v1.Namespace) + if !ok { + return false + } + if namespace.GetName() != watchNamespace { + return false + } + if value, ok := namespace.Labels["openshift.io/cluster-monitoring"]; ok { + labelValue, err := strconv.ParseBool(value) + if err != nil { + return false + } + return labelValue + } + return false +} diff --git a/controllers/node_controller.go b/controllers/node_controller.go index f48a77bb73..cef08af5db 100644 --- a/controllers/node_controller.go +++ b/controllers/node_controller.go @@ -44,6 +44,11 @@ const ( NodeController = "node" ) +var ( + // metricsEnabled specifies if metrics are enabled in the current cluster + metricsEnabled = true +) + // nodeReconciler holds the info required to reconcile a Node object, inclduing that of the underlying Windows instance type nodeReconciler struct { instanceReconciler diff --git a/controllers/windowsmachine_controller.go b/controllers/windowsmachine_controller.go index 98e28b07e4..5a9078de75 100644 --- a/controllers/windowsmachine_controller.go +++ b/controllers/windowsmachine_controller.go @@ -309,11 +309,6 @@ func (r *WindowsMachineReconciler) Reconcile(ctx context.Context, } } else if *machine.Status.Phase != provisionedPhase { log.V(1).Info("machine not provisioned", "phase", *machine.Status.Phase) - // configure Prometheus when a machine is not in `Running` or `Provisioned` phase. This configuration is - // required to update Endpoints object when Windows machines are being deleted. 
- if err := r.prometheusNodeConfig.Configure(); err != nil { - return ctrl.Result{}, fmt.Errorf("unable to configure Prometheus: %w", err) - } // Machine is not in provisioned or running state, nothing we should do as of now return ctrl.Result{}, nil } @@ -360,10 +355,6 @@ func (r *WindowsMachineReconciler) Reconcile(ctx context.Context, } r.recorder.Eventf(machine, core.EventTypeNormal, "MachineSetup", "Machine %s configured successfully", machine.Name) - // configure Prometheus after a Windows machine is configured as a Node. - if err := r.prometheusNodeConfig.Configure(); err != nil { - return ctrl.Result{}, fmt.Errorf("unable to configure Prometheus: %w", err) - } return ctrl.Result{}, nil } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index dc5410dad5..544fc06516 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -4,24 +4,13 @@ import ( "context" "encoding/json" "fmt" - "reflect" - "strconv" - - monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - monclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1" + "github.com/openshift/windows-machine-config-operator/pkg/nodeconfig" + "github.com/openshift/windows-machine-config-operator/pkg/patch" "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" - k8sclient "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/manager" - - "github.com/openshift/windows-machine-config-operator/pkg/nodeconfig" - "github.com/openshift/windows-machine-config-operator/pkg/patch" ) //+kubebuilder:rbac:groups="",resources=services;services/finalizers,verbs=create;get;delete @@ -33,8 +22,6 @@ import ( var ( log = ctrl.Log.WithName("metrics") - // metricsEnabled specifies if metrics are enabled in 
the current cluster - metricsEnabled = true ) const ( @@ -58,18 +45,6 @@ type PrometheusNodeConfig struct { namespace string } -// Config holds the information required to interact with metrics objects -type Config struct { - // a handle that allows us to interact with the Kubernetes API. - *kubernetes.Clientset - // a handle that allows us to interact with the Monitoring API. - *monclient.MonitoringV1Client - // namespace is the namespace in which metrics objects are created - namespace string - // recorder to generate events - recorder record.EventRecorder -} - // NewPrometheuopsNodeConfig creates a new instance for prometheusNodeConfig to be used by the caller. func NewPrometheusNodeConfig(clientset *kubernetes.Clientset, watchNamespace string) (*PrometheusNodeConfig, error) { @@ -79,26 +54,6 @@ func NewPrometheusNodeConfig(clientset *kubernetes.Clientset, watchNamespace str }, nil } -// NewConfig creates a new instance for Config to be used by the caller. -func NewConfig(mgr manager.Manager, cfg *rest.Config, namespace string) (*Config, error) { - if cfg == nil { - return nil, fmt.Errorf("config should not be nil") - } - oclient, err := k8sclient.NewForConfig(cfg) - if err != nil { - return nil, fmt.Errorf("error creating config client: %w", err) - } - mclient, err := monclient.NewForConfig(cfg) - if err != nil { - return nil, fmt.Errorf("error creating monitoring client: %w", err) - } - return &Config{Clientset: oclient, - MonitoringV1Client: mclient, - namespace: namespace, - recorder: mgr.GetEventRecorderFor("metrics"), - }, nil -} - // syncMetricsEndpoint updates the endpoint object with the new list of IP addresses from the Windows nodes and the // metrics port. func (pc *PrometheusNodeConfig) syncMetricsEndpoint(nodeEndpointAdressess []v1.EndpointAddress) error { @@ -134,11 +89,6 @@ func (pc *PrometheusNodeConfig) syncMetricsEndpoint(nodeEndpointAdressess []v1.E // Configure patches the endpoint object to reflect the current list Windows nodes. 
func (pc *PrometheusNodeConfig) Configure() error { - // Check if metrics are enabled in current cluster - if !metricsEnabled { - log.Info("install the prometheus-operator to enable Prometheus configuration") - return nil - } // get list of Windows nodes that are in Ready phase nodes, err := pc.k8sclientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: nodeconfig.WindowsOSLabel, FieldSelector: "spec.unschedulable=false"}) @@ -217,168 +167,3 @@ func isEndpointsValid(nodes *v1.NodeList, endpoints *v1.Endpoints) bool { } return true } - -// Configure takes care of all the required configuration steps -// for Prometheus monitoring like validating monitoring label -// and creating metrics Endpoints object. -func (c *Config) Configure(ctx context.Context) error { - // validate if cluster monitoring is enabled in the operator namespace - enabled, err := c.validate(ctx) - if err != nil { - return fmt.Errorf("error validating cluster monitoring label: %s", err) - } - // Create Metrics Endpoint object only if monitoring is enabled - if !enabled { - return nil - } - if err := c.ensureServiceMonitor(); err != nil { - return fmt.Errorf("error ensuring serviceMonitor exists: %w", err) - } - // In the case of an operator restart, a previous Endpoint object will be deleted and a new one will - // be created to ensure we have a correct spec. 
- var subsets []v1.EndpointSubset - existingEndpoint, err := c.CoreV1().Endpoints(c.namespace).Get(ctx, WindowsMetricsResource, metav1.GetOptions{}) - if err != nil { - if !apierrors.IsNotFound(err) { - return fmt.Errorf("error retrieving %s endpoint: %w", WindowsMetricsResource, err) - } - } else { - subsets = existingEndpoint.Subsets - err = c.CoreV1().Endpoints(c.namespace).Delete(ctx, WindowsMetricsResource, metav1.DeleteOptions{}) - if err != nil { - return fmt.Errorf("error deleting %s endpoint: %w", WindowsMetricsResource, err) - } - } - if err := c.createEndpoint(subsets); err != nil { - return fmt.Errorf("error creating metrics Endpoint: %w", err) - } - return nil -} - -// validate will verify if cluster monitoring is enabled in the operator namespace. If the label is set to false or not -// present, it will log and send warning events to the user. If the label holds a non-boolean value, returns an error. -func (c *Config) validate(ctx context.Context) (bool, error) { - // validate if metrics label is added to namespace - wmcoNamespace, err := c.CoreV1().Namespaces().Get(ctx, c.namespace, metav1.GetOptions{}) - if err != nil { - return false, fmt.Errorf("error getting operator namespace: %w", err) - } - - labelValue := false - // if the label exists, update value from default of false - if value, ok := wmcoNamespace.Labels["openshift.io/cluster-monitoring"]; ok { - labelValue, err = strconv.ParseBool(value) - if err != nil { - return false, fmt.Errorf("monitoring label must have a boolean value: %w", err) - } - } - if !labelValue { - c.recorder.Eventf(wmcoNamespace, v1.EventTypeWarning, "labelValidationFailed", - "Cluster monitoring openshift.io/cluster-monitoring=true label is not enabled in %s namespace", c.namespace) - } - metricsEnabled = labelValue - return metricsEnabled, nil -} - -// createEndpoint creates an endpoint object in the operator namespace. 
-// WMCO is no longer creating a service with a selector therefore no Endpoint -// object is created and WMCO needs to create the Endpoint object. -// We cannot create endpoints as a part of manifests deployment as -// Endpoints resources are not currently OLM-supported for bundle creation. -func (c *Config) createEndpoint(subsets []v1.EndpointSubset) error { - // create new Endpoint - newEndpoint := &v1.Endpoints{ - TypeMeta: metav1.TypeMeta{ - Kind: "Endpoints", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: WindowsMetricsResource, - Namespace: c.namespace, - Labels: map[string]string{"name": WindowsMetricsResource}, - }, - Subsets: subsets, - } - _, err := c.CoreV1().Endpoints(c.namespace).Create(context.TODO(), - newEndpoint, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("error creating metrics Endpoint: %w", err) - } - return nil -} - -// ensureServiceMonitor creates a serviceMonitor object in the operator namespace if it does not exist. -func (c *Config) ensureServiceMonitor() error { - // get existing serviceMonitor object if it exists - existingSM, err := c.ServiceMonitors(c.namespace).Get(context.TODO(), WindowsMetricsResource, metav1.GetOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("error retrieving %s serviceMonitor: %w", WindowsMetricsResource, err) - } - - serverName := fmt.Sprintf("%s.%s.svc", WindowsMetricsResource, c.namespace) - replacement := "$1" - expectedSM := &monv1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: WindowsMetricsResource, - Namespace: c.namespace, - Labels: map[string]string{ - "name": WindowsMetricsResource, - }, - }, - Spec: monv1.ServiceMonitorSpec{ - Endpoints: []monv1.Endpoint{ - { - HonorLabels: true, - Interval: "30s", - Path: "/metrics", - Port: "metrics", - Scheme: "https", - BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", - TLSConfig: &monv1.TLSConfig{ - CAFile: "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt", - 
SafeTLSConfig: monv1.SafeTLSConfig{ - ServerName: &serverName, - }, - }, - RelabelConfigs: []monv1.RelabelConfig{ - { - Action: "replace", - Regex: "(.*)", - Replacement: &replacement, - TargetLabel: "instance", - SourceLabels: []monv1.LabelName{ - "__meta_kubernetes_endpoint_address_target_name", - }, - }, - }, - }, - }, - Selector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": WindowsMetricsResource, - }, - }, - }, - } - - if err == nil { - // check if existing serviceMonitor's contents are as expected, delete it if not - if existingSM.Name == expectedSM.Name && existingSM.Namespace == expectedSM.Namespace && - reflect.DeepEqual(existingSM.Spec, expectedSM.Spec) { - return nil - } - err = c.ServiceMonitors(c.namespace).Delete(context.TODO(), WindowsMetricsResource, - metav1.DeleteOptions{}) - if err != nil { - return fmt.Errorf("unable to delete service monitor %s/%s: %w", c.namespace, WindowsMetricsResource, - err) - } - log.Info("Deleted malformed resource", "serviceMonitor", WindowsMetricsResource, - "namespace", c.namespace) - } - - _, err = c.ServiceMonitors(c.namespace).Create(context.TODO(), expectedSM, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("error creating service monitor: %w", err) - } - return nil -}