diff --git a/cmd/operator/main.go b/cmd/operator/main.go index 7a50b1dd96..5c6a3c2ba3 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -27,7 +27,6 @@ import ( "github.com/openshift/windows-machine-config-operator/controllers" "github.com/openshift/windows-machine-config-operator/pkg/cluster" - "github.com/openshift/windows-machine-config-operator/pkg/metrics" "github.com/openshift/windows-machine-config-operator/pkg/nodeconfig/payload" "github.com/openshift/windows-machine-config-operator/pkg/servicescm" "github.com/openshift/windows-machine-config-operator/pkg/windows" @@ -266,22 +265,20 @@ func main() { os.Exit(1) } - //+kubebuilder:scaffold:builder - // The above marker tells kubebuilder that this is where the SetupWithManager function should be inserted when new - // controllers are generated by Operator SDK. - - metricsConfig, err := metrics.NewConfig(mgr, cfg, watchNamespace) + mReconciler, err := controllers.NewMetricReconciler(mgr, clusterConfig, cfg, watchNamespace) if err != nil { - setupLog.Error(err, "failed to create MetricsConfig object") + setupLog.Error(err, "unable to create metrics reconciler") os.Exit(1) } - - // Configure the metric resources - if err := metricsConfig.Configure(ctx); err != nil { - setupLog.Error(err, "error setting up metrics") + if err = mReconciler.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Metrics") os.Exit(1) } + //+kubebuilder:scaffold:builder + // The above marker tells kubebuilder that this is where the SetupWithManager function should be inserted when new + // controllers are generated by Operator SDK. + // Create the singleton Windows services ConfigMap if err := configMapReconciler.EnsureServicesConfigMapExists(); err != nil { setupLog.Error(err, "error ensuring object exists", "singleton", types.NamespacedName{Namespace: watchNamespace, diff --git a/controllers/metric_controller.go b/controllers/metric_controller.go new file mode 100644 index 0000000000..d19b050b4b --- /dev/null +++ b/controllers/metric_controller.go @@ -0,0 +1,258 @@ +package controllers + +import ( + "context" + "fmt" + "reflect" + "strconv" + + "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + "github.com/openshift/windows-machine-config-operator/pkg/cluster" + "github.com/openshift/windows-machine-config-operator/pkg/condition" + "github.com/openshift/windows-machine-config-operator/pkg/metrics" + monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + monclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1" +) + +//+kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch + +const ( + // MetricController is the name of this controller in logs and other outputs. 
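+	// It is used to name this controller's logger and event recorder.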
+	MetricController = "metrics"
+)
+
+type metricReconciler struct {
+	*monclient.MonitoringV1Client
+	instanceReconciler
+}
+
+func NewMetricReconciler(mgr manager.Manager, clusterConfig cluster.Config, cfg *rest.Config, watchNamespace string) (*metricReconciler, error) {
+	clientset, err := kubernetes.NewForConfig(mgr.GetConfig())
+	if err != nil {
+		return nil, fmt.Errorf("error creating kubernetes clientset: %w", err)
+	}
+	mclient, err := monclient.NewForConfig(cfg)
+	if err != nil {
+		return nil, fmt.Errorf("error creating monitoring client: %w", err)
+	}
+	return &metricReconciler{
+		MonitoringV1Client: mclient,
+		instanceReconciler: instanceReconciler{
+			client:             mgr.GetClient(),
+			log:                ctrl.Log.WithName("controllers").WithName(MetricController),
+			k8sclientset:       clientset,
+			clusterServiceCIDR: clusterConfig.Network().GetServiceCIDR(),
+			watchNamespace:     watchNamespace,
+			recorder:           mgr.GetEventRecorderFor(MetricController),
+		},
+	}, nil
+}
+
+// Reconcile is part of the main kubernetes reconciliation loop which reads the state of the cluster for a
+// Namespace object and aims to move the current state of the cluster closer to the desired state.
+func (r *metricReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) {
+	r.log = r.log.WithValues(MetricController, req.NamespacedName)
+	// Prevent WMCO upgrades while the metrics resources are being configured
+	if err := condition.MarkAsBusy(r.client, r.watchNamespace, r.recorder, MetricController); err != nil {
+		return ctrl.Result{}, err
+	}
+	defer func() {
+		err = markAsFreeOnSuccess(r.client, r.watchNamespace, r.recorder, MetricController, result.Requeue, err)
+	}()
+	// validate that cluster monitoring is enabled in the operator namespace
+	enabled, err := r.validate(ctx)
+	if err != nil {
+		return ctrl.Result{}, fmt.Errorf("error validating cluster monitoring label: %w", err)
+	}
+	// Proceed only if monitoring is enabled
+	if !enabled {
+		return ctrl.Result{}, nil
+	}
+	if err := r.ensureServiceMonitor(); err != nil {
+		return ctrl.Result{}, fmt.Errorf("error ensuring serviceMonitor exists: %w", err)
+	}
+	return ctrl.Result{}, nil
+}
+
+// validate verifies that cluster monitoring is enabled in the operator namespace. If the label is set to false or not
+// present, it sends a warning event to the user. If the label holds a non-boolean value, it returns an error.
+func (r *metricReconciler) validate(ctx context.Context) (bool, error) {
+	// validate that the metrics label is added to the namespace
+	labelValue := false
+	wmcoNamespace, err := r.k8sclientset.CoreV1().Namespaces().Get(ctx, r.watchNamespace, metav1.GetOptions{})
+	if err != nil {
+		return false, fmt.Errorf("error getting operator namespace: %w", err)
+	}
+	// if the label exists, update value from default of false
+	if value, ok := wmcoNamespace.Labels["openshift.io/cluster-monitoring"]; ok {
+		labelValue, err = strconv.ParseBool(value)
+		if err != nil {
+			return false, fmt.Errorf("monitoring label must have a boolean value: %w", err)
+		}
+	}
+	if !labelValue {
+		r.recorder.Eventf(wmcoNamespace, v1.EventTypeWarning, "labelValidationFailed",
+			"Cluster monitoring openshift.io/cluster-monitoring=true label is not enabled in %s namespace", r.watchNamespace)
+	}
+	return labelValue, nil
+}
+
+// ensureServiceMonitor creates a serviceMonitor object in the operator namespace if it does not exist.
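+// If a serviceMonitor with the expected name already exists but its spec has drifted from the expected spec, the
+// existing object is deleted and recreated.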
+func (r *metricReconciler) ensureServiceMonitor() error { + // get existing serviceMonitor object if it exists + existingSM, err := r.ServiceMonitors(r.watchNamespace).Get(context.TODO(), metrics.WindowsMetricsResource, metav1.GetOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf( + "error retrieving %s serviceMonitor: %w", metrics.WindowsMetricsResource, err) + } + + serverName := fmt.Sprintf("%s.%s.svc", metrics.WindowsMetricsResource, r.watchNamespace) + replacement0 := "$1" + replacement1 := "$1:9182" + replacement2 := "windows-exporter" + attachMetadataBool := true + expectedSM := &monv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: metrics.WindowsMetricsResource, + Namespace: r.watchNamespace, + Labels: map[string]string{ + "name": metrics.WindowsMetricsResource, + }, + }, + Spec: monv1.ServiceMonitorSpec{ + AttachMetadata: &monv1.AttachMetadata{ + Node: &attachMetadataBool, + }, + Endpoints: []monv1.Endpoint{ + { + HonorLabels: true, + Interval: "30s", + Path: "/metrics", + Port: "https-metrics", + Scheme: "https", + BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", + TLSConfig: &monv1.TLSConfig{ + CAFile: "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt", + SafeTLSConfig: monv1.SafeTLSConfig{ + ServerName: &serverName, + }, + }, + RelabelConfigs: []monv1.RelabelConfig{ + { + Action: "replace", + Regex: "(.*)", + Replacement: &replacement0, + TargetLabel: "instance", + SourceLabels: []monv1.LabelName{ + "__meta_kubernetes_endpoint_address_target_name", + }, + }, + { + Action: "keep", + Regex: "windows", + SourceLabels: []monv1.LabelName{ + "__meta_kubernetes_node_label_kubernetes_io_os", + }, + }, + { + Action: "replace", + Regex: "(.+)(?::\\d+)", + Replacement: &replacement1, + TargetLabel: "__address__", + SourceLabels: []monv1.LabelName{ + "__address__", + }, + }, + { + Action: "replace", + Replacement: &replacement2, + TargetLabel: "job", + }, + }, + }, + }, + NamespaceSelector: monv1.NamespaceSelector{ + MatchNames: []string{"kube-system"}, + }, + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "kubelet", + }, + }, + }, + } + + if err == nil { + // check if existing serviceMonitor's contents are as expected, delete it if not + if existingSM.Name == expectedSM.Name && existingSM.Namespace == expectedSM.Namespace && + reflect.DeepEqual(existingSM.Spec, expectedSM.Spec) { + return nil + } + err = r.ServiceMonitors(r.watchNamespace).Delete(context.TODO(), metrics.WindowsMetricsResource, + metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("unable to delete service monitor %s/%s: %w", r.watchNamespace, metrics.WindowsMetricsResource, + err) + } + r.log.Info("Deleted malformed resource", "serviceMonitor", metrics.WindowsMetricsResource, + "namespace", r.watchNamespace) + } + + _, err = r.ServiceMonitors(r.watchNamespace).Create(context.TODO(), expectedSM, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("error creating service monitor: %w", err) + } + return nil +} + +// SetupWithManager sets up the controller with the Manager. 
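+// The controller watches Namespace objects rather than ServiceMonitors: create, update and generic events are
+// filtered down to the watch namespace carrying the openshift.io/cluster-monitoring=true label, and delete events
+// are ignored.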
+func (r *metricReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	metricsPredicate := predicate.Funcs{
+		CreateFunc: func(e event.CreateEvent) bool {
+			return isMonitoringEnabled(e.Object, r.watchNamespace)
+		},
+		UpdateFunc: func(e event.UpdateEvent) bool {
+			return isMonitoringEnabled(e.ObjectNew, r.watchNamespace)
+		},
+		GenericFunc: func(e event.GenericEvent) bool {
+			return isMonitoringEnabled(e.Object, r.watchNamespace)
+		},
+		DeleteFunc: func(e event.DeleteEvent) bool {
+			return false
+		},
+	}
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&v1.Namespace{}, builder.WithPredicates(metricsPredicate)).
+		Complete(r)
+}
+
+// isMonitoringEnabled returns true if the given object is the watched namespace and has the monitoring label set to
+// true
+func isMonitoringEnabled(obj runtime.Object, watchNamespace string) bool {
+	namespace, ok := obj.(*v1.Namespace)
+	if !ok {
+		return false
+	}
+	if namespace.GetName() != watchNamespace {
+		return false
+	}
+	if value, ok := namespace.Labels["openshift.io/cluster-monitoring"]; ok {
+		labelValue, err := strconv.ParseBool(value)
+		if err != nil {
+			return false
+		}
+		return labelValue
+	}
+	return false
+}
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index e66506471b..fc3f12c1e4 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -1,39 +1,7 @@
 package metrics
 
-import (
-	"context"
-	"fmt"
-	"reflect"
-	"strconv"
-
-	"k8s.io/api/core/v1"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/kubernetes"
-	k8sclient "k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/rest"
-	"k8s.io/client-go/tools/record"
-	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/manager"
-
-	monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
-	monclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1"
-)
-
-//+kubebuilder:rbac:groups="",resources=services;services/finalizers,verbs=create;get;delete
-//+kubebuilder:rbac:groups="",resources=namespaces,verbs=get
-//+kubebuilder:rbac:groups="",resources=nodes,verbs=list
-//+kubebuilder:rbac:groups="monitoring.coreos.com",resources=servicemonitors,verbs=create;get;delete
-//+kubebuilder:rbac:groups="",resources=events,verbs=*
-
-var (
-	log = ctrl.Log.WithName("metrics")
-	// metricsEnabled specifies if metrics are enabled in the current cluster
-	metricsEnabled = true
-)
-
 const (
-	// metricsPortName specifies the portname used for Prometheus monitoring
+	// PortName specifies the port name used for Prometheus monitoring
 	PortName = "metrics"
 	// Host is the host address used by Windows metrics
 	Host = "0.0.0.0"
@@ -43,231 +11,3 @@ const (
 	// by current operator version. Its name is defined through the bundle manifests
 	WindowsMetricsResource = "windows-exporter"
 )
-
-// Config holds the information required to interact with metrics objects
-type Config struct {
-	// a handle that allows us to interact with the Kubernetes API.
-	*kubernetes.Clientset
-	// a handle that allows us to interact with the Monitoring API.
-	*monclient.MonitoringV1Client
-	// namespace is the namespace in which metrics objects are created
-	namespace string
-	// recorder to generate events
-	recorder record.EventRecorder
-}
-
-// NewConfig creates a new instance for Config to be used by the caller.
-func NewConfig(mgr manager.Manager, cfg *rest.Config, namespace string) (*Config, error) { - if cfg == nil { - return nil, fmt.Errorf("config should not be nil") - } - oclient, err := k8sclient.NewForConfig(cfg) - if err != nil { - return nil, fmt.Errorf("error creating config client: %w", err) - } - mclient, err := monclient.NewForConfig(cfg) - if err != nil { - return nil, fmt.Errorf("error creating monitoring client: %w", err) - } - return &Config{Clientset: oclient, - MonitoringV1Client: mclient, - namespace: namespace, - recorder: mgr.GetEventRecorderFor("metrics"), - }, nil -} - -// Configure takes care of all the required configuration steps -// for Prometheus monitoring like validating monitoring label -// and creating metrics Endpoints object. -func (c *Config) Configure(ctx context.Context) error { - // validate if cluster monitoring is enabled in the operator namespace - enabled, err := c.validate(ctx) - if err != nil { - return fmt.Errorf("error validating cluster monitoring label: %s", err) - } - // Create Metrics Endpoint object only if monitoring is enabled - if !enabled { - return nil - } - if err := c.ensureServiceMonitor(); err != nil { - return fmt.Errorf("error ensuring serviceMonitor exists: %w", err) - } - // In the case of an operator restart, a previous Endpoint object will be deleted and a new one will - // be created to ensure we have a correct spec. - var subsets []v1.EndpointSubset - existingEndpoint, err := c.CoreV1().Endpoints(c.namespace).Get(ctx, WindowsMetricsResource, metav1.GetOptions{}) - if err != nil { - if !apierrors.IsNotFound(err) { - return fmt.Errorf("error retrieving %s endpoint: %w", WindowsMetricsResource, err) - } - } else { - subsets = existingEndpoint.Subsets - err = c.CoreV1().Endpoints(c.namespace).Delete(ctx, WindowsMetricsResource, metav1.DeleteOptions{}) - if err != nil { - return fmt.Errorf("error deleting %s endpoint: %w", WindowsMetricsResource, err) - } - } - if err := c.createEndpoint(subsets); err != nil { - return fmt.Errorf("error creating metrics Endpoint: %w", err) - } - return nil -} - -// validate will verify if cluster monitoring is enabled in the operator namespace. If the label is set to false or not -// present, it will log and send warning events to the user. If the label holds a non-boolean value, returns an error. -func (c *Config) validate(ctx context.Context) (bool, error) { - // validate if metrics label is added to namespace - wmcoNamespace, err := c.CoreV1().Namespaces().Get(ctx, c.namespace, metav1.GetOptions{}) - if err != nil { - return false, fmt.Errorf("error getting operator namespace: %w", err) - } - - labelValue := false - // if the label exists, update value from default of false - if value, ok := wmcoNamespace.Labels["openshift.io/cluster-monitoring"]; ok { - labelValue, err = strconv.ParseBool(value) - if err != nil { - return false, fmt.Errorf("monitoring label must have a boolean value: %w", err) - } - } - if !labelValue { - c.recorder.Eventf(wmcoNamespace, v1.EventTypeWarning, "labelValidationFailed", - "Cluster monitoring openshift.io/cluster-monitoring=true label is not enabled in %s namespace", c.namespace) - } - metricsEnabled = labelValue - return metricsEnabled, nil -} - -// createEndpoint creates an endpoint object in the operator namespace. -// WMCO is no longer creating a service with a selector therefore no Endpoint -// object is created and WMCO needs to create the Endpoint object. 
-// We cannot create endpoints as a part of manifests deployment as -// Endpoints resources are not currently OLM-supported for bundle creation. -func (c *Config) createEndpoint(subsets []v1.EndpointSubset) error { - // create new Endpoint - newEndpoint := &v1.Endpoints{ - TypeMeta: metav1.TypeMeta{ - Kind: "Endpoints", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: WindowsMetricsResource, - Namespace: c.namespace, - Labels: map[string]string{"name": WindowsMetricsResource}, - }, - Subsets: subsets, - } - _, err := c.CoreV1().Endpoints(c.namespace).Create(context.TODO(), - newEndpoint, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("error creating metrics Endpoint: %w", err) - } - return nil -} - -// ensureServiceMonitor creates a serviceMonitor object in the operator namespace if it does not exist. -func (c *Config) ensureServiceMonitor() error { - // get existing serviceMonitor object if it exists - existingSM, err := c.ServiceMonitors(c.namespace).Get(context.TODO(), WindowsMetricsResource, metav1.GetOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf( - "error retrieving %s serviceMonitor: %w", WindowsMetricsResource, err) - } - - serverName := fmt.Sprintf("%s.%s.svc", WindowsMetricsResource, c.namespace) - instanceLabel := "$1" - portLabel := "$1:9182" - jobLabel := "windows-exporter" - attachMetadataBool := true - expectedSM := &monv1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: WindowsMetricsResource, - Namespace: c.namespace, - Labels: map[string]string{ - "name": WindowsMetricsResource, - }, - }, - Spec: monv1.ServiceMonitorSpec{ - AttachMetadata: &monv1.AttachMetadata{ - Node: &attachMetadataBool, - }, - Endpoints: []monv1.Endpoint{ - { - HonorLabels: true, - Interval: "30s", - Path: "/metrics", - Port: "https-metrics", - Scheme: "https", - BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token", - TLSConfig: &monv1.TLSConfig{ - CAFile: "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt", - SafeTLSConfig: monv1.SafeTLSConfig{ - ServerName: &serverName, - }, - }, - RelabelConfigs: []monv1.RelabelConfig{ - { - Action: "replace", - Regex: "(.*)", - Replacement: &instanceLabel, - TargetLabel: "instance", - SourceLabels: []monv1.LabelName{ - "__meta_kubernetes_endpoint_address_target_name", - }, - }, - { - Action: "keep", - Regex: "windows", - SourceLabels: []monv1.LabelName{ - "__meta_kubernetes_node_label_kubernetes_io_os", - }, - }, - { - Action: "replace", - Regex: "(.+)(?::\\d+)", - Replacement: &portLabel, - TargetLabel: "__address__", - SourceLabels: []monv1.LabelName{ - "__address__", - }, - }, - { - Action: "replace", - Replacement: &jobLabel, - TargetLabel: "job", - }, - }, - }, - }, - NamespaceSelector: monv1.NamespaceSelector{ - MatchNames: []string{"kube-system"}, - }, - Selector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "k8s-app": "kubelet", - }, - }, - }, - } - - if err == nil { - // check if existing serviceMonitor's contents are as expected, delete it if not - if existingSM.Name == expectedSM.Name && existingSM.Namespace == expectedSM.Namespace && - reflect.DeepEqual(existingSM.Spec, expectedSM.Spec) { - return nil - } - err = c.ServiceMonitors(c.namespace).Delete(context.TODO(), WindowsMetricsResource, - metav1.DeleteOptions{}) - if err != nil { - return fmt.Errorf("unable to delete service monitor %s/%s: %w", c.namespace, WindowsMetricsResource, - err) - } - log.Info("Deleted malformed resource", "serviceMonitor", WindowsMetricsResource, - "namespace", c.namespace) - 
} - - _, err = c.ServiceMonitors(c.namespace).Create(context.TODO(), expectedSM, metav1.CreateOptions{}) - if err != nil { - return fmt.Errorf("error creating service monitor: %w", err) - } - return nil -}
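Reviewer note: the predicate in SetupWithManager means metrics resources are only reconciled once the operator namespace opts into cluster monitoring. A minimal, self-contained sketch of that label check, using a hypothetical monitoringEnabled helper that mirrors isMonitoringEnabled from the diff (the namespace name is illustrative):

```go
package main

import (
	"fmt"
	"strconv"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// monitoringEnabled mirrors isMonitoringEnabled above: only the watched
// namespace qualifies, and only when its openshift.io/cluster-monitoring
// label parses as true.
func monitoringEnabled(ns *v1.Namespace, watchNamespace string) bool {
	if ns.GetName() != watchNamespace {
		return false
	}
	value, ok := ns.Labels["openshift.io/cluster-monitoring"]
	if !ok {
		return false
	}
	enabled, err := strconv.ParseBool(value)
	return err == nil && enabled
}

func main() {
	// Hypothetical namespace name, for illustration only.
	ns := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{
		Name:   "openshift-windows-machine-config-operator",
		Labels: map[string]string{"openshift.io/cluster-monitoring": "true"},
	}}
	fmt.Println(monitoringEnabled(ns, ns.Name))                // true: watched namespace, label enabled
	fmt.Println(monitoringEnabled(ns, "some-other-namespace")) // false: not the watched namespace

	ns.Labels["openshift.io/cluster-monitoring"] = "not-a-bool"
	fmt.Println(monitoringEnabled(ns, ns.Name)) // false: label value is not a parseable boolean
}
```

Note that removing the label only filters out the resulting update event; the controller does not delete a previously created ServiceMonitor.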