Skip to content

Commit 7764719

Browse files
author
Arvind Thirumurugan
committed
approval controller, metric collector controllers
Signed-off-by: Arvind Thirumurugan <arvindth@microsoft.com>
1 parent c5b416f commit 7764719

File tree

67 files changed

+5743
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+5743
-0
lines changed

approval-controller-metric-collector/README.md

Lines changed: 477 additions & 0 deletions
Large diffs are not rendered by default.
109 KB
Loading
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Makefile for ApprovalRequest Controller
2+
3+
# Image settings
4+
IMAGE_NAME ?= approval-request-controller
5+
IMAGE_TAG ?= latest
6+
REGISTRY ?=
7+
8+
# Build settings
9+
GOOS ?= $(shell go env GOOS)
10+
GOARCH ?= $(shell go env GOARCH)
11+
12+
# Tools
13+
CONTROLLER_GEN_VERSION ?= v0.16.0
14+
CONTROLLER_GEN = go run sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION)
15+
16+
.PHONY: help
17+
help: ## Display this help
18+
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
19+
20+
##@ Code Generation
21+
22+
.PHONY: manifests
23+
manifests: ## Generate CRD manifests
24+
$(CONTROLLER_GEN) crd paths="./apis/..." output:crd:artifacts:config=config/crd/bases
25+
26+
.PHONY: generate
27+
generate: ## Generate DeepCopy code
28+
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./apis/..."
29+
30+
##@ Build
31+
32+
.PHONY: docker-build
33+
docker-build: ## Build docker image
34+
docker buildx build \
35+
--file docker/approval-request-controller.Dockerfile \
36+
--output=type=docker \
37+
--platform=linux/$(GOARCH) \
38+
--build-arg GOARCH=$(GOARCH) \
39+
--tag $(IMAGE_NAME):$(IMAGE_TAG) \
40+
--build-context kubefleet=.. \
41+
..
42+
43+
.PHONY: docker-push
44+
docker-push: ## Push docker image
45+
# docker-build tags the image without the REGISTRY prefix, so add the
# registry-qualified tag here; otherwise the push fails whenever REGISTRY is
# set. With an empty REGISTRY the tag step is a harmless no-op.
docker tag $(IMAGE_NAME):$(IMAGE_TAG) $(REGISTRY)$(IMAGE_NAME):$(IMAGE_TAG)
docker push $(REGISTRY)$(IMAGE_NAME):$(IMAGE_TAG)
46+
47+
##@ Development
48+
49+
.PHONY: run
50+
run: ## Run controller locally
51+
cd .. && go run ./approval-request-controller/cmd/approvalrequestcontroller/main.go
52+
53+
##@ Deployment
54+
55+
.PHONY: install
56+
install: ## Install helm chart
57+
helm install approval-request-controller ./charts/approval-request-controller \
58+
--namespace fleet-system \
59+
--create-namespace \
60+
--set image.repository=$(IMAGE_NAME) \
61+
--set image.tag=$(IMAGE_TAG)
62+
63+
.PHONY: upgrade
64+
upgrade: ## Upgrade helm chart
65+
helm upgrade approval-request-controller ./charts/approval-request-controller \
66+
--namespace fleet-system \
67+
--set image.repository=$(IMAGE_NAME) \
68+
--set image.tag=$(IMAGE_TAG)
69+
70+
.PHONY: uninstall
71+
uninstall: ## Uninstall helm chart
72+
helm uninstall approval-request-controller --namespace fleet-system
73+
74+
##@ Kind
75+
76+
.PHONY: kind-load
77+
kind-load: docker-build ## Build and load image into kind cluster
78+
kind load docker-image $(IMAGE_NAME):$(IMAGE_TAG) --name hub
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# ApprovalRequest Controller
2+
3+
The ApprovalRequest Controller is a standalone controller that runs on the **hub cluster** to automate approval decisions for staged updates based on workload health metrics.
4+
5+
## Overview
6+
7+
This controller is designed to be a standalone component that can run independently from the main kubefleet repository. It:
8+
- Uses kubefleet v0.1.2 as an external dependency
9+
- Includes its own APIs for MetricCollectorReport and WorkloadTracker
10+
- Watches `ApprovalRequest` and `ClusterApprovalRequest` resources (from kubefleet)
11+
- Creates `MetricCollector` resources on member clusters via ClusterResourcePlacement
12+
- Monitors workload health via `MetricCollectorReport` objects
13+
- Automatically approves requests when all tracked workloads are healthy
14+
- Runs every 15 seconds to check health status
15+
16+
## Architecture
17+
18+
The controller is designed to run on the hub cluster and:
19+
1. Deploys MetricCollector instances to member clusters using CRP
20+
2. Collects health metrics from MetricCollectorReports
21+
3. Compares metrics against WorkloadTracker specifications
22+
4. Approves ApprovalRequests when all workloads are healthy
23+
24+
## Installation
25+
26+
### Prerequisites
27+
28+
The following CRDs must be installed on the hub cluster (installed by kubefleet hub-agent):
29+
- `approvalrequests.placement.kubernetes-fleet.io`
30+
- `clusterapprovalrequests.placement.kubernetes-fleet.io`
31+
- `clusterresourceplacements.placement.kubernetes-fleet.io`
32+
- `clusterresourceoverrides.placement.kubernetes-fleet.io`
33+
- `clusterstagedupdateruns.placement.kubernetes-fleet.io`
34+
- `stagedupdateruns.placement.kubernetes-fleet.io`
35+
36+
The following CRDs are installed by this chart:
37+
- `metriccollectors.metric.kubernetes-fleet.io`
38+
- `metriccollectorreports.metric.kubernetes-fleet.io`
39+
- `workloadtrackers.metric.kubernetes-fleet.io`
40+
41+
### Install via Helm
42+
43+
```bash
44+
# Build the image
45+
make docker-build IMAGE_NAME=approval-request-controller IMAGE_TAG=latest
46+
47+
# Load into kind (if using kind)
48+
kind load docker-image approval-request-controller:latest --name hub
49+
50+
# Install the chart
51+
helm install approval-request-controller ./charts/approval-request-controller \
52+
--namespace fleet-system \
53+
--create-namespace
54+
```
55+
56+
## Configuration
57+
58+
The controller watches for:
59+
- `ApprovalRequest` (namespaced)
60+
- `ClusterApprovalRequest` (cluster-scoped)
61+
62+
Both resources from kubefleet are monitored, and the controller creates `MetricCollector` resources on appropriate member clusters based on the staged update configuration.
63+
64+
### Health Check Interval
65+
66+
The controller checks workload health every **15 seconds**. This interval is configurable via the `reconcileInterval` parameter in the Helm chart.
67+
68+
## API Reference
69+
70+
### WorkloadTracker
71+
72+
`WorkloadTracker` is a cluster-scoped custom resource that defines which workloads the approval controller should monitor for health metrics before auto-approving staged rollouts.
73+
74+
#### Example: Single Workload
75+
76+
```yaml
77+
apiVersion: metric.kubernetes-fleet.io/v1alpha1
78+
kind: WorkloadTracker
79+
metadata:
80+
name: sample-workload-tracker
81+
workloads:
82+
- name: sample-metric-app
83+
namespace: test-ns
84+
```
85+
86+
#### Example: Multiple Workloads
87+
88+
```yaml
89+
apiVersion: metric.kubernetes-fleet.io/v1alpha1
90+
kind: WorkloadTracker
91+
metadata:
92+
name: multi-workload-tracker
93+
workloads:
94+
- name: frontend
95+
namespace: production
96+
- name: backend-api
97+
namespace: production
98+
- name: worker-service
99+
namespace: production
100+
```
101+
102+
#### Usage Notes
103+
104+
- **Cluster-scoped:** WorkloadTracker is a cluster-scoped resource, not namespaced
105+
- **Optional:** If no WorkloadTracker exists, the controller will skip health checks and won't auto-approve
106+
- **Single instance:** The controller expects one WorkloadTracker per cluster and uses the first one found
107+
- **Health criteria:** All workloads listed must report healthy (metric value = 1.0) before approval
108+
- **Prometheus metrics:** Each workload should expose `workload_health` metrics that the MetricCollector can query
109+
110+
For a complete example, see: [`./examples/workloadtracker/workloadtracker.yaml`](./examples/workloadtracker/workloadtracker.yaml)
111+
112+
## Additional Resources
113+
114+
- **Main Tutorial:** See [`../README.md`](../README.md) for a complete end-to-end tutorial on setting up automated staged rollouts with approval automation
115+
- **Metric Collector:** See [`../metric-collector/README.md`](../metric-collector/README.md) for details on the metric collection component that runs on member clusters
116+
- **KubeFleet Documentation:** [Azure/fleet](https://github.com/Azure/fleet) - Multi-cluster orchestration platform
117+
- **Example Configurations:**
118+
- [`./examples/workloadtracker/`](./examples/workloadtracker/) - WorkloadTracker resource examples
119+
- [`./examples/stagedupdaterun/`](./examples/stagedupdaterun/) - Staged update configuration examples
120+
- [`./examples/prometheus/`](./examples/prometheus/) - Prometheus deployment and configuration for metric collection
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/*
2+
Copyright 2025 The KubeFleet Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// Package v1alpha1 contains API Schema definitions for the metric v1alpha1 API group
18+
// +kubebuilder:object:generate=true
19+
// +groupName=metric.kubernetes-fleet.io
20+
package v1alpha1
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
Copyright 2025 The KubeFleet Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// +kubebuilder:object:generate=true
18+
// +groupName=metric.kubernetes-fleet.io
19+
package v1alpha1
20+
21+
import (
22+
"k8s.io/apimachinery/pkg/runtime/schema"
23+
"sigs.k8s.io/controller-runtime/pkg/scheme"
24+
)
25+
26+
var (
27+
// GroupVersion is the group version used to register these objects
28+
GroupVersion = schema.GroupVersion{Group: "metric.kubernetes-fleet.io", Version: "v1alpha1"}
29+
30+
// SchemeBuilder is used to add go types to the GroupVersionKind scheme
31+
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
32+
33+
// AddToScheme adds the types in this group-version to the given scheme.
34+
AddToScheme = SchemeBuilder.AddToScheme
35+
)

0 commit comments

Comments
 (0)