From 9ba91f391f95339fbcf8163b1d247ee3d50bbe8e Mon Sep 17 00:00:00 2001
From: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
Date: Wed, 9 Apr 2025 09:52:35 +0200
Subject: [PATCH 1/8] fix: stuck at 'Progressing' #15317

Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 pkg/health/health_pod.go                      | 34 +++++---
 ...-restart-never-with-ignore-annotation.yaml | 79 +++++++++++++++++++
 2 files changed, 103 insertions(+), 10 deletions(-)
 create mode 100644 pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml

diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go
index 9ebcef558..c106a1f5e 100644
--- a/pkg/health/health_pod.go
+++ b/pkg/health/health_pod.go
@@ -12,6 +12,10 @@ import (
 	"github.com/argoproj/gitops-engine/pkg/utils/kube"
 )
 
+const (
+	AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy"
+)
+
 func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) {
 	gvk := obj.GroupVersionKind()
 	switch gvk {
@@ -93,9 +97,9 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 		}
 
 		return &HealthStatus{Status: HealthStatusDegraded, Message: ""}, nil
+
 	case corev1.PodRunning:
-		switch pod.Spec.RestartPolicy {
-		case corev1.RestartPolicyAlways:
+		getHealthStatus := func(pod *corev1.Pod) (*HealthStatus, error) {
 			// if pod is ready, it is automatically healthy
 			if podutils.IsPodReady(pod) {
 				return &HealthStatus{
@@ -117,14 +121,24 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 				Status:  HealthStatusProgressing,
 				Message: pod.Status.Message,
 			}, nil
-		case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
-			// pods set with a restart policy of OnFailure or Never, have a finite life.
-			// These pods are typically resource hooks. Thus, we consider these as Progressing
-			// instead of healthy.
-			return &HealthStatus{
-				Status:  HealthStatusProgressing,
-				Message: pod.Status.Message,
-			}, nil
+		}
+		if _, hook := pod.Annotations[AnnotationIgnoreRestartPolicy]; hook {
+			return getHealthStatus(pod)
+		} else {
+			switch pod.Spec.RestartPolicy {
+			case corev1.RestartPolicyAlways:
+				return getHealthStatus(pod)
+			case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
+				// Most pods set with a restart policy of OnFailure or Never, have a finite life.
+				// These pods are typically resource hooks. Thus, we consider these as Progressing
+				// instead of healthy. If this is unwanted, e.g., when the pod is managed by an
+				// operator and therefore has a restart policy of OnFailure or Never, then use the
+				// the AnnotationIgnoreRestartPolicy annotation.
+				return &HealthStatus{
+					Status:  HealthStatusProgressing,
+					Message: pod.Status.Message,
+				}, nil
+			}
 		}
 	}
 	return &HealthStatus{
diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
new file mode 100644
index 000000000..432473dbd
--- /dev/null
+++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
@@ -0,0 +1,79 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  creationTimestamp: 2018-12-02T09:15:16Z
+  name: my-pod
+  namespace: argocd
+  resourceVersion: "151053"
+  selfLink: /api/v1/namespaces/argocd/pods/my-pod
+  uid: c86e909c-f612-11e8-a057-fe5f49266390
+  annotations:
+    argocd.argoproj.io/ignore-restart-policy: "true"
+spec:
+  containers:
+  - command:
+    - sh
+    - -c
+    - sleep 10
+    image: alpine:latest
+    imagePullPolicy: Always
+    name: main
+    resources: {}
+    terminationMessagePath: /dev/termination-log
+    terminationMessagePolicy: File
+    volumeMounts:
+    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
+      name: default-token-f9jvj
+      readOnly: true
+  dnsPolicy: ClusterFirst
+  nodeName: minikube
+  restartPolicy: Never
+  schedulerName: default-scheduler
+  securityContext: {}
+  serviceAccount: default
+  serviceAccountName: default
+  terminationGracePeriodSeconds: 30
+  tolerations:
+  - effect: NoExecute
+    key: node.kubernetes.io/not-ready
+    operator: Exists
+    tolerationSeconds: 300
+  - effect: NoExecute
+    key: node.kubernetes.io/unreachable
+    operator: Exists
+    tolerationSeconds: 300
+  volumes:
+  - name: default-token-f9jvj
+    secret:
+      defaultMode: 420
+      secretName: default-token-f9jvj
+status:
+  conditions:
+  - lastProbeTime: null
+    lastTransitionTime: 2018-12-02T09:15:16Z
+    status: "True"
+    type: Initialized
+  - lastProbeTime: null
+    lastTransitionTime: 2018-12-02T09:15:19Z
+    status: "True"
+    type: Ready
+  - lastProbeTime: null
+    lastTransitionTime: 2018-12-02T09:15:16Z
+    status: "True"
+    type: PodScheduled
+  containerStatuses:
+  - containerID: docker://acfb261d6c1fe8c543438a202de62cb06c137fa93a2d59262d764470e96f3195
+    image: alpine:latest
+    imageID: docker-pullable://alpine@sha256:621c2f39f8133acb8e64023a94dbdf0d5ca81896102b9e57c0dc184cadaf5528
+    lastState: {}
+    name: main
+    ready: true
+    restartCount: 0
+    state:
+      running:
+        startedAt: 2018-12-02T09:15:19Z
+  hostIP: 192.168.64.41
+  phase: Running
+  podIP: 172.17.0.9
+  qosClass: BestEffort
+  startTime: 2018-12-02T09:15:16Z

From b0a321932629acf3f802e8c2e246ec528ea03b26 Mon Sep 17 00:00:00 2001
From: RoelofKuijpers <roelof.kuijpers@energyessentials.nl>
Date: Wed, 9 Apr 2025 10:22:16 +0200
Subject: [PATCH 2/8] Update health_test.go

Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 pkg/health/health_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go
index ef945eb46..20ddc5f5c 100644
--- a/pkg/health/health_test.go
+++ b/pkg/health/health_test.go
@@ -103,6 +103,7 @@ func TestPod(t *testing.T) {
 	assertAppHealth(t, "./testdata/pod-error.yaml", HealthStatusDegraded)
 	assertAppHealth(t, "./testdata/pod-running-restart-always.yaml", HealthStatusHealthy)
 	assertAppHealth(t, "./testdata/pod-running-restart-never.yaml", HealthStatusProgressing)
+	assertAppHealth(t, "./testdata/pod-running-restart-never-with-ignore-annotation.yaml", HealthStatusHealthy)
 	assertAppHealth(t, "./testdata/pod-running-restart-onfailure.yaml", HealthStatusProgressing)
 	assertAppHealth(t, "./testdata/pod-failed.yaml", HealthStatusDegraded)
 	assertAppHealth(t, "./testdata/pod-succeeded.yaml", HealthStatusHealthy)

From 07cb6a0dec7bb9581ad516cda751779e7ee4ffed Mon Sep 17 00:00:00 2001
From: RoelofKuijpers <roelof.kuijpers@energyessentials.nl>
Date: Wed, 9 Apr 2025 12:06:40 +0200
Subject: [PATCH 3/8] Update
 pod-running-restart-never-with-ignore-annotation.yaml so it passes the checks
 of the Quality Gate

Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 ...ng-restart-never-with-ignore-annotation.yaml | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
index 432473dbd..4c2b3682a 100644
--- a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
+++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
@@ -15,10 +15,16 @@ spec:
     - sh
     - -c
     - sleep 10
-    image: alpine:latest
+    image: alpine:3.21
     imagePullPolicy: Always
     name: main
-    resources: {}
+    resources:
+      requests:
+        memory: "128Mi"
+        cpu: "250m"
+      limits:
+        memory: "256Mi"
+        cpu: "500m"
     terminationMessagePath: /dev/termination-log
     terminationMessagePolicy: File
     volumeMounts:
@@ -32,6 +38,7 @@ spec:
   securityContext: {}
   serviceAccount: default
   serviceAccountName: default
+  automountServiceAccountToken: false
   terminationGracePeriodSeconds: 30
   tolerations:
   - effect: NoExecute
@@ -62,9 +69,9 @@ status:
     status: "True"
     type: PodScheduled
   containerStatuses:
-  - containerID: docker://acfb261d6c1fe8c543438a202de62cb06c137fa93a2d59262d764470e96f3195
-    image: alpine:latest
-    imageID: docker-pullable://alpine@sha256:621c2f39f8133acb8e64023a94dbdf0d5ca81896102b9e57c0dc184cadaf5528
+  - containerID: containerd://adc73c2c0ae3f1fd9bf294abd834e740042ee375de680c0cfcdd90d863a73b8b
+    image: alpine:3.21
+    imageID: docker.io/library/alpine@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c
     lastState: {}
     name: main
     ready: true

From 9a9eb190e9a077a408d25810d210610e8964020e Mon Sep 17 00:00:00 2001
From: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
Date: Wed, 9 Apr 2025 13:57:34 +0200
Subject: [PATCH 4/8] add storage request

Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 .../pod-running-restart-never-with-ignore-annotation.yaml        | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
index 4c2b3682a..1cc27e5f1 100644
--- a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
+++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml
@@ -20,6 +20,7 @@ spec:
     name: main
     resources:
       requests:
+        ephemeral-storage: "100Mi"
         memory: "128Mi"
         cpu: "250m"
       limits:

From 75c2fe7edb3d41f634e596402c486b28ebc4125a Mon Sep 17 00:00:00 2001
From: RoelofKuijpers <roelof.kuijpers@energyessentials.nl>
Date: Mon, 21 Apr 2025 18:13:20 +0200
Subject: [PATCH 5/8] Update pkg/health/health_pod.go

improve code readability

Co-authored-by: sivchari <shibuuuu5@gmail.com>
Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 pkg/health/health_pod.go | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go
index c106a1f5e..7d874f284 100644
--- a/pkg/health/health_pod.go
+++ b/pkg/health/health_pod.go
@@ -122,13 +122,12 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 				Message: pod.Status.Message,
 			}, nil
 		}
-		if _, hook := pod.Annotations[AnnotationIgnoreRestartPolicy]; hook {
+                policy := pod.Spec.RestartPolicy
+		if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways {
 			return getHealthStatus(pod)
-		} else {
-			switch pod.Spec.RestartPolicy {
-			case corev1.RestartPolicyAlways:
-				return getHealthStatus(pod)
-			case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
+		}
+		
+			 if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever {
 				// Most pods set with a restart policy of OnFailure or Never, have a finite life.
 				// These pods are typically resource hooks. Thus, we consider these as Progressing
 				// instead of healthy. If this is unwanted, e.g., when the pod is managed by an

From 2c955b613a6e91c51f4a04862678a5d2051dbe2a Mon Sep 17 00:00:00 2001
From: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
Date: Mon, 21 Apr 2025 18:30:38 +0200
Subject: [PATCH 6/8] Improve code readability fix

Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 pkg/health/health_pod.go | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go
index 7d874f284..d54c52cb0 100644
--- a/pkg/health/health_pod.go
+++ b/pkg/health/health_pod.go
@@ -122,22 +122,21 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 				Message: pod.Status.Message,
 			}, nil
 		}
-                policy := pod.Spec.RestartPolicy
+		policy := pod.Spec.RestartPolicy
 		if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways {
 			return getHealthStatus(pod)
 		}
-		
-			 if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever {
-				// Most pods set with a restart policy of OnFailure or Never, have a finite life.
-				// These pods are typically resource hooks. Thus, we consider these as Progressing
-				// instead of healthy. If this is unwanted, e.g., when the pod is managed by an
-				// operator and therefore has a restart policy of OnFailure or Never, then use the
-				// the AnnotationIgnoreRestartPolicy annotation.
-				return &HealthStatus{
-					Status:  HealthStatusProgressing,
-					Message: pod.Status.Message,
-				}, nil
-			}
+
+		if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever {
+			// Most pods set with a restart policy of OnFailure or Never, have a finite life.
+			// These pods are typically resource hooks. Thus, we consider these as Progressing
+			// instead of healthy. If this is unwanted, e.g., when the pod is managed by an
+			// operator and therefore has a restart policy of OnFailure or Never, then use the
+			// AnnotationIgnoreRestartPolicy annotation.
+			return &HealthStatus{
+				Status:  HealthStatusProgressing,
+				Message: pod.Status.Message,
+			}, nil
 		}
 	}
 	return &HealthStatus{

From 8ef020ab8235f540912341d3770d690caf19b3dd Mon Sep 17 00:00:00 2001
From: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
Date: Fri, 4 Jul 2025 00:47:56 +0200
Subject: [PATCH 7/8] Move annotation into common package

Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 pkg/health/health_pod.go | 7 ++-----
 pkg/sync/common/types.go | 3 +++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go
index d54c52cb0..cd697d119 100644
--- a/pkg/health/health_pod.go
+++ b/pkg/health/health_pod.go
@@ -9,13 +9,10 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/kubectl/pkg/util/podutils"
 
+	"github.com/argoproj/gitops-engine/pkg/sync/common"
 	"github.com/argoproj/gitops-engine/pkg/utils/kube"
 )
 
-const (
-	AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy"
-)
-
 func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) {
 	gvk := obj.GroupVersionKind()
 	switch gvk {
@@ -123,7 +120,7 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 			}, nil
 		}
 		policy := pod.Spec.RestartPolicy
-		if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways {
+		if _, ok := pod.Annotations[common.AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways {
 			return getHealthStatus(pod)
 		}
 
diff --git a/pkg/sync/common/types.go b/pkg/sync/common/types.go
index 002bb23da..5981b9753 100644
--- a/pkg/sync/common/types.go
+++ b/pkg/sync/common/types.go
@@ -17,6 +17,9 @@ const (
 	// AnnotationKeyHookDeletePolicy is the policy of deleting a hook
 	AnnotationKeyHookDeletePolicy = "argocd.argoproj.io/hook-delete-policy"
 	AnnotationDeletionApproved    = "argocd.argoproj.io/deletion-approved"
+	// AnnotationIgnoreRestartPolicy makes the pod health check ignore the pod's restart policy, so that
+	// operator-managed pods with restartPolicy Never or OnFailure can be considered healthy
+	AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy"
 
 	// Sync option that disables dry run in resource is missing in the cluster
 	SyncOptionSkipDryRunOnMissingResource = "SkipDryRunOnMissingResource=true"

From b216058d04d1ab71b34e72a120ba5ca9894307f1 Mon Sep 17 00:00:00 2001
From: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
Date: Tue, 29 Jul 2025 14:10:27 +0200
Subject: [PATCH 8/8] Add documentation

Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
---
 pkg/health/health_pod.go | 2 ++
 pkg/sync/doc.go          | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go
index cd697d119..837886554 100644
--- a/pkg/health/health_pod.go
+++ b/pkg/health/health_pod.go
@@ -120,6 +120,8 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 			}, nil
 		}
 		policy := pod.Spec.RestartPolicy
+		// If the pod has the AnnotationIgnoreRestartPolicy annotation or its restart policy is Always,
+		// then treat it as a long-running pod and check its health status.
 		if _, ok := pod.Annotations[common.AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways {
 			return getHealthStatus(pod)
 		}
diff --git a/pkg/sync/doc.go b/pkg/sync/doc.go
index f4f5d8725..0b6bdbc19 100644
--- a/pkg/sync/doc.go
+++ b/pkg/sync/doc.go
@@ -65,6 +65,14 @@ The following policies define when the hook will be deleted.
   - HookFailed - the hook resource is deleted after the hook failed.
   - BeforeHookCreation - any existing hook resource is deleted before the new one is created
 
+**Pods with restartPolicy Never or OnFailure**
+
+During synchronization, the application may show the resource health as *Progressing* when it deploys a Pod that has the `restartPolicy` set to `Never` or `OnFailure` (see Kubernetes docs for restart policy). Generally, these resources behave like a Job and are expected to complete. This is intended behavior, since Jobs are commonly used for sync hooks and must finish before an application is considered *Healthy*.
+
+A workaround is to set the annotation argocd.argoproj.io/ignore-restart-policy: "true" on the Pod. Only the presence of the annotation is checked; its value is ignored.
+
+When this annotation is set on the Pod resource, the controller ignores the `restartPolicy` and assesses the Pod like one with `restartPolicy: Always`, so a *Running* Pod that is ready is reported as *Healthy*.
+
 # Sync Waves
 
 The waves allow to group sync execution of syncing process into batches when each batch is executed sequentially one after