Skip to content

Commit

Permalink
Save annotation of restarts
Browse files Browse the repository at this point in the history
This change counts the number of re-creations of the populator pod in an
annotation on the destination PVC. That allows us to limit how many times the
pod is recreated: after 3 re-creations, the controller stops recreating it.

Signed-off-by: Liran Rotenberg <lrotenbe@redhat.com>
  • Loading branch information
liranr23 committed Nov 8, 2023
1 parent 92195e1 commit a2ae56e
Showing 1 changed file with 36 additions and 1 deletion.
37 changes: 36 additions & 1 deletion pkg/lib-volume-populator/populator-machinery/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ const (
reasonPVCCreationError = "PopulatorPVCCreationError"
reasonPopulatorProgress = "PopulatorProgress"
AnnDefaultNetwork = "v1.multus-cni.io/default-network"
AnnPopulatorReCreations = "recreations"

qemuGroup = 107
)
Expand Down Expand Up @@ -696,7 +697,20 @@ func (c *controller) syncPvc(ctx context.Context, key, pvcNamespace, pvcName str

if corev1.PodSucceeded != pod.Status.Phase {
if corev1.PodFailed == pod.Status.Phase {
c.recorder.Eventf(pvc, corev1.EventTypeWarning, reasonPodFailed, "Populator failed: %s", pod.Status.Message)
restarts, ok := pvc.Annotations[AnnPopulatorReCreations]
if !ok {
err = c.retryFailedPopulator(ctx, pvc, populatorNamespace, pod.Name, "1")
return err
}
restartsInteger, err := strconv.Atoi(restarts)
if err != nil {
return err
}
if restartsInteger < 3 {
err = c.retryFailedPopulator(ctx, pvc, populatorNamespace, pod.Name, strconv.Itoa(restartsInteger+1))
return err
}
c.recorder.Eventf(pvc, corev1.EventTypeWarning, reasonPodFailed, "Populator failed after few (3) attempts: Please check the logs of the populator pod, %s/%s", populatorNamespace, pod.Name)
}
// We'll get called again later when the pod succeeds
return nil
Expand Down Expand Up @@ -791,6 +805,27 @@ func (c *controller) syncPvc(ctx context.Context, key, pvcNamespace, pvcName str
return nil
}

// retryFailedPopulator records a new re-creation attempt on the PVC and then
// deletes the failed populator pod.
//
// It stores counter under the AnnPopulatorReCreations annotation, persists the
// PVC through updatePvc and, only if that update succeeds, deletes the pod
// named podName in namespace. The first error encountered is returned; nil is
// returned when both the update and the deletion succeed.
//
// NOTE(review): the same namespace is used for the PVC update and for the pod
// deletion — confirm the PVC always lives in the populator namespace.
func (c *controller) retryFailedPopulator(ctx context.Context, pvc *corev1.PersistentVolumeClaim, namespace, podName, counter string) (err error) {
	// A PVC that carries no annotations has a nil map, and assigning into a
	// nil map panics, so make sure the map exists before writing the counter.
	if pvc.Annotations == nil {
		pvc.Annotations = make(map[string]string)
	}
	pvc.Annotations[AnnPopulatorReCreations] = counter

	// Persist the counter first: if the update fails the pod is left in place,
	// so a deletion is never performed without being counted.
	if err = c.updatePvc(ctx, pvc, namespace); err != nil {
		return err
	}

	return c.kubeClient.CoreV1().Pods(namespace).Delete(ctx, podName, metav1.DeleteOptions{})
}

// updatePvc submits pvc as an update through the PersistentVolumeClaims client
// scoped to namespace. The object returned by the API call is discarded; only
// the error, if any, is reported to the caller.
func (c *controller) updatePvc(ctx context.Context, pvc *corev1.PersistentVolumeClaim, namespace string) (err error) {
	pvcClient := c.kubeClient.CoreV1().PersistentVolumeClaims(namespace)
	if _, updateErr := pvcClient.Update(ctx, pvc, metav1.UpdateOptions{}); updateErr != nil {
		return updateErr
	}
	return nil
}

func (c *controller) updateProgress(pvc *corev1.PersistentVolumeClaim, podIP string, cr *unstructured.Unstructured) error {
populatorKind := pvc.Spec.DataSourceRef.Kind
var diskRegex = regexp.MustCompile(fmt.Sprintf(`volume_populators_%s\{%s=\"([0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12})"\} (\d{1,3}.*)`,
Expand Down

0 comments on commit a2ae56e

Please sign in to comment.