diff --git a/charts/lighthouse/README.md b/charts/lighthouse/README.md
index ed4700eef..b05af9ed2 100644
--- a/charts/lighthouse/README.md
+++ b/charts/lighthouse/README.md
@@ -168,6 +168,7 @@ helm uninstall my-lighthouse --namespace lighthouse
 | `tektoncontroller.resources.requests` | object | Resource requests applied to the tekton controller pods | `{"cpu":"80m","memory":"128Mi"}` |
 | `tektoncontroller.service` | object | Service settings for the tekton controller | `{"annotations":{}}` |
 | `tektoncontroller.terminationGracePeriodSeconds` | int | Termination grace period for tekton controller pods | `180` |
+| `tektoncontroller.enableRerunStatusUpdate` | bool | Enable updating the status at the git provider when PipelineRuns are rerun | `false` |
 | `tektoncontroller.tolerations` | list | [Tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) applied to the tekton controller pods | `[]` |
 | `user` | string | Git user name (used when GitHub app authentication is not enabled) | `""` |
 | `webhooks.affinity` | object | [Affinity rules](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) applied to the webhooks pods | `{}` |
diff --git a/charts/lighthouse/templates/tekton-controller-deployment.yaml b/charts/lighthouse/templates/tekton-controller-deployment.yaml
index 681998131..ab3182bdc 100644
--- a/charts/lighthouse/templates/tekton-controller-deployment.yaml
+++ b/charts/lighthouse/templates/tekton-controller-deployment.yaml
@@ -27,6 +27,7 @@ spec:
           - --namespace={{ .Release.Namespace }}
           - --dashboard-url={{ .Values.tektoncontroller.dashboardURL }}
           - --dashboard-template={{ .Values.tektoncontroller.dashboardTemplate }}
+          - --enable-rerun-status-update={{ .Values.tektoncontroller.enableRerunStatusUpdate | default false }}
         ports:
           - name: metrics
             containerPort: 8080
diff --git a/charts/lighthouse/templates/tekton-controller-role.yaml b/charts/lighthouse/templates/tekton-controller-role.yaml
index 0d0c47f0d..3d82838eb 100644
--- a/charts/lighthouse/templates/tekton-controller-role.yaml
+++ b/charts/lighthouse/templates/tekton-controller-role.yaml
@@ -19,12 +19,18 @@ rules:
       - list
       - get
       - watch
+      {{- if .Values.tektoncontroller.enableRerunStatusUpdate }}
+      - update
+      {{- end }}
   - apiGroups:
       - lighthouse.jenkins.io
     resources:
       - lighthousebreakpoints
       - lighthousejobs
     verbs:
+      {{- if .Values.tektoncontroller.enableRerunStatusUpdate }}
+      - create
+      {{- end }}
       - get
       - update
       - list
diff --git a/charts/lighthouse/values.yaml b/charts/lighthouse/values.yaml
index 54f43949c..dd083d255 100644
--- a/charts/lighthouse/values.yaml
+++ b/charts/lighthouse/values.yaml
@@ -275,6 +275,9 @@ tektoncontroller:
   # tektoncontroller.terminationGracePeriodSeconds -- Termination grace period for tekton controller pods
   terminationGracePeriodSeconds: 180
 
+  # tektoncontroller.enableRerunStatusUpdate -- Enable updating the status at the git provider when PipelineRuns are rerun
+  enableRerunStatusUpdate: false
+
   image:
     # tektoncontroller.image.repository -- Template for computing the tekton controller docker image repository
     repository: "{{ .Values.image.parentRepository }}/lighthouse-tekton-controller"
diff --git a/cmd/tektoncontroller/main.go b/cmd/tektoncontroller/main.go
index e3f2c8de3..ac3d9fd6b 100644
--- a/cmd/tektoncontroller/main.go
+++ b/cmd/tektoncontroller/main.go
@@ -16,9 +16,10 @@ import (
 )
 
 type options struct {
-	namespace         string
-	dashboardURL      string
-	dashboardTemplate string
+	namespace               string
+	dashboardURL            string
+	dashboardTemplate       string
+	enableRerunStatusUpdate bool
 }
 
 func (o *options) Validate() error {
@@ -30,6 +31,7 @@ func gatherOptions(fs *flag.FlagSet, args ...string) options {
 	fs.StringVar(&o.namespace, "namespace", "", "The namespace to listen in")
 	fs.StringVar(&o.dashboardURL, "dashboard-url", "", "The base URL for the Tekton Dashboard to link to for build reports")
 	fs.StringVar(&o.dashboardTemplate, "dashboard-template", "", "The template expression for generating the URL to the build report based on the PipelineRun parameters. If not specified defaults to $LIGHTHOUSE_DASHBOARD_TEMPLATE")
+	fs.BoolVar(&o.enableRerunStatusUpdate, "enable-rerun-status-update", false, "Enable updating the status at the git provider when PipelineRuns are rerun")
 	err := fs.Parse(args)
 	if err != nil {
 		logrus.WithError(err).Fatal("Invalid options")
@@ -64,11 +66,18 @@ func main() {
 		logrus.WithError(err).Fatal("Unable to start manager")
 	}
 
-	reconciler := tektonengine.NewLighthouseJobReconciler(mgr.GetClient(), mgr.GetAPIReader(), mgr.GetScheme(), o.dashboardURL, o.dashboardTemplate, o.namespace)
-	if err = reconciler.SetupWithManager(mgr); err != nil {
+	lhJobReconciler := tektonengine.NewLighthouseJobReconciler(mgr.GetClient(), mgr.GetAPIReader(), mgr.GetScheme(), o.dashboardURL, o.dashboardTemplate, o.namespace)
+	if err = lhJobReconciler.SetupWithManager(mgr); err != nil {
 		logrus.WithError(err).Fatal("Unable to create controller")
 	}
 
+	if o.enableRerunStatusUpdate {
+		rerunPipelineRunReconciler := tektonengine.NewRerunPipelineRunReconciler(mgr.GetClient(), mgr.GetScheme())
+		if err = rerunPipelineRunReconciler.SetupWithManager(mgr); err != nil {
+			logrus.WithError(err).Fatal("Unable to create RerunPipelineRun controller")
+		}
+	}
+
 	defer interrupts.WaitForGracefulShutdown()
 	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
 		logrus.WithError(err).Fatal("Problem running manager")
diff --git a/pkg/engines/tekton/pipelinerun_rerun_controller.go b/pkg/engines/tekton/pipelinerun_rerun_controller.go
new file mode 100644
index 000000000..a58d81b45
--- /dev/null
+++ b/pkg/engines/tekton/pipelinerun_rerun_controller.go
@@ -0,0 +1,200 @@
+package tekton
+
+import (
+	"context"
+	"fmt"
+	"regexp"
+
+	"github.com/google/uuid"
+	lighthousev1alpha1 "github.com/jenkins-x/lighthouse/pkg/apis/lighthouse/v1alpha1"
+	"github.com/jenkins-x/lighthouse/pkg/util"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	pipelinev1beta1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/utils/ptr"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+)
+
+// rerunSuffixPattern matches the "-r-xxxxx" suffix that Reconcile appends to the
+// name of each LighthouseJob it creates.
+var rerunSuffixPattern = regexp.MustCompile(`-r-[a-f0-9]{5}$`)
+
+// RerunPipelineRunReconciler reconciles PipelineRun objects with the rerun label
+type RerunPipelineRunReconciler struct {
+	client client.Client
+	logger *logrus.Entry
+	scheme *runtime.Scheme
+}
+
+// NewRerunPipelineRunReconciler creates a new RerunPipelineRunReconciler
+func NewRerunPipelineRunReconciler(client client.Client, scheme *runtime.Scheme) *RerunPipelineRunReconciler {
+	return &RerunPipelineRunReconciler{
+		client: client,
+		logger: logrus.NewEntry(logrus.StandardLogger()).WithField("controller", "RerunPipelineRunController"),
+		scheme: scheme,
+	}
+}
+
+// SetupWithManager sets up the controller with the Manager. Events are only
+// passed on for PipelineRuns that carry the util.DashboardTektonRerun label.
+func (r *RerunPipelineRunReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&pipelinev1beta1.PipelineRun{}).
+		WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool {
+			labels := object.GetLabels()
+			_, exists := labels[util.DashboardTektonRerun]
+			return exists
+		})).
+		Complete(r)
+}
+
+// Reconcile gives a rerun PipelineRun its own LighthouseJob.
+//
+// A PipelineRun without ownerReferences whose util.DashboardTektonRerun label
+// names a parent PipelineRun gets a copy of the LighthouseJob that owns the
+// parent. The copy is created under a fresh "-r-xxxxx" name and is then set as
+// the controller ownerReference of the rerun PipelineRun. Not-found errors
+// from any of the lookups end the reconciliation without a requeue.
+func (r *RerunPipelineRunReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	r.logger.Infof("Reconciling rerun PipelineRun %s", req.NamespacedName)
+
+	// Fetch the Rerun PipelineRun instance
+	var rerunPipelineRun pipelinev1beta1.PipelineRun
+	if err := r.client.Get(ctx, req.NamespacedName, &rerunPipelineRun); err != nil {
+		r.logger.Errorf("Failed to get rerun PipelineRun: %s", err)
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	// Check if the Rerun PipelineRun already has an ownerReference set
+	if len(rerunPipelineRun.OwnerReferences) > 0 {
+		r.logger.Infof("PipelineRun %s already has an ownerReference set, skipping.", req.NamespacedName)
+		return ctrl.Result{}, nil
+	}
+
+	// Extract Rerun PipelineRun Parent Name
+	rerunPipelineRunParentName, ok := rerunPipelineRun.Labels[util.DashboardTektonRerun]
+	if !ok {
+		return ctrl.Result{}, nil
+	}
+
+	// Get Rerun PipelineRun parent PipelineRun
+	var rerunPipelineRunParent pipelinev1beta1.PipelineRun
+	if err := r.client.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: rerunPipelineRunParentName}, &rerunPipelineRunParent); err != nil {
+		r.logger.Warningf("Unable to get Rerun Parent PipelineRun %s: %v", rerunPipelineRunParentName, err)
+		// we'll ignore not-found errors, since they can't be fixed by an immediate
+		// requeue (we'll need to wait for a new notification), and we can get them
+		// on deleted requests.
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	// Without an ownerReference on the parent there is no LighthouseJob to copy
+	if len(rerunPipelineRunParent.OwnerReferences) == 0 {
+		r.logger.Infof("Parent PipelineRun %s doesn't already have an ownerReference set, skipping.", rerunPipelineRunParentName)
+		return ctrl.Result{}, nil
+	}
+
+	// Get the LighthouseJob named by the parent PipelineRun's first ownerReference
+	var parentPipelineRunParentLighthouseJob lighthousev1alpha1.LighthouseJob
+	parentPipelineRunRef := rerunPipelineRunParent.OwnerReferences[0]
+	if err := r.client.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: parentPipelineRunRef.Name}, &parentPipelineRunParentLighthouseJob); err != nil {
+		r.logger.Warningf("Unable to get Rerun Parent PipelineRun Parent LighthouseJob %s: %v", parentPipelineRunRef.Name, err)
+		// we'll ignore not-found errors, since they can't be fixed by an immediate
+		// requeue (we'll need to wait for a new notification), and we can get them
+		// on deleted requests.
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	// Clone the LighthouseJob; DeepCopy carries over APIVersion and Kind
+	rerunLhJob := parentPipelineRunParentLighthouseJob.DeepCopy()
+
+	// Trim existing r-xxxxx suffix and append a new one
+	baseName := rerunSuffixPattern.ReplaceAllString(parentPipelineRunParentLighthouseJob.Name, "")
+	rerunLhJob.Name = fmt.Sprintf("%s-r-%s", baseName, uuid.NewString()[:5])
+
+	rerunLhJob.ResourceVersion = ""
+	rerunLhJob.UID = ""
+
+	// Create the new LighthouseJob
+	// NOTE(review): if setting the ownerReference below keeps failing, the error
+	// return requeues this request and the next pass creates another copy.
+	if err := r.client.Create(ctx, rerunLhJob); err != nil {
+		r.logger.Errorf("Failed to create new LighthouseJob: %s", err)
+		return ctrl.Result{}, err
+	}
+
+	// Prepare the ownerReference; rerunLhJob.UID is read after Create so that it
+	// is the UID of the stored copy rather than the value cleared above
+	ownerReference := metav1.OwnerReference{
+		APIVersion: parentPipelineRunParentLighthouseJob.APIVersion,
+		Kind:       parentPipelineRunParentLighthouseJob.Kind,
+		Name:       rerunLhJob.Name,
+		UID:        rerunLhJob.UID,
+		Controller: ptr.To(true),
+	}
+
+	// Add the ownerReference inside the callback, on the object handed in by
+	// retryModifyPipelineRun: that object is re-read after a failed attempt, so
+	// an ownerReference appended beforehand would be missing from the retries.
+	setOwnerReference := func(pipelineRun *pipelinev1beta1.PipelineRun) error {
+		if !hasOwnerReference(pipelineRun.OwnerReferences, ownerReference) {
+			pipelineRun.OwnerReferences = append(pipelineRun.OwnerReferences, ownerReference)
+		}
+		if err := r.client.Update(ctx, pipelineRun); err != nil {
+			return errors.Wrap(err, "failed to update PipelineRun with ownerReference")
+		}
+		return nil
+	}
+	if err := r.retryModifyPipelineRun(ctx, req.NamespacedName, &rerunPipelineRun, setOwnerReference); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	r.logger.Infof("Successfully patched PipelineRun %s with new ownerReference to LighthouseJob %s", req.NamespacedName, rerunLhJob.Name)
+
+	return ctrl.Result{}, nil
+}
+
+// hasOwnerReference reports whether refs already contains an entry with the
+// same UID as ref.
+func hasOwnerReference(refs []metav1.OwnerReference, ref metav1.OwnerReference) bool {
+	for i := range refs {
+		if refs[i].UID == ref.UID {
+			return true
+		}
+	}
+	return false
+}
+
+// retryModifyPipelineRun calls f with pipelineRun until it succeeds, for at most
+// five attempts. After a failed attempt pipelineRun is re-read from key before
+// f is called again, so f has to apply its change to the object it is given.
+// It returns nil as soon as f succeeds or when the re-read reports that the
+// PipelineRun no longer exists, and an error otherwise.
+func (r *RerunPipelineRunReconciler) retryModifyPipelineRun(ctx context.Context, key client.ObjectKey, pipelineRun *pipelinev1beta1.PipelineRun, f func(pipelineRun *pipelinev1beta1.PipelineRun) error) error {
+	const retryCount = 5
+
+	for attempt := 1; ; attempt++ {
+		err := f(pipelineRun)
+		if err == nil {
+			if attempt > 1 {
+				r.logger.Infof("Took %d attempts to update PipelineRun %s", attempt, pipelineRun.Name)
+			}
+			return nil
+		}
+		if attempt >= retryCount {
+			return fmt.Errorf("failed to update PipelineRun %s after %d attempts: %w", pipelineRun.Name, retryCount, err)
+		}
+
+		if err := r.client.Get(ctx, key, pipelineRun); err != nil {
+			r.logger.Warningf("Unable to get PipelineRun %s due to: %s", pipelineRun.Name, err)
+			// we'll ignore not-found errors, since they can't be fixed by an immediate
+			// requeue (we'll need to wait for a new notification), and we can get them
+			// on deleted requests.
+			return client.IgnoreNotFound(err)
+		}
+	}
+}
diff --git a/pkg/util/constants.go b/pkg/util/constants.go
index 6643affa2..6346f10f9 100644
--- a/pkg/util/constants.go
+++ b/pkg/util/constants.go
@@ -91,4 +91,7 @@ const (
 
 	// LighthousePayloadTypeActivity is the activity type
 	LighthousePayloadTypeActivity = "activity"
+
+	// DashboardTektonRerun is added by Tekton when clicking on the Action > Rerun button
+	DashboardTektonRerun = "dashboard.tekton.dev/rerunOf"
 )