@@ -193,7 +193,7 @@ pub async fn reconcile_cluster_hibernation(cdb: &CoreDB, ctx: &Arc<Context>) ->
        "enabled"
    };

-    let cluster_annotations = cluster.metadata.annotations.unwrap_or_default();
+    let cluster_annotations = cluster.metadata.annotations.clone().unwrap_or_default();
    let hibernation_value = if cdb.spec.stop { "on" } else { "off" };

    // Build the hibernation patch we want to apply to disable the CNPG cluster.
@@ -229,6 +229,10 @@ pub async fn reconcile_cluster_hibernation(cdb: &CoreDB, ctx: &Arc<Context>) ->
        return Err(action);
    }

+    // If CNPG is already hibernated there may be a dangling PodMonitor still present.
+    // It will not get cleaned up while the cluster is hibernated, so we need to remove it manually.
+    cleanup_hibernated_podmonitor(ctx, namespace, name.clone(), cdb, &cluster).await?;
+
    patch_cluster_merge(cdb, ctx, patch_hibernation_annotation).await?;
    info!(
        "Toggled hibernation annotation of {} to '{}'",
@@ -402,6 +406,57 @@ async fn patch_appservice_deployments(
    Ok(())
}

+/// Cleans up any dangling PodMonitor resources for a hibernated CloudNativePG (CNPG) cluster.
+///
+/// When a CNPG cluster is hibernated, there might be leftover PodMonitor resources that
+/// need manual cleanup. This function handles that cleanup process.
+///
+/// # Arguments
+///
+/// * `ctx` - Operator context providing the Kubernetes client for API operations
+/// * `namespace` - Namespace where the cluster and PodMonitor reside
+/// * `name` - Name of the cluster and associated PodMonitor
+/// * `cdb` - Reference to the CoreDB resource
+/// * `cluster` - Reference to the CNPG Cluster resource
+///
+/// # Returns
+///
+/// * `Ok(())` if the cleanup was successful or if no action was needed
+/// * `Err(Action)` if there was an error during cleanup that requires requeuing
+///
+/// # Errors
+///
+/// Returns an error if the PodMonitor deletion fails for reasons other than the resource not existing.
+async fn cleanup_hibernated_podmonitor(
+    ctx: &Arc<Context>,
+    namespace: String,
+    name: String,
+    cdb: &CoreDB,
+    cluster: &Cluster,
+) -> Result<(), Action> {
+    if cdb.spec.stop && is_cluster_hibernated(cluster) {
+        let client = ctx.client.clone();
+        let podmonitor_api: Api<podmon::PodMonitor> = Api::namespaced(client, &namespace);
+        match podmonitor_api.delete(&name, &DeleteParams::default()).await {
+            Ok(_) => {
+                info!("Deleted PodMonitor for hibernated cluster {}", name);
+            }
+            Err(kube::Error::Api(api_err)) if api_err.code == 404 => {
+                debug!("No PodMonitor found for hibernated cluster {}", name);
+            }
+            Err(e) => {
+                warn!(
+                    "Could not delete PodMonitor for hibernated cluster {}; retrying",
+                    name
+                );
+                debug!("Caught error {}", e);
+                return Err(requeue_normal_with_jitter());
+            }
+        }
+    }
+    Ok(())
+}
+
async fn update_pooler_instances(
    pooler: &Option<Pooler>,
    cdb: &CoreDB,
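
For readers outside this codebase, the sketch below illustrates the delete-and-tolerate-404 pattern that `cleanup_hibernated_podmonitor` relies on. It is written against a plain `ConfigMap` so it compiles with only `kube` and `k8s-openapi` and does not need the operator's `podmon::PodMonitor` bindings; the function name and log messages are illustrative only and are not part of this PR.

// Standalone sketch (not part of the diff above): delete an object if it exists,
// treating "already gone" (HTTP 404) as success and surfacing anything else to the caller.
use k8s_openapi::api::core::v1::ConfigMap;
use kube::{api::DeleteParams, Api, Client};

async fn delete_if_present(client: Client, namespace: &str, name: &str) -> Result<(), kube::Error> {
    let api: Api<ConfigMap> = Api::namespaced(client, namespace);
    match api.delete(name, &DeleteParams::default()).await {
        // The object existed and the delete request was accepted.
        Ok(_) => println!("deleted ConfigMap {namespace}/{name}"),
        // A 404 from the API server means there is nothing left to clean up.
        Err(kube::Error::Api(err)) if err.code == 404 => {
            println!("ConfigMap {namespace}/{name} not found; nothing to delete")
        }
        // Any other failure is returned so the caller can requeue and retry.
        Err(e) => return Err(e),
    }
    Ok(())
}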