Skip to content

Commit 4d279e2

Browse files
committed
make sure to delete podmonitors when instances are already hibernated
1 parent 9349649 commit 4d279e2

File tree

1 file changed

+56
-1
lines changed

1 file changed

+56
-1
lines changed

tembo-operator/src/cloudnativepg/hibernate.rs

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ pub async fn reconcile_cluster_hibernation(cdb: &CoreDB, ctx: &Arc<Context>) ->
193193
"enabled"
194194
};
195195

196-
let cluster_annotations = cluster.metadata.annotations.unwrap_or_default();
196+
let cluster_annotations = cluster.metadata.annotations.clone().unwrap_or_default();
197197
let hibernation_value = if cdb.spec.stop { "on" } else { "off" };
198198

199199
// Build the hibernation patch we want to apply to disable the CNPG cluster.
@@ -229,6 +229,10 @@ pub async fn reconcile_cluster_hibernation(cdb: &CoreDB, ctx: &Arc<Context>) ->
229229
return Err(action);
230230
}
231231

232+
// If CNPG is already hibernated then there may be a dangling PodMonitor still present
233+
// This will not get cleaned up if already hibernated. We need to remove it manually
234+
cleanup_hibernated_podmonitor(ctx, namespace, name.clone(), cdb, &cluster).await?;
235+
232236
patch_cluster_merge(cdb, ctx, patch_hibernation_annotation).await?;
233237
info!(
234238
"Toggled hibernation annotation of {} to '{}'",
@@ -402,6 +406,57 @@ async fn patch_appservice_deployments(
402406
Ok(())
403407
}
404408

409+
/// Cleans up any dangling PodMonitor resources for a hibernated CloudNativePostgreSQL cluster.
410+
///
411+
/// When a CNPG cluster is hibernated, there might be leftover PodMonitor resources that
412+
/// need manual cleanup. This function handles that cleanup process.
413+
///
414+
/// # Arguments
415+
///
416+
/// * `client` - Kubernetes client for API operations
417+
/// * `namespace` - Namespace where the cluster and PodMonitor reside
418+
/// * `name` - Name of the cluster and associated PodMonitor
419+
/// * `cdb` - Reference to the CloudNativePostgreSQL resource
420+
/// * `cluster` - Reference to the Cluster resource
421+
///
422+
/// # Returns
423+
///
424+
/// * `Ok(())` if the cleanup was successful or if no action was needed
425+
/// * `Err(Error)` if there was an error during cleanup that requires requeuing
426+
///
427+
/// # Errors
428+
///
429+
/// Returns an error if the PodMonitor deletion fails for reasons other than the resource not existing.
430+
async fn cleanup_hibernated_podmonitor(
431+
ctx: &Arc<Context>,
432+
namespace: String,
433+
name: String,
434+
cdb: &CoreDB,
435+
cluster: &Cluster,
436+
) -> Result<(), Action> {
437+
if cdb.spec.stop && is_cluster_hibernated(cluster) {
438+
let client = ctx.client.clone();
439+
let podmonitor_api: Api<podmon::PodMonitor> = Api::namespaced(client, &namespace);
440+
match podmonitor_api.delete(&name, &DeleteParams::default()).await {
441+
Ok(_) => {
442+
info!("Deleted PodMonitor for hibernated cluster {}", name);
443+
}
444+
Err(kube::Error::Api(api_err)) if api_err.code == 404 => {
445+
debug!("No PodMonitor found for hibernated cluster {}", name);
446+
}
447+
Err(e) => {
448+
warn!(
449+
"Could not delete PodMonitor for hibernated cluster {}; retrying",
450+
name
451+
);
452+
debug!("Caught error {}", e);
453+
return Err(requeue_normal_with_jitter());
454+
}
455+
}
456+
}
457+
Ok(())
458+
}
459+
405460
async fn update_pooler_instances(
406461
pooler: &Option<Pooler>,
407462
cdb: &CoreDB,

0 commit comments

Comments
 (0)