From c3d407be5ad8fd9a9c3d538a049332d1a1924cf8 Mon Sep 17 00:00:00 2001 From: Matthieu Beteille Date: Tue, 10 Mar 2026 10:32:49 -0400 Subject: [PATCH 1/2] Make drain delay configurable via drainDelaySeconds field --- crd/restatedeployments.yaml | 8 ++++++++ .../restatedeployment/reconcilers/knative.rs | 5 +++-- .../restatedeployment/reconcilers/replicaset.rs | 3 ++- src/resources/restatedeployments.rs | 12 ++++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/crd/restatedeployments.yaml b/crd/restatedeployments.yaml index 03f7634..2b82ef1 100644 --- a/crd/restatedeployments.yaml +++ b/crd/restatedeployments.yaml @@ -138,6 +138,14 @@ spec: restate: description: Restate specific configuration properties: + drainDelaySeconds: + description: |- + Seconds to wait before removing old versions after they are drained. + Defaults to 300 (5 minutes). + format: int64 + minimum: 0.0 + nullable: true + type: integer register: description: The location of the Restate Admin API to register this deployment against oneOf: diff --git a/src/controllers/restatedeployment/reconcilers/knative.rs b/src/controllers/restatedeployment/reconcilers/knative.rs index 14c8202..ada254b 100644 --- a/src/controllers/restatedeployment/reconcilers/knative.rs +++ b/src/controllers/restatedeployment/reconcilers/knative.rs @@ -929,15 +929,16 @@ pub async fn cleanup_old_configurations( } (None, _, true) => { // endpoint exists and there's no valid remove_version_at annotation, create one + let drain_delay_seconds = rsd.spec.restate.drain_delay_seconds(); info!( configuration = %config_name, namespace = %namespace, - drain_delay = "5 minutes", + drain_delay_seconds, "Scheduling removal of old Configuration (after drain delay)" ); let remove_at = chrono::Utc::now() - .checked_add_signed(chrono::TimeDelta::minutes(5)) // Same as ReplicaSet cleanup + .checked_add_signed(chrono::TimeDelta::seconds(drain_delay_seconds)) .expect("remove_version_at in bounds"); let config_api: Api = 
Api::namespaced(ctx.client.clone(), namespace); diff --git a/src/controllers/restatedeployment/reconcilers/replicaset.rs b/src/controllers/restatedeployment/reconcilers/replicaset.rs index fbf3679..e2d9420 100644 --- a/src/controllers/restatedeployment/reconcilers/replicaset.rs +++ b/src/controllers/restatedeployment/reconcilers/replicaset.rs @@ -364,8 +364,9 @@ pub async fn cleanup_old_replicasets( rs_name, ); + let drain_delay_seconds = rsd.spec.restate.drain_delay_seconds(); let remove_at = chrono::Utc::now() - .checked_add_signed(chrono::TimeDelta::minutes(5)) // todo configurable? + .checked_add_signed(chrono::TimeDelta::seconds(drain_delay_seconds)) .expect("remove_version_at in bounds"); let params = PatchParams::apply("restate-operator/remove-version-at").force(); diff --git a/src/resources/restatedeployments.rs b/src/resources/restatedeployments.rs index 34c72b0..53295f9 100644 --- a/src/resources/restatedeployments.rs +++ b/src/resources/restatedeployments.rs @@ -247,6 +247,18 @@ pub struct RestateSpec { /// Force the use of HTTP/1.1 when registering with Restate #[serde(skip_serializing_if = "Option::is_none")] pub use_http11: Option, + + /// Seconds to wait before removing old versions after they are drained. + /// Defaults to 300 (5 minutes). 
+ #[serde(skip_serializing_if = "Option::is_none")] + #[schemars(range(min = 0))] + pub drain_delay_seconds: Option<i64>, +} + +impl RestateSpec { + /// Drain delay in seconds: 300 when unset, clamped to `0..=u32::MAX` so that building + /// the removal timestamp from it can neither go negative nor overflow and panic. + pub fn drain_delay_seconds(&self) -> i64 { + self.drain_delay_seconds.unwrap_or(300).clamp(0, i64::from(u32::MAX)) + } } /// The location of the Restate Admin API to register this deployment against From 037de63abb5609083cdf9b25c984c4f28f9c4577 Mon Sep 17 00:00:00 2001 From: Matthieu Beteille Date: Mon, 16 Mar 2026 10:03:08 -0400 Subject: [PATCH 2/2] make logs consistent, add release note, and update .pkl file accordingly --- crd/RestateDeployment.pkl | 4 +++ .../unreleased/96-configurable-drain-delay.md | 30 +++++++++++++++++++ .../reconcilers/replicaset.rs | 11 +++---- 3 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 release-notes/unreleased/96-configurable-drain-delay.md diff --git a/crd/RestateDeployment.pkl b/crd/RestateDeployment.pkl index a45ff96..98fb179 100644 --- a/crd/RestateDeployment.pkl +++ b/crd/RestateDeployment.pkl @@ -64,6 +64,10 @@ class Restate { /// Force the use of HTTP/1.1 when registering with Restate useHttp11: Boolean? + + /// Seconds to wait before removing old versions after they are drained. Defaults to 300 (5 + /// minutes). + drainDelaySeconds: Int? } /// The location of the Restate Admin API to register this deployment against diff --git a/release-notes/unreleased/96-configurable-drain-delay.md b/release-notes/unreleased/96-configurable-drain-delay.md new file mode 100644 index 0000000..9ded9c3 --- /dev/null +++ b/release-notes/unreleased/96-configurable-drain-delay.md @@ -0,0 +1,30 @@ +# Release Notes for PR #96: Configurable drain delay + +## New Feature + +### What Changed +Added a new optional `drainDelaySeconds` field to the RestateDeployment CRD's +`spec.restate` section. This controls how long the operator waits after a +deployment is drained before removing the old version. Previously hardcoded to 
+ +### Why This Matters +The default 5-minute safety buffer isn't always appropriate. Some environments +may want a longer window before old versions are cleaned up, while others may +want a shorter one. + +### Impact on Users +- **Existing deployments**: No impact. The default remains 300 seconds (5 minutes). +- **New deployments**: Can now configure the drain delay per RestateDeployment. + +### Migration Guidance +No migration needed. To configure a custom drain delay: + +```yaml +spec: + restate: + drainDelaySeconds: 600 # 10 minutes +``` + +### Related Issues +- PR #96: Make drain delay configurable via drainDelaySeconds field diff --git a/src/controllers/restatedeployment/reconcilers/replicaset.rs b/src/controllers/restatedeployment/reconcilers/replicaset.rs index e2d9420..f1351cf 100644 --- a/src/controllers/restatedeployment/reconcilers/replicaset.rs +++ b/src/controllers/restatedeployment/reconcilers/replicaset.rs @@ -359,12 +359,13 @@ pub async fn cleanup_old_replicasets( } (None, _, true) => { // endpoint exists and there's no valid remove_version_at annotation, create one - debug!( - "Scheduling removal (after drain delay) of old ReplicaSet {} in namespace {namespace}", - rs_name, - ); - let drain_delay_seconds = rsd.spec.restate.drain_delay_seconds(); + info!( + replicaset = %rs_name, + namespace = %namespace, + drain_delay_seconds, + "Scheduling removal of old ReplicaSet (after drain delay)" + ); let remove_at = chrono::Utc::now() .checked_add_signed(chrono::TimeDelta::seconds(drain_delay_seconds)) .expect("remove_version_at in bounds");