Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crd/RestateDeployment.pkl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ class Restate {

/// Force the use of HTTP/1.1 when registering with Restate
useHttp11: Boolean?

/// Seconds to wait before removing old versions after they are drained. Defaults to 300 (5
/// minutes).
drainDelaySeconds: Int?
}

/// The location of the Restate Admin API to register this deployment against
Expand Down
8 changes: 8 additions & 0 deletions crd/restatedeployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ spec:
restate:
description: Restate specific configuration
properties:
drainDelaySeconds:
description: |-
Seconds to wait before removing old versions after they are drained.
Defaults to 300 (5 minutes).
format: int64
minimum: 0.0
nullable: true
type: integer
register:
description: The location of the Restate Admin API to register this deployment against
oneOf:
Expand Down
30 changes: 30 additions & 0 deletions release-notes/unreleased/96-configurable-drain-delay.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Release Notes for PR #96: Configurable drain delay

## New Feature

### What Changed
Added a new optional `drainDelaySeconds` field to the RestateDeployment CRD's
`spec.restate` section. This controls how long the operator waits after a
deployment is drained before removing the old version. This delay was previously
hardcoded to 5 minutes (300 seconds).

### Why This Matters
The default 5-minute safety buffer isn't always appropriate. Some environments
may want a longer window before old versions are cleaned up, while others may
want a shorter one.

### Impact on Users
- **Existing deployments**: No impact. The default remains 300 seconds (5 minutes).
- **New deployments**: Can now configure the drain delay per RestateDeployment.

### Migration Guidance
No migration needed. To configure a custom drain delay:

```yaml
spec:
  restate:
    drainDelaySeconds: 600 # 10 minutes
```

### Related Issues
- PR #96: Make drain delay configurable via drainDelaySeconds field
5 changes: 3 additions & 2 deletions src/controllers/restatedeployment/reconcilers/knative.rs
Original file line number Diff line number Diff line change
Expand Up @@ -929,15 +929,16 @@ pub async fn cleanup_old_configurations(
}
(None, _, true) => {
// endpoint exists and there's no valid remove_version_at annotation, create one
let drain_delay_seconds = rsd.spec.restate.drain_delay_seconds();
info!(
configuration = %config_name,
namespace = %namespace,
drain_delay = "5 minutes",
drain_delay_seconds,
"Scheduling removal of old Configuration (after drain delay)"
);

let remove_at = chrono::Utc::now()
.checked_add_signed(chrono::TimeDelta::minutes(5)) // Same as ReplicaSet cleanup
.checked_add_signed(chrono::TimeDelta::seconds(drain_delay_seconds))
.expect("remove_version_at in bounds");

let config_api: Api<Configuration> = Api::namespaced(ctx.client.clone(), namespace);
Expand Down
12 changes: 7 additions & 5 deletions src/controllers/restatedeployment/reconcilers/replicaset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,15 @@ pub async fn cleanup_old_replicasets(
}
(None, _, true) => {
// endpoint exists and there's no valid remove_version_at annotation, create one
debug!(
"Scheduling removal (after drain delay) of old ReplicaSet {} in namespace {namespace}",
rs_name,
let drain_delay_seconds = rsd.spec.restate.drain_delay_seconds();
info!(
replicaset = %rs_name,
namespace = %namespace,
drain_delay_seconds,
"Scheduling removal of old ReplicaSet (after drain delay)"
);

let remove_at = chrono::Utc::now()
.checked_add_signed(chrono::TimeDelta::minutes(5)) // todo configurable?
.checked_add_signed(chrono::TimeDelta::seconds(drain_delay_seconds))
.expect("remove_version_at in bounds");

let params = PatchParams::apply("restate-operator/remove-version-at").force();
Expand Down
12 changes: 12 additions & 0 deletions src/resources/restatedeployments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,18 @@ pub struct RestateSpec {
/// Force the use of HTTP/1.1 when registering with Restate
#[serde(skip_serializing_if = "Option::is_none")]
pub use_http11: Option<bool>,

/// Seconds to wait before removing old versions after they are drained.
/// Defaults to 300 (5 minutes).
#[serde(skip_serializing_if = "Option::is_none")]
#[schemars(range(min = 0))]
pub drain_delay_seconds: Option<i64>,
}

impl RestateSpec {
    /// Drain delay applied when `drain_delay_seconds` is not set (5 minutes).
    const DEFAULT_DRAIN_DELAY_SECONDS: i64 = 300;

    /// Largest drain delay honoured, in seconds (100 years of 365 days).
    ///
    /// The field is user-supplied and has no upper bound of its own. Callers
    /// pass the returned value to `chrono::TimeDelta::seconds` and add it to
    /// the current time with `.expect("remove_version_at in bounds")`, so an
    /// arbitrarily large value would panic instead of scheduling a removal.
    /// Capping it here keeps the computed timestamp representable.
    const MAX_DRAIN_DELAY_SECONDS: i64 = 100 * 365 * 24 * 60 * 60;

    /// Returns the number of seconds to wait before removing old versions
    /// after they are drained.
    ///
    /// Falls back to 300 seconds when the field is unset. The result is
    /// always within `0..=MAX_DRAIN_DELAY_SECONDS`: negative values are
    /// treated as 0 and oversized values are capped at the maximum.
    pub fn drain_delay_seconds(&self) -> i64 {
        self.drain_delay_seconds
            .unwrap_or(Self::DEFAULT_DRAIN_DELAY_SECONDS)
            .clamp(0, Self::MAX_DRAIN_DELAY_SECONDS)
    }
}

/// The location of the Restate Admin API to register this deployment against
Expand Down
Loading