From 0bce9240be64eb114d95f0819091a7fd73fd87a0 Mon Sep 17 00:00:00 2001 From: Tom Harding Date: Fri, 6 Feb 2026 15:10:29 +0100 Subject: [PATCH 1/7] Make it so --- .../iac/concepts/resources/options/hooks.md | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/content/docs/iac/concepts/resources/options/hooks.md b/content/docs/iac/concepts/resources/options/hooks.md index 6c2747914555..8d58cf56283f 100644 --- a/content/docs/iac/concepts/resources/options/hooks.md +++ b/content/docs/iac/concepts/resources/options/hooks.md @@ -503,3 +503,166 @@ In order for delete hooks to run successfully, Pulumi must have access to any ne * When removing resources from your program, first remove *only* the resources you wish to delete, *leaving any delete hooks in place*. Upon running e.g. `pulumi up`, Pulumi will delete the resources and run any relevant delete hooks. Once this operation is complete, you can then remove the delete hooks from your program. * When running `pulumi destroy`, you must pass the `--run-program` flag to instruct Pulumi to run your program and register any hooks that are to be executed. If Pulumi detects that you are trying to `destroy` a stack that contains hooks _without_ the `--run-program` flag, it will fail with an error. + +## Error hooks + +Just as the other resource hooks can be executed before and after certain operations, you can also add hooks to run when operations fail. For example, to retry a failing resource registration, or to implement change the error-handling behaviour based on the type of error encountered. 
The inputs and outputs received will depend on the operation that fails: + +| Failed operation | Old inputs | New inputs | Old outputs | +|------------------|------------|------------|-------------| +| `create` | | ✓ | | +| `update` | ✓ | ✓ | ✓ | +| `delete` | ✓ | | ✓ | + +As well as the standard hook information and the name of the failing operation, error hooks also receive a list of errors encountered during previous runs (starting with the most recent). In other words, if a resource has failed three times, the hook receives three errors. The hook must then reply with a flag that determines whether to retry the operation, or whether to let the failure cascade and exit the program. + +{{< chooser language "typescript,python,go,csharp" >}} + +{{% choosable language typescript %}} +```typescript +import * as pulumi from "@pulumi/pulumi"; +import * as aws from "@pulumi/aws"; + +const notStartedRetryHook = new pulumi.ErrorHook( + "retry-when-not-started", + async (args) => { + const latestError = args.errors[0] ?? 
""; + + if (!latestError.includes("resource has not yet started")) { + return false; // this is another type of error + } + + await new Promise((resolve) => setTimeout(resolve, 1000)); + return true; + }, +); + +const bucket = new aws.s3.Bucket("example-bucket", {}, { + hooks: { + onError: [notStartedRetryHook], + }, +}); +``` +{{% /choosable %}} + +{{% choosable language python %}} +```python +import time + +import pulumi +from pulumi_aws import s3 + + +def retry_when_not_started(args: pulumi.ErrorHookArgs) -> bool: + latest_error = args.errors[0] if args.errors else "" + + if "resource has not yet started" not in latest_error: + return False + + time.sleep(5) + return True + + +not_started_retry_hook = pulumi.ErrorHook( + "retry-when-not-started", + retry_when_not_started, +) + +bucket = s3.Bucket( + "example-bucket", + opts=pulumi.ResourceOptions( + hooks=pulumi.ResourceHookBinding( + on_error=[not_started_retry_hook], + ), + ), +) +``` +{{% /choosable %}} + +{{% choosable language go %}} +```go +package main + +import ( + "strings" + "time" + + "github.com/pulumi/pulumi-aws/sdk/v6/go/aws/s3" + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" +) + +func main() { + pulumi.Run(func(ctx *pulumi.Context) error { + hook, err := ctx.RegisterErrorHook( + "retry-when-not-started", + func(args *pulumi.ErrorHookArgs) (bool, error) { + latest := "" + if len(args.Errors) > 0 { + latest = args.Errors[0] + } + + if !strings.Contains(latest, "resource has not yet started") { + return false, nil + } + + time.Sleep(5 * time.Second) + return true, nil + }, + ) + if err != nil { + return err + } + + _, err = s3.NewBucket(ctx, "example-bucket", nil, pulumi.ResourceHooks(&pulumi.ResourceHookBinding{ + OnError: []*pulumi.ErrorHook{hook}, + })) + if err != nil { + return err + } + + return nil + }) +} +``` +{{% /choosable %}} + +{{% choosable language csharp %}} +```csharp +using System; +using System.Threading; +using System.Threading.Tasks; +using Pulumi; +using Pulumi.Aws.S3; + +class 
ErrorHookStack : Stack +{ + public ErrorHookStack() + { + var retryHook = new ErrorHook( + "retry-when-not-started", + async (args, cancellationToken) => + { + var latestError = args.Errors.Count > 0 ? args.Errors[0] : ""; + + if (!latestError.Contains("resource has not yet started")) + { + return false; + } + + await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); + return true; + }); + + var bucket = new Bucket("example-bucket", new BucketArgs(), new CustomResourceOptions + { + Hooks = new ResourceHookBinding + { + OnError = { retryHook }, + }, + }); + } +} +``` +{{% /choosable %}} + +{{< /chooser >}} From 7ef2976ae0eebf5e16c58c600151a007672bddb2 Mon Sep 17 00:00:00 2001 From: Tom Harding Date: Fri, 6 Feb 2026 15:15:47 +0100 Subject: [PATCH 2/7] Be nice to AWS --- .../docs/iac/concepts/resources/options/hooks.md | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/content/docs/iac/concepts/resources/options/hooks.md b/content/docs/iac/concepts/resources/options/hooks.md index 8d58cf56283f..14cd1161f18a 100644 --- a/content/docs/iac/concepts/resources/options/hooks.md +++ b/content/docs/iac/concepts/resources/options/hooks.md @@ -521,7 +521,6 @@ As well as the standard hook information and the name of the failing operation, {{% choosable language typescript %}} ```typescript import * as pulumi from "@pulumi/pulumi"; -import * as aws from "@pulumi/aws"; const notStartedRetryHook = new pulumi.ErrorHook( "retry-when-not-started", @@ -537,7 +536,7 @@ const notStartedRetryHook = new pulumi.ErrorHook( }, ); -const bucket = new aws.s3.Bucket("example-bucket", {}, { +const res = new MyResource("res", {}, { hooks: { onError: [notStartedRetryHook], }, @@ -550,7 +549,6 @@ const bucket = new aws.s3.Bucket("example-bucket", {}, { import time import pulumi -from pulumi_aws import s3 def retry_when_not_started(args: pulumi.ErrorHookArgs) -> bool: @@ -568,8 +566,8 @@ not_started_retry_hook = pulumi.ErrorHook( retry_when_not_started, ) -bucket = 
s3.Bucket( - "example-bucket", +res = MyResource( + "res", opts=pulumi.ResourceOptions( hooks=pulumi.ResourceHookBinding( on_error=[not_started_retry_hook], @@ -587,7 +585,6 @@ import ( "strings" "time" - "github.com/pulumi/pulumi-aws/sdk/v6/go/aws/s3" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) @@ -613,7 +610,7 @@ func main() { return err } - _, err = s3.NewBucket(ctx, "example-bucket", nil, pulumi.ResourceHooks(&pulumi.ResourceHookBinding{ + _, err = NewMyResource(ctx, "res", &MyResourceArgs{}, pulumi.ResourceHooks(&pulumi.ResourceHookBinding{ OnError: []*pulumi.ErrorHook{hook}, })) if err != nil { @@ -632,7 +629,6 @@ using System; using System.Threading; using System.Threading.Tasks; using Pulumi; -using Pulumi.Aws.S3; class ErrorHookStack : Stack { @@ -653,7 +649,7 @@ class ErrorHookStack : Stack return true; }); - var bucket = new Bucket("example-bucket", new BucketArgs(), new CustomResourceOptions + var res = new MyResource("res", new MyResourceArgs(), new CustomResourceOptions { Hooks = new ResourceHookBinding { From acc2e3165e7d0349d02d43f828e419665df1c0d8 Mon Sep 17 00:00:00 2001 From: Tom Harding Date: Fri, 6 Feb 2026 16:50:16 +0100 Subject: [PATCH 3/7] WIP --- .../blog/handling-deployment-errors/index.md | 180 ++++++++++++++++++ .../iac/concepts/resources/options/hooks.md | 18 +- 2 files changed, 189 insertions(+), 9 deletions(-) create mode 100644 content/blog/handling-deployment-errors/index.md diff --git a/content/blog/handling-deployment-errors/index.md b/content/blog/handling-deployment-errors/index.md new file mode 100644 index 000000000000..852d39747af2 --- /dev/null +++ b/content/blog/handling-deployment-errors/index.md @@ -0,0 +1,180 @@ +--- +title: "New in Pulumi IaC: `onError` Resource Hook" +date: 2026-02-06 +meta_desc: "You can now use the `onError` resource hook to control the retry behaviour of failing resource registrations" +meta_image: meta.png +authors: + - tom-harding +tags: + - features + - iac + - releases +social: + twitter: "New 
in Pulumi IaC: the new `onError` hook gives you full control over deployment failures" + linkedin: "Pulumi introduces a new type of resource hook: the `onError` hook, letting you control the retry behaviour of resources that fail to create." +--- + +Last year, Pulumi IaC introduced the [resource hooks](/blog/resource-hooks/) feature, allowing you to run custom code at different points in the lifecycle of resources. Today, we'd like to talk about the latest addition to these hooks: the `onError` hook. + + + +## Recovering from errors + +A Pulumi program will run for as long as things go according to plan. When an error is encountered, this error is reported back to us with information about what went wrong. However, sometimes, these errors are intermitent or temporary. + +For this blog, we'll look at a common example: resource readiness. Often, we want to create resources that depend on things like DNS propagation, or the readiness state of other servers. In these cases, a Pulumi program can fail simply because we executed the program too quickly! + +In this case, we often don't want the program to fail - we just want to wait for a period of time and retry the operation. This is where the `onError` hook can help us: + +{{< chooser language "typescript,python,go,csharp" >}} + +{{% choosable language typescript %}} +```typescript +import * as pulumi from "@pulumi/pulumi"; + +const notStartedRetryHook = new pulumi.ErrorHook( + "retry-when-not-started", + async (args) => { + const latestError = args.errors[0] ?? 
""; + + if (!latestError.includes("resource has not yet started")) { + return false; // do not retry, this is another type of error + } + + await new Promise((resolve) => setTimeout(resolve, 1000)); + return true; // retry + }, +); + +const res = new MyResource("res", {}, { + hooks: { + onError: [notStartedRetryHook], + }, +}); +``` +{{% /choosable %}} + +{{% choosable language python %}} +```python +import time + +import pulumi + + +def retry_when_not_started(args: pulumi.ErrorHookArgs) -> bool: + latest_error = args.errors[0] if args.errors else "" + + if "resource has not yet started" not in latest_error: + return False # do not retry, this is another type of error + + time.sleep(5) + return True # retry + + +not_started_retry_hook = pulumi.ErrorHook( + "retry-when-not-started", + retry_when_not_started, +) + +res = MyResource( + "res", + opts=pulumi.ResourceOptions( + hooks=pulumi.ResourceHookBinding( + on_error=[not_started_retry_hook], + ), + ), +) +``` +{{% /choosable %}} + +{{% choosable language go %}} +```go +package main + +import ( + "strings" + "time" + + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" +) + +func main() { + pulumi.Run(func(ctx *pulumi.Context) error { + hook, err := ctx.RegisterErrorHook( + "retry-when-not-started", + func(args *pulumi.ErrorHookArgs) (bool, error) { + latest := "" + if len(args.Errors) > 0 { + latest = args.Errors[0] + } + + if !strings.Contains(latest, "resource has not yet started") { + return false, nil // do not retry, this is another type of error + } + + time.Sleep(5 * time.Second) + return true, nil // retry + }, + ) + if err != nil { + return err + } + + _, err = NewMyResource(ctx, "res", &MyResourceArgs{}, pulumi.ResourceHooks(&pulumi.ResourceHookBinding{ + OnError: []*pulumi.ErrorHook{hook}, + })) + if err != nil { + return err + } + + return nil + }) +} +``` +{{% /choosable %}} + +{{% choosable language csharp %}} +```csharp +using System; +using System.Threading; +using System.Threading.Tasks; +using Pulumi; + 
+class ErrorHookStack : Stack +{ + public ErrorHookStack() + { + var retryHook = new ErrorHook( + "retry-when-not-started", + async (args, cancellationToken) => + { + var latestError = args.Errors.Count > 0 ? args.Errors[0] : ""; + + if (!latestError.Contains("resource has not yet started")) + { + return false; // do not retry, this is another type of error + } + + await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); + return true; // retry + }); + + var res = new MyResource("res", new MyResourceArgs(), new CustomResourceOptions + { + Hooks = new ResourceHookBinding + { + OnError = { retryHook }, + }, + }); + } +} +``` +{{% /choosable %}} + +{{< /chooser >}} + +Each time the operation fails, the new error will be passed along with all the previous attempts' errors (newest first) to the error hook. The hook should then return either true or false to tell Pulumi whether to retry the operation or not. If we decide not to retry the operation, the program will fail as normal, with the most recent error being shown as the reason for failure. What the error hook does is up to you: the number of errors can be used to determine how many times the operation has failed, which means we can implement systems like exponential backoff or maximum retries as error hooks! + +## Next steps + +This feature is fully supported in our Node, Python, and Go SDKs as of v3.219.0, with .NET support landing in the next release. For more information, see the [hooks documentation](/docs/iac/concepts/resources/options/hooks/). Thanks for reading, and feel free to reach out with any questions via [GitHub](https://github.com/pulumi/pulumi), [X](https://x.com/pulumicorp), or our [Community Slack](https://slack.pulumi.com/). 
diff --git a/content/docs/iac/concepts/resources/options/hooks.md b/content/docs/iac/concepts/resources/options/hooks.md index 14cd1161f18a..0fdb8cd4ae25 100644 --- a/content/docs/iac/concepts/resources/options/hooks.md +++ b/content/docs/iac/concepts/resources/options/hooks.md @@ -514,7 +514,7 @@ Just as the other resource hooks can be executed before and after certain operat | `update` | ✓ | ✓ | ✓ | | `delete` | ✓ | | ✓ | -As well as the standard hook information and the name of the failing operation, error hooks also receive a list of errors encountered during previous runs (starting with the most recent). In other words, if a resource has failed three times, the hook receives three errors. The hook must then reply with a flag that determines whether to retry the operation, or whether to let the failure cascade and exit the program. +As well as the standard hook information and the name of the failing operation, error hooks also receive a list of errors encountered during previous runs (starting with the most recent). In other words, if a resource has failed three times, the hook receives three errors. The hook must then reply with a flag that determines whether to retry the operation, or whether to let the failure cascade and exit the program. Note that an operation can be retried up to 100 times. {{< chooser language "typescript,python,go,csharp" >}} @@ -528,11 +528,11 @@ const notStartedRetryHook = new pulumi.ErrorHook( const latestError = args.errors[0] ?? 
""; if (!latestError.includes("resource has not yet started")) { - return false; // this is another type of error + return false; // do not retry, this is another type of error } await new Promise((resolve) => setTimeout(resolve, 1000)); - return true; + return true; // retry }, ); @@ -555,10 +555,10 @@ def retry_when_not_started(args: pulumi.ErrorHookArgs) -> bool: latest_error = args.errors[0] if args.errors else "" if "resource has not yet started" not in latest_error: - return False + return False # do not retry, this is another type of error time.sleep(5) - return True + return True # retry not_started_retry_hook = pulumi.ErrorHook( @@ -599,11 +599,11 @@ func main() { } if !strings.Contains(latest, "resource has not yet started") { - return false, nil + return false, nil // do not retry, this is another type of error } time.Sleep(5 * time.Second) - return true, nil + return true, nil // retry }, ) if err != nil { @@ -642,11 +642,11 @@ class ErrorHookStack : Stack if (!latestError.Contains("resource has not yet started")) { - return false; + return false; // do not retry, this is another type of error } await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); - return true; + return true; // retry }); var res = new MyResource("res", new MyResourceArgs(), new CustomResourceOptions From 753a89aa1d8b5352fcc10f2742f223b0b9087275 Mon Sep 17 00:00:00 2001 From: Tom Harding Date: Fri, 6 Feb 2026 16:57:08 +0100 Subject: [PATCH 4/7] Yes? 
--- content/blog/handling-deployment-errors/index.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/content/blog/handling-deployment-errors/index.md b/content/blog/handling-deployment-errors/index.md index 852d39747af2..0f1ef7daa07f 100644 --- a/content/blog/handling-deployment-errors/index.md +++ b/content/blog/handling-deployment-errors/index.md @@ -14,17 +14,13 @@ social: linkedin: "Pulumi introduces a new type of resource hook: the `onError` hook, letting you control the retry behaviour of resources that fail to create." --- -Last year, Pulumi IaC introduced the [resource hooks](/blog/resource-hooks/) feature, allowing you to run custom code at different points in the lifecycle of resources. Today, we'd like to talk about the latest addition to these hooks: the `onError` hook. +Last year, Pulumi IaC introduced the [resource hooks](/blog/resource-hooks/) feature, allowing you to run custom code at different points in the lifecycle of resources. Today, we'd like to introduce the latest addition to these hooks: the `onError` hook. ## Recovering from errors -A Pulumi program will run for as long as things go according to plan. When an error is encountered, this error is reported back to us with information about what went wrong. However, sometimes, these errors are intermitent or temporary. - -For this blog, we'll look at a common example: resource readiness. Often, we want to create resources that depend on things like DNS propagation, or the readiness state of other servers. In these cases, a Pulumi program can fail simply because we executed the program too quickly! - -In this case, we often don't want the program to fail - we just want to wait for a period of time and retry the operation. This is where the `onError` hook can help us: +When a Pulumi program encounters an error while creating, updating, or deleting a resource, this error halts the operation and the error is reported back to us with information about what went wrong. 
However, this isn't always what we want: sometimes, these errors are intermitent or temporary. For this blog, we'll look at a common example: resource readiness. Often, we want to create resources that depend on things like DNS propagation, or the readiness state of other servers. In these cases, a Pulumi program can fail simply because we executed the program too quickly! In this case, we often don't want the program to fail - we just want to wait for a period of time and retry the operation. This is where the `onError` hook can help us: {{< chooser language "typescript,python,go,csharp" >}} @@ -173,7 +169,7 @@ class ErrorHookStack : Stack {{< /chooser >}} -Each time the operation fails, the new error will be passed along with all the previous attempts' errors (newest first) to the error hook. The hook should then return either true or false to tell Pulumi whether to retry the operation or not. If we decide not to retry the operation, the program will fail as normal, with the most recent error being shown as the reason for failure. What the error hook does is up to you: the number of errors can be used to determine how many times the operation has failed, which means we can implement systems like exponential backoff or maximum retries as error hooks! +Each time the operation fails, the new error will be passed along with all the previous attempts' errors (newest first) to the error hook. The hook should then return either true or false to tell Pulumi whether to retry the operation or not. If we decide not to retry the operation, the program will fail as normal, with the most recent error being shown as the reason for failure. With this information, we can implement many failure models. For example, the number of errors tells us how many times the operation has failed. If all these failures have been readiness failures, we can use this to implement backoff mechanisms: perhaps we wait one second the first time, two seconds the second time, and so on. 
As another example, maybe we have some resource that is known to be intermitent, so we'll always retry once just in case. The callback exists in your language of choice, so you have full freedom over what and how these failures are handled. ## Next steps From f0092c20fe63d21ed8ad0f3bd96b064e79b08d00 Mon Sep 17 00:00:00 2001 From: Tom Harding Date: Fri, 6 Feb 2026 16:59:51 +0100 Subject: [PATCH 5/7] Claude --- content/docs/iac/concepts/resources/options/hooks.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/content/docs/iac/concepts/resources/options/hooks.md b/content/docs/iac/concepts/resources/options/hooks.md index 0fdb8cd4ae25..53456e4e5918 100644 --- a/content/docs/iac/concepts/resources/options/hooks.md +++ b/content/docs/iac/concepts/resources/options/hooks.md @@ -506,7 +506,7 @@ In order for delete hooks to run successfully, Pulumi must have access to any ne ## Error hooks -Just as the other resource hooks can be executed before and after certain operations, you can also add hooks to run when operations fail. For example, to retry a failing resource registration, or to implement change the error-handling behaviour based on the type of error encountered. The inputs and outputs received will depend on the operation that fails: +Just as the other resource hooks can be executed before and after certain operations, you can also add hooks to run when operations fail. For example, to retry a failing resource registration, or to change the error-handling behavior based on the type of error encountered. 
The inputs and outputs received will depend on the operation that fails: | Failed operation | Old inputs | New inputs | Old outputs | |------------------|------------|------------|-------------| @@ -514,7 +514,7 @@ Just as the other resource hooks can be executed before and after certain operat | `update` | ✓ | ✓ | ✓ | | `delete` | ✓ | | ✓ | -As well as the standard hook information and the name of the failing operation, error hooks also receive a list of errors encountered during previous runs (starting with the most recent). In other words, if a resource has failed three times, the hook receives three errors. The hook must then reply with a flag that determines whether to retry the operation, or whether to let the failure cascade and exit the program. Note that an operation can be retried up to 100 times. +As well as the standard hook information and the name of the failing operation, error hooks also receive a list of errors encountered during previous runs (starting with the most recent). In other words, if a resource has failed three times, the hook receives three errors. The hook must then reply with a flag that determines whether to retry the operation, or whether to let the failure cascade and exit the program. {{< chooser language "typescript,python,go,csharp" >}} @@ -531,7 +531,7 @@ const notStartedRetryHook = new pulumi.ErrorHook( return false; // do not retry, this is another type of error } - await new Promise((resolve) => setTimeout(resolve, 1000)); + await new Promise((resolve) => setTimeout(resolve, 5000)); return true; // retry }, ); @@ -662,3 +662,7 @@ class ErrorHookStack : Stack {{% /choosable %}} {{< /chooser >}} + +{{% notes type="info" %}} +An operation can only be retried a maximum of 100 times. After this, the engine will report the failure as a program failure, and the deployment will fail as normal. 
+{{% /notes %}} From 74c401f1992c71ca9004e7651932c44f6ff4f304 Mon Sep 17 00:00:00 2001 From: Tom Harding Date: Fri, 6 Feb 2026 17:04:45 +0100 Subject: [PATCH 6/7] Updates --- content/blog/handling-deployment-errors/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/blog/handling-deployment-errors/index.md b/content/blog/handling-deployment-errors/index.md index 0f1ef7daa07f..8cbf2ed8c0ec 100644 --- a/content/blog/handling-deployment-errors/index.md +++ b/content/blog/handling-deployment-errors/index.md @@ -10,7 +10,7 @@ tags: - iac - releases social: - twitter: "New in Pulumi IaC: the new `onError` hook gives you full control over deployment failures" + twitter: "New in Pulumi IaC: the `onError` hook gives you full control over deployment failures" linkedin: "Pulumi introduces a new type of resource hook: the `onError` hook, letting you control the retry behaviour of resources that fail to create." --- @@ -20,7 +20,7 @@ Last year, Pulumi IaC introduced the [resource hooks](/blog/resource-hooks/) fea ## Recovering from errors -When a Pulumi program encounters an error while creating, updating, or deleting a resource, this error halts the operation and the error is reported back to us with information about what went wrong. However, this isn't always what we want: sometimes, these errors are intermitent or temporary. For this blog, we'll look at a common example: resource readiness. Often, we want to create resources that depend on things like DNS propagation, or the readiness state of other servers. In these cases, a Pulumi program can fail simply because we executed the program too quickly! In this case, we often don't want the program to fail - we just want to wait for a period of time and retry the operation. 
This is where the `onError` hook can help us: +When a Pulumi program encounters an error while creating, updating, or deleting a resource, the operation halts and the error is reported back to us with information about what went wrong. However, this isn't always what we want: sometimes, these errors are intermittent or temporary. For this post, we'll look at a common example: resource readiness. Often, we want to create resources that depend on things like DNS propagation, or the readiness state of other servers. In these cases, a Pulumi program can fail simply because we executed the program too quickly! When that happens, we often don't want the program to fail; we just want to wait for a period of time and retry the operation. This is where the `onError` hook can help us: {{< chooser language "typescript,python,go,csharp" >}} @@ -37,7 +37,7 @@ const notStartedRetryHook = new pulumi.ErrorHook( return false; // do not retry, this is another type of error } - await new Promise((resolve) => setTimeout(resolve, 1000)); + await new Promise((resolve) => setTimeout(resolve, 5000)); return true; // retry }, ); From 84bacb180a0f5c3f28d53e137c944a0ffe2cf69b Mon Sep 17 00:00:00 2001 From: Tom Harding Date: Fri, 6 Feb 2026 17:08:06 +0100 Subject: [PATCH 7/7] Lol --- content/blog/handling-deployment-errors/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/blog/handling-deployment-errors/index.md b/content/blog/handling-deployment-errors/index.md index 8cbf2ed8c0ec..b944c88422c6 100644 --- a/content/blog/handling-deployment-errors/index.md +++ b/content/blog/handling-deployment-errors/index.md @@ -169,7 +169,7 @@ class ErrorHookStack : Stack {{< /chooser >}} -Each time the operation fails, the new error will be passed along with all the previous attempts' errors (newest first) to the error hook. The hook should then return either true or false to tell Pulumi whether to retry the operation or not. 
If we decide not to retry the operation, the program will fail as normal, with the most recent error being shown as the reason for failure. With this information, we can implement many failure models. For example, the number of errors tells us how many times the operation has failed. If all these failures have been readiness failures, we can use this to implement backoff mechanisms: perhaps we wait one second the first time, two seconds the second time, and so on. As another example, maybe we have some resource that is known to be intermitent, so we'll always retry once just in case. The callback exists in your language of choice, so you have full freedom over what and how these failures are handled. +Each time the operation fails, the new error will be passed along with all the previous attempts' errors (newest first) to the error hook. The hook should then return either true or false to tell Pulumi whether to retry the operation or not. If we decide not to retry the operation, the program will fail as normal, with the most recent error being shown as the reason for failure. With this information, we can implement many retry strategies. For example, the number of errors tells us how many times the operation has failed. If all these failures have been readiness failures, we can use this to implement backoff mechanisms: perhaps we wait one second the first time, two seconds the second time, and so on. As another example, maybe we have some resource that is known to fail intermittently, so we'll always retry once just in case. The callback is written in your language of choice, so you have full freedom over which failures are handled and how. ## Next steps