From 4cef464f643bfae975280a32a468221c012e191d Mon Sep 17 00:00:00 2001 From: Apoorv Mahajan Date: Tue, 19 Oct 2021 00:33:06 +0530 Subject: [PATCH] feat(orca/clouddriver): make waitOnJobCompletion retries configurable --- .../tasks/job/WaitOnJobCompletion.groovy | 12 +++++-- .../config/TaskConfigurationProperties.java | 16 +++++++++ .../tasks/job/WaitOnJobCompletionTest.java | 35 +++++++++++++------ 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletion.groovy b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletion.groovy index 605bc07d71..252100e98d 100644 --- a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletion.groovy +++ b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletion.groovy @@ -147,7 +147,11 @@ public class WaitOnJobCompletion implements CloudProviderAware, OverridableTimeo InputStream jobStream retrySupport.retry({ jobStream = katoRestService.collectJob(appName, account, location, name).body.in() - }, 6, 5000, false) // retry for 30 seconds + }, + configProperties.getJobStatusRetry().maxAttempts, + Duration.ofMillis(configProperties.getJobStatusRetry().getBackOffInMs()), + configProperties.getJobStatusRetry().exponentialBackoffEnabled + ) Map job = objectMapper.readValue(jobStream, new TypeReference() {}) outputs.jobStatus = job @@ -169,7 +173,11 @@ public class WaitOnJobCompletion implements CloudProviderAware, OverridableTimeo try { retrySupport.retry({ properties = katoRestService.getFileContents(appName, account, location, name, stage.context.propertyFile) - }, 6, 5000, false) // retry for 30 seconds + }, + configProperties.getFileContentRetry().maxAttempts, + Duration.ofMillis(configProperties.getFileContentRetry().getBackOffInMs()), + configProperties.getFileContentRetry().exponentialBackoffEnabled + ) } catch (Exception e) { if (status == ExecutionStatus.SUCCEEDED) { throw new ConfigurationException("Property File: ${stage.context.propertyFile} contents could not be retrieved. Error: " + e) diff --git a/orca-clouddriver/src/main/java/com/netflix/spinnaker/orca/clouddriver/config/TaskConfigurationProperties.java b/orca-clouddriver/src/main/java/com/netflix/spinnaker/orca/clouddriver/config/TaskConfigurationProperties.java index 95d074334b..afaa4b4048 100644 --- a/orca-clouddriver/src/main/java/com/netflix/spinnaker/orca/clouddriver/config/TaskConfigurationProperties.java +++ b/orca-clouddriver/src/main/java/com/netflix/spinnaker/orca/clouddriver/config/TaskConfigurationProperties.java @@ -47,6 +47,22 @@ public static class WaitOnJobCompletionTaskConfig { * Default or empty set means that no keys will be excluded. */ private Set excludeKeysFromOutputs = Set.of(); + + private Retries jobStatusRetry = new Retries(); + + private Retries fileContentRetry = new Retries(); + + @Data + public static class Retries { + // total number of attempts + int maxAttempts = 6; + + // time in ms to wait before subsequent retry attempts + long backOffInMs = 5000; + + // flag to enable exponential backoff + boolean exponentialBackoffEnabled = false; + } } @Data diff --git a/orca-clouddriver/src/test/java/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletionTest.java b/orca-clouddriver/src/test/java/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletionTest.java index 196ed9fc2c..09faa83b66 100644 --- a/orca-clouddriver/src/test/java/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletionTest.java +++ b/orca-clouddriver/src/test/java/com/netflix/spinnaker/orca/clouddriver/tasks/job/WaitOnJobCompletionTest.java @@ -73,15 +73,22 @@ public final class WaitOnJobCompletionTest { public void setup() { objectMapper = new ObjectMapper(); RetrySupport retrySupport = new RetrySupport(); - configProperties = new TaskConfigurationProperties(); - configProperties - .getWaitOnJobCompletionTask() - .setExcludeKeysFromOutputs(Set.of("completionDetails")); mockKatoRestService = mock(KatoRestService.class); JobUtils mockJobUtils = mock(JobUtils.class); mockExecutionRepository = mock(ExecutionRepository.class); mockFront50Service = mock(Front50Service.class); + configProperties = new TaskConfigurationProperties(); + TaskConfigurationProperties.WaitOnJobCompletionTaskConfig.Retries retries = + new TaskConfigurationProperties.WaitOnJobCompletionTaskConfig.Retries(); + retries.setMaxAttempts(3); + retries.setBackOffInMs(1); + configProperties.getWaitOnJobCompletionTask().setFileContentRetry(retries); + configProperties.getWaitOnJobCompletionTask().setJobStatusRetry(retries); + configProperties + .getWaitOnJobCompletionTask() + .setExcludeKeysFromOutputs(Set.of("completionDetails")); + task = new WaitOnJobCompletion( mockKatoRestService, @@ -289,8 +296,13 @@ void testPropertyFileContentsErrorHandlingForASuccessfulK8sRunJob() throws IOExc verify(mockKatoRestService, times(1)) .collectJob(eq("test-app"), eq("test-account"), eq("test"), eq("job testrep")); - // since there are 6 tries made for this call if it fails - verify(mockKatoRestService, times(6)) + verify( + mockKatoRestService, + times( + configProperties + .getWaitOnJobCompletionTask() + .getFileContentRetry() + .getMaxAttempts())) .getFileContents( eq("test-app"), eq("test-account"), eq("test"), eq("job testrep"), eq("testrep")); @@ -448,10 +460,13 @@ void testPropertyFileContentsErrorHandlingForK8sRunJobFailures() throws IOExcept verify(mockKatoRestService, times(1)) .collectJob(eq("test-app"), eq("test-account"), eq("test"), eq("job testrep")); - // since there are 6 tries made for this call if it fails - this is a slow call since retry - // config options are - // hard-coded - verify(mockKatoRestService, times(6)) + verify( + mockKatoRestService, + times( + configProperties + .getWaitOnJobCompletionTask() + .getFileContentRetry() + .getMaxAttempts())) .getFileContents( eq("test-app"), eq("test-account"), eq("test"), eq("job testrep"), eq("testrep"));