Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
Switch non interruptible @ unified retry behavior
Browse files Browse the repository at this point in the history
Signed-off-by: Dennis Keck <26092524+fellhorn@users.noreply.github.com>
  • Loading branch information
fellhorn committed Aug 23, 2023
1 parent b201c6b commit 69976b1
Showing 1 changed file with 20 additions and 4 deletions.
24 changes: 20 additions & 4 deletions pkg/controller/nodes/node_exec_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/io"
"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/ioutils"
"github.com/flyteorg/flytepropeller/pkg/controller/config"

"github.com/flyteorg/flytepropeller/events"
"github.com/flyteorg/flytepropeller/pkg/apis/flyteworkflow/v1alpha1"
Expand Down Expand Up @@ -216,10 +217,25 @@ func (c *nodeExecutor) newNodeExecContextDefault(ctx context.Context, currentNod

s := nl.GetNodeExecutionStatus(ctx, currentNodeID)

// a node is not considered interruptible if the system failures have exceeded the configured threshold
if interruptible && s.GetSystemFailures() >= c.interruptibleFailureThreshold {
interruptible = false
c.metrics.InterruptedThresholdHit.Inc(ctx)
if config.GetConfig().NodeConfig.IgnoreRetryCause {
// For the unified retry behavior, we execute the last interruptibleFailureThreshold attempts on a
// non-interruptible machine.
currentAttempt := s.GetAttempts() + 1 + s.GetSystemFailures()
maxAttempts := uint32(config.GetConfig().NodeConfig.DefaultMaxAttempts)
if n.GetRetryStrategy() != nil && n.GetRetryStrategy().MinAttempts != nil && *n.GetRetryStrategy().MinAttempts != 0 {
maxAttempts = uint32(*n.GetRetryStrategy().MinAttempts)
}

if interruptible && currentAttempt >= maxAttempts-c.interruptibleFailureThreshold {
interruptible = false
c.metrics.InterruptedThresholdHit.Inc(ctx)
}
} else {
// Otherwise, a node is not considered interruptible once its system failures have reached or exceeded the configured threshold.
if interruptible && s.GetSystemFailures() >= c.interruptibleFailureThreshold {
interruptible = false
c.metrics.InterruptedThresholdHit.Inc(ctx)
}
}

rawOutputPrefix := c.defaultDataSandbox
Expand Down

0 comments on commit 69976b1

Please sign in to comment.