Skip to content

Commit

Permalink
Remove clear-previous-error and use enable-cr-debug-metadata instead
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Newton <thomas.w.newton@gmail.com>
  • Loading branch information
Tom-Newton committed Jan 16, 2024
1 parent 216e1a3 commit 6c5650c
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 27 deletions.
4 changes: 1 addition & 3 deletions flytepropeller/pkg/controller/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ var (
DefaultMaxAttempts: 1,
IgnoreRetryCause: false,
EnableCRDebugMetadata: false,
ClearPreviousError: false,
},
MaxStreakLength: 8, // Turbo mode is enabled by default
ProfilerPort: config.Port{
Expand Down Expand Up @@ -215,8 +214,7 @@ type NodeConfig struct {
InterruptibleFailureThreshold int32 `json:"interruptible-failure-threshold" pflag:"1,number of failures for a node to be still considered interruptible. Negative numbers are treated as complementary (ex. -1 means last attempt is non-interruptible).'"`
DefaultMaxAttempts int32 `json:"default-max-attempts" pflag:"3,Default maximum number of attempts for a node"`
IgnoreRetryCause bool `json:"ignore-retry-cause" pflag:",Ignore retry cause and count all attempts toward a node's max attempts"`
EnableCRDebugMetadata bool `json:"enable-cr-debug-metadata" pflag:",Collapse node on any terminal state, not just successful terminations. This is useful to reduce the size of workflow state in etcd."`
ClearPreviousError bool `json:"clear-previous-error" pflag:",When using the fail after executable nodes complete mode many nodes in the workflow can fail. This option causes the previous error to be cleared every time a new error occurs. This helps reduce the size of workflow state in etcd."`
EnableCRDebugMetadata bool `json:"enable-cr-debug-metadata" pflag:",By default node state gets cleared after flytepropeller will no longer need it. This is useful to reduce the size of workflow state in etcd. Consider enabling this to keep this state for debugging purposes."`
}

// DefaultDeadlines contains default values for timeouts
Expand Down
3 changes: 1 addition & 2 deletions flytepropeller/pkg/controller/config/config_flags.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions flytepropeller/pkg/controller/config/config_flags_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions flytepropeller/pkg/controller/nodes/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,6 @@ type nodeExecutor struct {
catalog catalog.Client
clusterID string
enableCRDebugMetadata bool
clearPreviousError bool
defaultActiveDeadline time.Duration
defaultDataSandbox storage.DataReference
defaultExecutionDeadline time.Duration
Expand Down Expand Up @@ -870,7 +869,7 @@ func (c *nodeExecutor) execute(ctx context.Context, h interfaces.NodeHandler, nC
}

func (c *nodeExecutor) Clear(executableNodeStatus v1alpha1.ExecutableNodeStatus) {
if c.clearPreviousError {
if !c.enableCRDebugMetadata {
executableNodeStatus.ClearExecutionError()
}
}
Expand Down Expand Up @@ -1449,7 +1448,6 @@ func NewExecutor(ctx context.Context, nodeConfig config.NodeConfig, store *stora
catalog: catalogClient,
clusterID: clusterID,
enableCRDebugMetadata: nodeConfig.EnableCRDebugMetadata,
clearPreviousError: nodeConfig.ClearPreviousError,
defaultActiveDeadline: nodeConfig.DefaultDeadlines.DefaultNodeActiveDeadline.Duration,
defaultDataSandbox: defaultRawOutputPrefix,
defaultExecutionDeadline: nodeConfig.DefaultDeadlines.DefaultNodeExecutionDeadline.Duration,
Expand Down
10 changes: 5 additions & 5 deletions flytepropeller/pkg/controller/workflow/executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,15 +540,15 @@ func TestWorkflowExecutor_HandleFlyteWorkflow_Failing(t *testing.T) {
tests := []struct {
name string
onFailurePolicy v1alpha1.WorkflowOnFailurePolicy
clearPreviousError bool
enableCRDebugMetadata bool
expectedRoundsToFail int
expectedNodesWithErrorsCount int
expectedFailedNodesCount int
}{
{"failImidiately", v1alpha1.WorkflowOnFailurePolicy(core.WorkflowMetadata_FAIL_IMMEDIATELY), false, 6, 1, 1},
{"failImidiately clearPreviousError", v1alpha1.WorkflowOnFailurePolicy(core.WorkflowMetadata_FAIL_IMMEDIATELY), true, 6, 1, 1},
{"failAfterExecutableNodesComplete", v1alpha1.WorkflowOnFailurePolicy(core.WorkflowMetadata_FAIL_AFTER_EXECUTABLE_NODES_COMPLETE), false, 12, 2, 2},
{"failAfterExecutableNodesComplete clearPreviousError", v1alpha1.WorkflowOnFailurePolicy(core.WorkflowMetadata_FAIL_AFTER_EXECUTABLE_NODES_COMPLETE), true, 12, 1, 2},
{"failImidiately enableCRDebugMetadata", v1alpha1.WorkflowOnFailurePolicy(core.WorkflowMetadata_FAIL_IMMEDIATELY), true, 6, 1, 1},
{"failAfterExecutableNodesComplete", v1alpha1.WorkflowOnFailurePolicy(core.WorkflowMetadata_FAIL_AFTER_EXECUTABLE_NODES_COMPLETE), false, 12, 1, 2},
{"failAfterExecutableNodesComplete enableCRDebugMetadata", v1alpha1.WorkflowOnFailurePolicy(core.WorkflowMetadata_FAIL_AFTER_EXECUTABLE_NODES_COMPLETE), true, 12, 2, 2},
}

wJSON, err := yamlutils.ReadYamlFileAsJSON("testdata/benchmark_wf.yaml")
Expand All @@ -557,7 +557,7 @@ func TestWorkflowExecutor_HandleFlyteWorkflow_Failing(t *testing.T) {

t.Run(test.name, func(t *testing.T) {
nodeConfig := config.GetConfig().NodeConfig
nodeConfig.ClearPreviousError = test.clearPreviousError
nodeConfig.EnableCRDebugMetadata = test.enableCRDebugMetadata
nodeExec, err := nodes.NewExecutor(ctx, nodeConfig, store, enqueueWorkflow, eventSink, adminClient, adminClient,
maxOutputSize, "s3://bucket", fakeKubeClient, catalogClient, recoveryClient, eventConfig, testClusterID, signalClient, handlerFactory, promutils.NewTestScope())
assert.NoError(t, err)
Expand Down

0 comments on commit 6c5650c

Please sign in to comment.