diff --git a/docs/error-handling.md b/docs/error-handling.md new file mode 100644 index 00000000..f023e127 --- /dev/null +++ b/docs/error-handling.md @@ -0,0 +1,33 @@ +# Error Handling Guidelines + +This project uses standard Go error chaining and `go.uber.org/multierr` for aggregation. + +## Core Rules + +1. Wrap boundary errors with context: + - Use `fmt.Errorf(": %w", err)`. + - Include actionable fields (path, ID, resource name) when safe. +2. Aggregate multiple independent errors with `multierr.Append`: + - Keep each sub-error wrapped with operation context before appending. + - Return a single wrapped aggregate at the boundary. +3. Preserve chain semantics: + - Avoid string-only concatenation that loses causes. + - Prefer `%w` so `errors.Is` / `errors.As` remain usable. +4. Keep user-facing text stable unless behavior changes. + +## Example + +```go +var agg error + +if err := stopContainer(id); err != nil { + agg = multierr.Append(agg, fmt.Errorf("stop container %s: %w", id, err)) +} +if err := deleteNetwork(netID); err != nil { + agg = multierr.Append(agg, fmt.Errorf("delete network %s: %w", netID, err)) +} + +if agg != nil { + return fmt.Errorf("rollback failed for %s: %w", devnetName, agg) +} +``` diff --git a/internal/infrastructure/docker/orchestrator.go b/internal/infrastructure/docker/orchestrator.go index daaf0922..c11342b2 100644 --- a/internal/infrastructure/docker/orchestrator.go +++ b/internal/infrastructure/docker/orchestrator.go @@ -13,6 +13,7 @@ import ( "github.com/altuslabsxyz/devnet-builder/internal/domain/ports" "github.com/altuslabsxyz/devnet-builder/internal/infrastructure/node" "github.com/altuslabsxyz/devnet-builder/internal/output" + "go.uber.org/multierr" ) // OrchestratorImpl implements the DeploymentOrchestrator interface @@ -121,7 +122,7 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment state.Phase = ports.PhaseRollingBack o.logger.Info("Rolling back deployment for %s", state.DevnetName) - var errs []error + var errs error // Step 1: Stop all started containers for _, containerID := range state.StartedContainers { @@ -129,7 +130,7 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment cmd := exec.CommandContext(ctx, "docker", "stop", "-t", "5", containerID) if err := cmd.Run(); err != nil { o.logger.Warn("Failed to stop container %s: %v", containerID, err) - errs = append(errs, fmt.Errorf("stop container %s: %w", containerID, err)) + errs = multierr.Append(errs, fmt.Errorf("stop container %s: %w", containerID, err)) } } @@ -139,7 +140,7 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment cmd := exec.CommandContext(ctx, "docker", "rm", "-f", containerID) if err := cmd.Run(); err != nil { o.logger.Warn("Failed to remove container %s: %v", containerID, err) - errs = append(errs, fmt.Errorf("remove container %s: %w", containerID, err)) + errs = multierr.Append(errs, fmt.Errorf("remove container %s: %w", containerID, err)) } } @@ -148,7 +149,7 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment o.logger.Debug("Deleting network %s", *state.NetworkID) if err := o.networkManager.DeleteNetwork(ctx, *state.NetworkID); err != nil { o.logger.Warn("Failed to delete network %s: %v", *state.NetworkID, err) - errs = append(errs, fmt.Errorf("delete network %s: %w", *state.NetworkID, err)) + errs = multierr.Append(errs, fmt.Errorf("delete network %s: %w", *state.NetworkID, err)) } } @@ -157,12 +158,12 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment o.logger.Debug("Releasing port allocation for %s", state.DevnetName) if err := o.portAllocator.ReleaseRange(ctx, state.DevnetName); err != nil { o.logger.Warn("Failed to release ports for %s: %v", state.DevnetName, err) - errs = append(errs, fmt.Errorf("release ports: %w", err)) + errs = multierr.Append(errs, fmt.Errorf("release ports for %s: %w", state.DevnetName, err)) } } - if len(errs) > 0 { - return &MultiError{Errors: errs} + if errs != nil { + return fmt.Errorf("rollback encountered one or more errors for %s: %w", state.DevnetName, errs) } // Clean up state file after successful rollback @@ -551,22 +552,9 @@ func (o *OrchestratorImpl) handleFailure(ctx context.Context, state *ports.Deplo if rollbackErr := o.Rollback(ctx, state); rollbackErr != nil { o.logger.Error("Rollback also failed: %v", rollbackErr) - return fmt.Errorf("deployment failed: %w (rollback also failed: %v)", err, rollbackErr) + return fmt.Errorf("deployment failed: %w; rollback also failed: %w", err, rollbackErr) } state.Phase = ports.PhaseFailed return fmt.Errorf("deployment failed: %w", err) } - -// MultiError represents multiple errors -type MultiError struct { - Errors []error -} - -func (m *MultiError) Error() string { - var msgs []string - for _, err := range m.Errors { - msgs = append(msgs, err.Error()) - } - return fmt.Sprintf("multiple errors: %s", strings.Join(msgs, "; ")) -} diff --git a/internal/infrastructure/docker/orchestrator_test.go b/internal/infrastructure/docker/orchestrator_test.go new file mode 100644 index 00000000..d5af0ada --- /dev/null +++ b/internal/infrastructure/docker/orchestrator_test.go @@ -0,0 +1,193 @@ +package docker + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/altuslabsxyz/devnet-builder/internal/domain/ports" + "github.com/altuslabsxyz/devnet-builder/internal/output" +) + +type mockNetworkManager struct { + createNetworkFunc func(ctx context.Context, devnetName string) (string, string, error) + deleteNetworkFunc func(ctx context.Context, networkID string) error + networkExistsFunc func(ctx context.Context, networkID string) (bool, error) + getNetworkSubnetFun func(ctx context.Context, networkID string) (string, error) + listNetworksFunc func(ctx context.Context) ([]ports.NetworkInfo, error) +} + +func (m *mockNetworkManager) CreateNetwork(ctx context.Context, devnetName string) (string, string, error) { + if m.createNetworkFunc != nil { + return m.createNetworkFunc(ctx, devnetName) + } + return "", "", nil +} + +func (m *mockNetworkManager) DeleteNetwork(ctx context.Context, networkID string) error { + if m.deleteNetworkFunc != nil { + return m.deleteNetworkFunc(ctx, networkID) + } + return nil +} + +func (m *mockNetworkManager) NetworkExists(ctx context.Context, networkID string) (bool, error) { + if m.networkExistsFunc != nil { + return m.networkExistsFunc(ctx, networkID) + } + return false, nil +} + +func (m *mockNetworkManager) GetNetworkSubnet(ctx context.Context, networkID string) (string, error) { + if m.getNetworkSubnetFun != nil { + return m.getNetworkSubnetFun(ctx, networkID) + } + return "", nil +} + +func (m *mockNetworkManager) ListDevnetNetworks(ctx context.Context) ([]ports.NetworkInfo, error) { + if m.listNetworksFunc != nil { + return m.listNetworksFunc(ctx) + } + return nil, nil +} + +type mockPortAllocator struct { + allocateRangeFunc func(ctx context.Context, devnetName string, validatorCount int) (*ports.PortAllocation, error) + releaseRangeFunc func(ctx context.Context, devnetName string) error + getAllocationFunc func(ctx context.Context, devnetName string) (*ports.PortAllocation, error) + validatePortAvailabilityFun func(ctx context.Context, allocation *ports.PortAllocation) ([]int, error) + listAllocationsFunc func(ctx context.Context) ([]*ports.PortAllocation, error) +} + +func (m *mockPortAllocator) AllocateRange(ctx context.Context, devnetName string, validatorCount int) (*ports.PortAllocation, error) { + if m.allocateRangeFunc != nil { + return m.allocateRangeFunc(ctx, devnetName, validatorCount) + } + return nil, nil +} + +func (m *mockPortAllocator) ReleaseRange(ctx context.Context, devnetName string) error { + if m.releaseRangeFunc != nil { + return m.releaseRangeFunc(ctx, devnetName) + } + return nil +} + +func (m *mockPortAllocator) GetAllocation(ctx context.Context, devnetName string) (*ports.PortAllocation, error) { + if m.getAllocationFunc != nil { + return m.getAllocationFunc(ctx, devnetName) + } + return nil, nil +} + +func (m *mockPortAllocator) ValidatePortAvailability(ctx context.Context, allocation *ports.PortAllocation) ([]int, error) { + if m.validatePortAvailabilityFun != nil { + return m.validatePortAvailabilityFun(ctx, allocation) + } + return nil, nil +} + +func (m *mockPortAllocator) ListAllocations(ctx context.Context) ([]*ports.PortAllocation, error) { + if m.listAllocationsFunc != nil { + return m.listAllocationsFunc(ctx) + } + return nil, nil +} + +func newTestLogger() *output.Logger { + l := output.NewLogger() + l.SetJSONMode(true) + return l +} + +func TestOrchestratorRollback_AggregatesErrorsWithErrorsIs(t *testing.T) { + errDeleteNetwork := errors.New("delete network failed") + errReleaseRange := errors.New("release range failed") + + networkMgr := &mockNetworkManager{ + deleteNetworkFunc: func(ctx context.Context, networkID string) error { return errDeleteNetwork }, + } + portAllocator := &mockPortAllocator{ + releaseRangeFunc: func(ctx context.Context, devnetName string) error { return errReleaseRange }, + } + + tmpDir := t.TempDir() + o := NewOrchestratorWithStateDir(networkMgr, portAllocator, nil, tmpDir, newTestLogger()) + + networkID := "network-1" + state := &ports.DeploymentState{ + DevnetName: "devnet-a", + NetworkID: &networkID, + PortRange: &ports.PortAllocation{ + DevnetName: "devnet-a", + }, + } + + err := o.Rollback(context.Background(), state) + if err == nil { + t.Fatalf("expected aggregated rollback error") + } + if !errors.Is(err, errDeleteNetwork) { + t.Fatalf("expected errors.Is(err, errDeleteNetwork) == true") + } + if !errors.Is(err, errReleaseRange) { + t.Fatalf("expected errors.Is(err, errReleaseRange) == true") + } +} + +func TestOrchestratorRollback_SuccessDeletesStateFile(t *testing.T) { + tmpDir := t.TempDir() + o := NewOrchestratorWithStateDir(&mockNetworkManager{}, &mockPortAllocator{}, nil, tmpDir, newTestLogger()) + + networkID := "network-ok" + state := &ports.DeploymentState{ + DevnetName: "devnet-ok", + NetworkID: &networkID, + StartedContainers: []string{}, + HealthyContainers: []string{}, + Errors: []ports.DeploymentError{}, + StartedAt: time.Now(), + } + + if err := o.saveState(state); err != nil { + t.Fatalf("saveState failed: %v", err) + } + + if err := o.Rollback(context.Background(), state); err != nil { + t.Fatalf("rollback failed: %v", err) + } + + got, err := o.GetState(context.Background(), "devnet-ok") + if err != nil { + t.Fatalf("GetState unexpected error: %v", err) + } + if got != nil { + t.Fatalf("expected state file to be deleted after rollback") + } +} + +func TestOrchestratorHandleFailure_WrapsDeploymentAndRollbackErrors(t *testing.T) { + errDeleteNetwork := errors.New("delete network failed") + deploymentErr := errors.New("deployment failed") + + networkMgr := &mockNetworkManager{ + deleteNetworkFunc: func(ctx context.Context, networkID string) error { return errDeleteNetwork }, + } + o := NewOrchestratorWithStateDir(networkMgr, &mockPortAllocator{}, nil, t.TempDir(), newTestLogger()) + + networkID := "network-x" + state := &ports.DeploymentState{DevnetName: "devnet-x", NetworkID: &networkID} + + err := o.handleFailure(context.Background(), state, deploymentErr) + if err == nil { + t.Fatalf("expected combined failure") + } + if !errors.Is(err, deploymentErr) { + t.Fatalf("expected deployment error in chain") + } + if !errors.Is(err, errDeleteNetwork) { + t.Fatalf("expected rollback error in chain") + } +}