Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions docs/error-handling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Error Handling Guidelines

This project uses standard Go error chaining and `go.uber.org/multierr` for aggregation.

## Core Rules

1. Wrap boundary errors with context:
- Use `fmt.Errorf("<operation>: %w", err)`.
- Include actionable fields (path, ID, resource name) when safe.
2. Aggregate multiple independent errors with `multierr.Append`:
- Keep each sub-error wrapped with operation context before appending.
- Return a single wrapped aggregate at the boundary.
3. Preserve chain semantics:
- Avoid string-only concatenation that loses causes.
- Prefer `%w` so `errors.Is` / `errors.As` remain usable.
4. Keep user-facing text stable unless behavior changes.

## Example

```go
var agg error

if err := stopContainer(id); err != nil {
agg = multierr.Append(agg, fmt.Errorf("stop container %s: %w", id, err))
}
if err := deleteNetwork(netID); err != nil {
agg = multierr.Append(agg, fmt.Errorf("delete network %s: %w", netID, err))
}

if agg != nil {
return fmt.Errorf("rollback failed for %s: %w", devnetName, agg)
}
```
30 changes: 9 additions & 21 deletions internal/infrastructure/docker/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/altuslabsxyz/devnet-builder/internal/domain/ports"
"github.com/altuslabsxyz/devnet-builder/internal/infrastructure/node"
"github.com/altuslabsxyz/devnet-builder/internal/output"
"go.uber.org/multierr"
)

// OrchestratorImpl implements the DeploymentOrchestrator interface
Expand Down Expand Up @@ -121,15 +122,15 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment
state.Phase = ports.PhaseRollingBack
o.logger.Info("Rolling back deployment for %s", state.DevnetName)

var errs []error
var errs error

// Step 1: Stop all started containers
for _, containerID := range state.StartedContainers {
o.logger.Debug("Stopping container %s", containerID)
cmd := exec.CommandContext(ctx, "docker", "stop", "-t", "5", containerID)
if err := cmd.Run(); err != nil {
o.logger.Warn("Failed to stop container %s: %v", containerID, err)
errs = append(errs, fmt.Errorf("stop container %s: %w", containerID, err))
errs = multierr.Append(errs, fmt.Errorf("stop container %s: %w", containerID, err))
}
}

Expand All @@ -139,7 +140,7 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment
cmd := exec.CommandContext(ctx, "docker", "rm", "-f", containerID)
if err := cmd.Run(); err != nil {
o.logger.Warn("Failed to remove container %s: %v", containerID, err)
errs = append(errs, fmt.Errorf("remove container %s: %w", containerID, err))
errs = multierr.Append(errs, fmt.Errorf("remove container %s: %w", containerID, err))
}
}

Expand All @@ -148,7 +149,7 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment
o.logger.Debug("Deleting network %s", *state.NetworkID)
if err := o.networkManager.DeleteNetwork(ctx, *state.NetworkID); err != nil {
o.logger.Warn("Failed to delete network %s: %v", *state.NetworkID, err)
errs = append(errs, fmt.Errorf("delete network %s: %w", *state.NetworkID, err))
errs = multierr.Append(errs, fmt.Errorf("delete network %s: %w", *state.NetworkID, err))
}
}

Expand All @@ -157,12 +158,12 @@ func (o *OrchestratorImpl) Rollback(ctx context.Context, state *ports.Deployment
o.logger.Debug("Releasing port allocation for %s", state.DevnetName)
if err := o.portAllocator.ReleaseRange(ctx, state.DevnetName); err != nil {
o.logger.Warn("Failed to release ports for %s: %v", state.DevnetName, err)
errs = append(errs, fmt.Errorf("release ports: %w", err))
errs = multierr.Append(errs, fmt.Errorf("release ports for %s: %w", state.DevnetName, err))
}
}

if len(errs) > 0 {
return &MultiError{Errors: errs}
if errs != nil {
return fmt.Errorf("rollback encountered one or more errors for %s: %w", state.DevnetName, errs)
}

// Clean up state file after successful rollback
Expand Down Expand Up @@ -551,22 +552,9 @@ func (o *OrchestratorImpl) handleFailure(ctx context.Context, state *ports.Deplo

if rollbackErr := o.Rollback(ctx, state); rollbackErr != nil {
o.logger.Error("Rollback also failed: %v", rollbackErr)
return fmt.Errorf("deployment failed: %w (rollback also failed: %v)", err, rollbackErr)
return fmt.Errorf("deployment failed: %w; rollback also failed: %w", err, rollbackErr)
}

state.Phase = ports.PhaseFailed
return fmt.Errorf("deployment failed: %w", err)
}

// MultiError represents multiple errors
type MultiError struct {
Errors []error
}

func (m *MultiError) Error() string {
var msgs []string
for _, err := range m.Errors {
msgs = append(msgs, err.Error())
}
return fmt.Sprintf("multiple errors: %s", strings.Join(msgs, "; "))
}
193 changes: 193 additions & 0 deletions internal/infrastructure/docker/orchestrator_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
package docker

import (
"context"
"errors"
"testing"
"time"

"github.com/altuslabsxyz/devnet-builder/internal/domain/ports"
"github.com/altuslabsxyz/devnet-builder/internal/output"
)

type mockNetworkManager struct {
createNetworkFunc func(ctx context.Context, devnetName string) (string, string, error)
deleteNetworkFunc func(ctx context.Context, networkID string) error
networkExistsFunc func(ctx context.Context, networkID string) (bool, error)
getNetworkSubnetFun func(ctx context.Context, networkID string) (string, error)
listNetworksFunc func(ctx context.Context) ([]ports.NetworkInfo, error)
}

func (m *mockNetworkManager) CreateNetwork(ctx context.Context, devnetName string) (string, string, error) {
if m.createNetworkFunc != nil {
return m.createNetworkFunc(ctx, devnetName)
}
return "", "", nil
}

func (m *mockNetworkManager) DeleteNetwork(ctx context.Context, networkID string) error {
if m.deleteNetworkFunc != nil {
return m.deleteNetworkFunc(ctx, networkID)
}
return nil
}

func (m *mockNetworkManager) NetworkExists(ctx context.Context, networkID string) (bool, error) {
if m.networkExistsFunc != nil {
return m.networkExistsFunc(ctx, networkID)
}
return false, nil
}

func (m *mockNetworkManager) GetNetworkSubnet(ctx context.Context, networkID string) (string, error) {
if m.getNetworkSubnetFun != nil {
return m.getNetworkSubnetFun(ctx, networkID)
}
return "", nil
}

func (m *mockNetworkManager) ListDevnetNetworks(ctx context.Context) ([]ports.NetworkInfo, error) {
if m.listNetworksFunc != nil {
return m.listNetworksFunc(ctx)
}
return nil, nil
}

type mockPortAllocator struct {
allocateRangeFunc func(ctx context.Context, devnetName string, validatorCount int) (*ports.PortAllocation, error)
releaseRangeFunc func(ctx context.Context, devnetName string) error
getAllocationFunc func(ctx context.Context, devnetName string) (*ports.PortAllocation, error)
validatePortAvailabilityFun func(ctx context.Context, allocation *ports.PortAllocation) ([]int, error)
listAllocationsFunc func(ctx context.Context) ([]*ports.PortAllocation, error)
}

func (m *mockPortAllocator) AllocateRange(ctx context.Context, devnetName string, validatorCount int) (*ports.PortAllocation, error) {
if m.allocateRangeFunc != nil {
return m.allocateRangeFunc(ctx, devnetName, validatorCount)
}
return nil, nil
}

func (m *mockPortAllocator) ReleaseRange(ctx context.Context, devnetName string) error {
if m.releaseRangeFunc != nil {
return m.releaseRangeFunc(ctx, devnetName)
}
return nil
}

func (m *mockPortAllocator) GetAllocation(ctx context.Context, devnetName string) (*ports.PortAllocation, error) {
if m.getAllocationFunc != nil {
return m.getAllocationFunc(ctx, devnetName)
}
return nil, nil
}

func (m *mockPortAllocator) ValidatePortAvailability(ctx context.Context, allocation *ports.PortAllocation) ([]int, error) {
if m.validatePortAvailabilityFun != nil {
return m.validatePortAvailabilityFun(ctx, allocation)
}
return nil, nil
}

func (m *mockPortAllocator) ListAllocations(ctx context.Context) ([]*ports.PortAllocation, error) {
if m.listAllocationsFunc != nil {
return m.listAllocationsFunc(ctx)
}
return nil, nil
}

func newTestLogger() *output.Logger {
l := output.NewLogger()
l.SetJSONMode(true)
return l
}

func TestOrchestratorRollback_AggregatesErrorsWithErrorsIs(t *testing.T) {
errDeleteNetwork := errors.New("delete network failed")
errReleaseRange := errors.New("release range failed")

networkMgr := &mockNetworkManager{
deleteNetworkFunc: func(ctx context.Context, networkID string) error { return errDeleteNetwork },
}
portAllocator := &mockPortAllocator{
releaseRangeFunc: func(ctx context.Context, devnetName string) error { return errReleaseRange },
}

tmpDir := t.TempDir()
o := NewOrchestratorWithStateDir(networkMgr, portAllocator, nil, tmpDir, newTestLogger())

networkID := "network-1"
state := &ports.DeploymentState{
DevnetName: "devnet-a",
NetworkID: &networkID,
PortRange: &ports.PortAllocation{
DevnetName: "devnet-a",
},
}

err := o.Rollback(context.Background(), state)
if err == nil {
t.Fatalf("expected aggregated rollback error")
}
if !errors.Is(err, errDeleteNetwork) {
t.Fatalf("expected errors.Is(err, errDeleteNetwork) == true")
}
if !errors.Is(err, errReleaseRange) {
t.Fatalf("expected errors.Is(err, errReleaseRange) == true")
}
}

func TestOrchestratorRollback_SuccessDeletesStateFile(t *testing.T) {
tmpDir := t.TempDir()
o := NewOrchestratorWithStateDir(&mockNetworkManager{}, &mockPortAllocator{}, nil, tmpDir, newTestLogger())

networkID := "network-ok"
state := &ports.DeploymentState{
DevnetName: "devnet-ok",
NetworkID: &networkID,
StartedContainers: []string{},
HealthyContainers: []string{},
Errors: []ports.DeploymentError{},
StartedAt: time.Now(),
}

if err := o.saveState(state); err != nil {
t.Fatalf("saveState failed: %v", err)
}

if err := o.Rollback(context.Background(), state); err != nil {
t.Fatalf("rollback failed: %v", err)
}

got, err := o.GetState(context.Background(), "devnet-ok")
if err != nil {
t.Fatalf("GetState unexpected error: %v", err)
}
if got != nil {
t.Fatalf("expected state file to be deleted after rollback")
}
}

func TestOrchestratorHandleFailure_WrapsDeploymentAndRollbackErrors(t *testing.T) {
errDeleteNetwork := errors.New("delete network failed")
deploymentErr := errors.New("deployment failed")

networkMgr := &mockNetworkManager{
deleteNetworkFunc: func(ctx context.Context, networkID string) error { return errDeleteNetwork },
}
o := NewOrchestratorWithStateDir(networkMgr, &mockPortAllocator{}, nil, t.TempDir(), newTestLogger())

networkID := "network-x"
state := &ports.DeploymentState{DevnetName: "devnet-x", NetworkID: &networkID}

err := o.handleFailure(context.Background(), state, deploymentErr)
if err == nil {
t.Fatalf("expected combined failure")
}
if !errors.Is(err, deploymentErr) {
t.Fatalf("expected deployment error in chain")
}
if !errors.Is(err, errDeleteNetwork) {
t.Fatalf("expected rollback error in chain")
}
}
Loading