Skip to content

Commit

Permalink
Fix flaky upgrade test
Browse files Browse the repository at this point in the history
Signed-off-by: Luis Rascao <luis.rascao@gmail.com>
  • Loading branch information
lrascao committed Nov 26, 2023
1 parent 668bab4 commit 00a5cac
Showing 1 changed file with 87 additions and 13 deletions.
100 changes: 87 additions & 13 deletions pkg/kubernetes/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

helm "helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/chart"
"helm.sh/helm/v3/pkg/release"
"k8s.io/helm/pkg/strvals"

"github.com/hashicorp/go-version"
Expand Down Expand Up @@ -56,6 +57,16 @@ type UpgradeConfig struct {
ImageVariant string
}

// UpgradeOptions holds the retry settings consulted by helmUpgrade when a
// Helm upgrade attempt fails. With the zero value a single attempt is made
// and its error is returned without retrying.
type UpgradeOptions struct {
// WithRetry turns retrying of a failed upgrade attempt on.
WithRetry bool
// MaxRetries caps the number of attempts; helmUpgrade counts the initial
// attempt towards this limit.
MaxRetries int
// RetryInterval is how long helmUpgrade sleeps before the next attempt.
RetryInterval time.Duration
}

// UpgradeOption is a functional option for configuring an upgrade: a function
// that mutates the UpgradeOptions it is handed. helmUpgrade applies the
// options it receives in order, so a later option can override an earlier one.
type UpgradeOption func(*UpgradeOptions)

func Upgrade(conf UpgradeConfig) error {
helmRepo := utils.GetEnv("DAPR_HELM_REPO_URL", daprHelmRepo)
status, err := GetDaprResourcesStatus()
Expand All @@ -71,14 +82,14 @@ func Upgrade(conf UpgradeConfig) error {
return err
}

helmConf, err := helmConfig(status[0].Namespace)
upgradeClient, helmConf, err := newUpgradeClient(status[0].Namespace, conf)
if err != nil {
return err
return fmt.Errorf("unable to create helm client: %w", err)
}

controlPlaneChart, err := getHelmChart(conf.RuntimeVersion, "dapr", helmRepo, helmConf)
if err != nil {
return err
return fmt.Errorf("unable to get helm chart: %w", err)
}

willHaveDashboardInDaprChart, err := IsDashboardIncluded(conf.RuntimeVersion)
Expand Down Expand Up @@ -116,13 +127,6 @@ func Upgrade(conf UpgradeConfig) error {
}
}

upgradeClient := helm.NewUpgrade(helmConf)
upgradeClient.ResetValues = true
upgradeClient.Namespace = status[0].Namespace
upgradeClient.CleanupOnFail = true
upgradeClient.Wait = true
upgradeClient.Timeout = time.Duration(conf.Timeout) * time.Second

print.InfoStatusEvent(os.Stdout, "Starting upgrade...")

mtls, err := IsMTLSEnabled()
Expand Down Expand Up @@ -155,7 +159,7 @@ func Upgrade(conf UpgradeConfig) error {
if !isDowngrade(conf.RuntimeVersion, daprVersion) {
err = applyCRDs(fmt.Sprintf("v%s", conf.RuntimeVersion))
if err != nil {
return err
return fmt.Errorf("unable to apply CRDs: %w", err)
}
} else {
print.InfoStatusEvent(os.Stdout, "Downgrade detected, skipping CRDs.")
Expand All @@ -166,8 +170,13 @@ func Upgrade(conf UpgradeConfig) error {
return err
}

if _, err = upgradeClient.Run(chart, controlPlaneChart, vals); err != nil {
return err
// Deal with a known race condition when applying a CRD and a CR close together. The Helm upgrade fails
// when a CR is applied before its CRD is fully registered. On each retry we need a
// fresh client since the kube client locally caches the last OpenAPI schema it received from the server.
// See https://github.com/kubernetes/kubectl/issues/1179
_, err = helmUpgrade(upgradeClient, chart, controlPlaneChart, vals, WithRetry(5, 100*time.Millisecond))
if err != nil {
return fmt.Errorf("failure while running upgrade: %w", err)
}

if dashboardChart != nil {
Expand All @@ -192,6 +201,55 @@ func Upgrade(conf UpgradeConfig) error {
return nil
}

// WithRetry returns an UpgradeOption that switches retrying on and records
// the attempt limit (maxRetries) and the pause between attempts
// (retryInterval) on the options it is applied to.
func WithRetry(maxRetries int, retryInterval time.Duration) UpgradeOption {
	enableRetry := func(opts *UpgradeOptions) {
		opts.RetryInterval = retryInterval
		opts.MaxRetries = maxRetries
		opts.WithRetry = true
	}
	return enableRetry
}

// helmUpgrade runs the Helm upgrade of the release called name with the given
// chart and values, and returns the release produced by the successful attempt.
//
// Without options a single attempt is made. When WithRetry is supplied, a
// failed attempt is followed by a sleep of RetryInterval and another attempt,
// until MaxRetries attempts (the first one included) have been made. Every
// retry uses a brand new client created by newUpgradeClient for the same
// namespace and timeout as the client that was passed in.
//
// It returns an error when the last permitted attempt fails or when a
// replacement client cannot be created; the returned release is nil then.
func helmUpgrade(client *helm.Upgrade, name string, chart *chart.Chart, vals map[string]interface{}, options ...UpgradeOption) (*release.Release, error) {
	// The zero value means: no retry, single attempt.
	upgradeOptions := &UpgradeOptions{}

	// Apply functional options.
	for _, option := range options {
		option(upgradeOptions)
	}

	for attempt := 1; ; attempt++ {
		rel, err := client.Run(name, chart, vals)
		if err == nil {
			// Operation succeeded, hand the resulting release back to the caller.
			return rel, nil
		}

		if !upgradeOptions.WithRetry {
			// Retrying was never requested, so do not claim retries were exhausted.
			return nil, fmt.Errorf("unable to run command: %w", err)
		}
		if attempt >= upgradeOptions.MaxRetries {
			return nil, fmt.Errorf("max retries reached, unable to run command: %w", err)
		}

		print.PendingStatusEvent(os.Stdout, "Retrying after %s...", upgradeOptions.RetryInterval)
		time.Sleep(upgradeOptions.RetryInterval)

		// Create a totally new helm client, this ensures that we fetch a fresh
		// openapi schema from the server on each attempt.
		// client.Timeout is a time.Duration, whereas UpgradeConfig.Timeout is a
		// number of seconds that newUpgradeClient multiplies by time.Second, so
		// convert back to whole seconds to keep the timeout unchanged.
		client, _, err = newUpgradeClient(client.Namespace, UpgradeConfig{
			Timeout: uint(client.Timeout / time.Second),
		})
		if err != nil {
			return nil, fmt.Errorf("unable to create helm client: %w", err)
		}
	}
}

func highAvailabilityEnabled(status []StatusOutput) bool {
for _, s := range status {
if s.Name == "dapr-dashboard" {
Expand Down Expand Up @@ -264,3 +322,19 @@ func isDowngrade(targetVersion, existingVersion string) bool {
}
return target.LessThan(existing)
}

// newUpgradeClient builds a Helm upgrade action for namespace together with
// the Helm configuration it was created from.
//
// The action resets values, cleans up on failure, waits for completion and
// uses cfg.Timeout (in seconds) as its timeout. An error from helmConfig is
// returned unchanged, with both other results nil.
func newUpgradeClient(namespace string, cfg UpgradeConfig) (*helm.Upgrade, *helm.Configuration, error) {
	actionCfg, cfgErr := helmConfig(namespace)
	if cfgErr != nil {
		return nil, nil, cfgErr
	}

	upgrader := helm.NewUpgrade(actionCfg)
	upgrader.Namespace = namespace
	upgrader.Timeout = time.Second * time.Duration(cfg.Timeout)
	upgrader.Wait = true
	upgrader.ResetValues, upgrader.CleanupOnFail = true, true

	return upgrader, actionCfg, nil
}

0 comments on commit 00a5cac

Please sign in to comment.