
Commit

Enhance error handling for rate limits; introduce RetryableAssignmentOperation and RetryableOperation functions to manage throttling and implement exponential backoff with jitter. Update state mapping in settings catalog to remove unused fields, improving clarity and maintainability.
ShocOne committed Dec 1, 2024
1 parent d4f72bb commit 5c533d9
Showing 9 changed files with 820 additions and 334 deletions.
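
The retry helpers added in this commit are meant to wrap individual Graph SDK calls at resource call sites. A minimal sketch of a hypothetical call site follows; the client variable, the request builder chain, the operation label, and the requiredPermissions argument are illustrative, not taken from this commit:

// Hypothetical call site: RetryableOperation re-invokes the wrapped call on
// 429 responses, sleeping with exponential backoff and jitter between tries.
// The builder chain mimics msgraph-sdk-go style but is only illustrative.
err := retry.RetryableOperation(ctx, "create_device_configuration", func() error {
    _, err := client.DeviceManagement().DeviceConfigurations().Post(ctx, requestBody, nil)
    return err
})
if err != nil {
    errors.HandleGraphError(ctx, err, resp, "Create", requiredPermissions)
}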
25 changes: 25 additions & 0 deletions internal/resources/common/errors/error_handling.go
@@ -26,6 +26,7 @@ type GraphErrorInfo struct {
    AdditionalData map[string]interface{}
    Headers        *abstractions.ResponseHeaders
    RequestDetails string
    RetryAfter     string
}

// standardErrorDescriptions provides consistent error messaging across the provider
@@ -85,6 +86,9 @@ func HandleGraphError(ctx context.Context, err error, resp interface{}, operatio
    case 401, 403:
        handlePermissionError(ctx, errorInfo, resp, operation, requiredPermissions)

    case 429:
        handleRateLimitError(ctx, errorInfo, resp)

    default:
        // Handle all other cases
        addErrorToDiagnostics(ctx, resp, errorDesc.Summary,
@@ -216,6 +220,27 @@ func handlePermissionError(ctx context.Context, errorInfo GraphErrorInfo, resp i
    addErrorToDiagnostics(ctx, resp, errorDesc.Summary, detail)
}

// handleRateLimitError processes rate limit errors and adds retry information to the error message
func handleRateLimitError(ctx context.Context, errorInfo GraphErrorInfo, resp interface{}) GraphErrorInfo {
    if headers := errorInfo.Headers; headers != nil {
        retryValues := headers.Get("Retry-After")
        if len(retryValues) > 0 {
            errorInfo.RetryAfter = retryValues[0]
        }
    }

    tflog.Warn(ctx, "Rate limit exceeded", map[string]interface{}{
        "retry_after": errorInfo.RetryAfter,
        "details":     errorInfo.ErrorMessage,
    })

    errorDesc := getErrorDescription(429)
    detail := constructErrorDetail(errorDesc.Detail, errorInfo.ErrorMessage)
    addErrorToDiagnostics(ctx, resp, errorDesc.Summary, detail)

    return errorInfo
}

// addErrorToDiagnostics adds an error to the response diagnostics
func addErrorToDiagnostics(ctx context.Context, resp interface{}, summary, detail string) {
    switch r := resp.(type) {
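
The handler above only records the throttling details (in diagnostics and on the returned GraphErrorInfo); the waiting happens in the new retry package shown below. One detail worth calling out: Graph sends Retry-After as a bare seconds count, which is why the retry helpers append "s" before parsing. A standalone sketch, assuming an illustrative header value of "120":

package main

import (
    "fmt"
    "time"
)

func main() {
    // A Retry-After header of "120" parses to a 2-minute wait once "s" is
    // appended, turning the bare count into a valid time.ParseDuration input.
    retryAfter := "120" // illustrative header value
    if d, err := time.ParseDuration(retryAfter + "s"); err == nil {
        fmt.Println(d) // prints 2m0s
    }
}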
97 changes: 97 additions & 0 deletions internal/resources/common/retry/retry_assignments.go
@@ -0,0 +1,97 @@
// REF: https://learn.microsoft.com/en-us/graph/throttling-limits#assignment-service-limits

package retry

import (
    "context"
    "time"

    "github.com/deploymenttheory/terraform-provider-microsoft365/internal/resources/common/errors"
    "github.com/hashicorp/terraform-plugin-log/tflog"
    "golang.org/x/exp/rand"
)

// RetryableAssignmentOperation executes an assignment operation with specific rate limiting
func RetryableAssignmentOperation(ctx context.Context, operation string, fn func() error) error {
    var attempt int
    r := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

    const (
        tenSecondLimit = 500   // requests per 10 seconds per app per tenant
        hourlyLimit    = 15000 // requests per hour per app per tenant
        maxBackoff     = 10 * time.Second
        baseDelay      = 3 * time.Second // Higher base delay for assignments
    )

    for {
        err := fn()
        if err == nil {
            return nil
        }

        graphError := errors.GraphError(ctx, err)
        if graphError.StatusCode != 429 {
            return err
        }

        // Parse throttle scope and throttle information if the response
        // included headers; guard against a nil header set to avoid a panic.
        var throttleScope ThrottleScope
        var throttleInfo string
        if graphError.Headers != nil {
            if scope := graphError.Headers.Get("x-ms-throttle-scope"); len(scope) > 0 {
                throttleScope = parseThrottleScope(scope[0])
            }
            if info := graphError.Headers.Get("x-ms-throttle-information"); len(info) > 0 {
                throttleInfo = info[0]
            }
        }

        // Use Retry-After if provided, otherwise fall back to exponential
        // backoff. Retry-After arrives as whole seconds, so appending "s"
        // makes it a valid time.ParseDuration input.
        var backoffDelay time.Duration
        if graphError.RetryAfter != "" {
            if seconds, err := time.ParseDuration(graphError.RetryAfter + "s"); err == nil {
                backoffDelay = seconds
            }
        }

        if backoffDelay == 0 {
            shift := attempt
            if shift > 5 {
                shift = 5 // cap the exponent; the delay is clamped to maxBackoff anyway
            }
            backoffDelay = baseDelay * time.Duration(1<<shift)
            if backoffDelay > maxBackoff {
                backoffDelay = maxBackoff
            }
        }

        // Add jitter: randomly between 50-100% of calculated delay
        jitterDelay := backoffDelay/2 + time.Duration(r.Int63n(int64(backoffDelay/2)))
        attempt++

        logDetails := map[string]interface{}{
            "operation":      operation,
            "attempt":        attempt,
            "delay_seconds":  jitterDelay.Seconds(),
            "status_code":    graphError.StatusCode,
            "rate_limit_10s": tenSecondLimit,
            "rate_limit_1h":  hourlyLimit,
        }

        if throttleInfo != "" {
            logDetails["throttle_reason"] = throttleInfo
        }
        if throttleScope != (ThrottleScope{}) {
            logDetails["throttle_scope"] = throttleScope.Scope
            logDetails["throttle_limit"] = throttleScope.Limit
        }

        tflog.Info(ctx, "Microsoft Graph assignment rate limit encountered", logDetails)

        // Stop the timer inline rather than with defer; defers would pile up
        // across retry iterations for the lifetime of the call.
        timer := time.NewTimer(jitterDelay)
        select {
        case <-ctx.Done():
            timer.Stop()
            return ctx.Err()
        case <-timer.C:
            continue
        }
    }
}
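
Assignment endpoints get a dedicated wrapper because of the tighter service budget documented in the REF link (500 requests per 10 seconds, 15,000 per hour, hence the higher 3-second base delay). A hypothetical call site; assignBuilder and assignBody are illustrative stand-ins for a real msgraph-sdk-go request:

// Hypothetical usage of RetryableAssignmentOperation around an assignment POST.
err := retry.RetryableAssignmentOperation(ctx, "assign_device_configuration", func() error {
    _, err := assignBuilder.Post(ctx, assignBody, nil)
    return err
})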
114 changes: 114 additions & 0 deletions internal/resources/common/retry/retry_global.go
@@ -0,0 +1,114 @@
// REF: https://learn.microsoft.com/en-us/graph/throttling

package retry

import (
    "context"
    "strings"
    "time"

    "github.com/deploymenttheory/terraform-provider-microsoft365/internal/resources/common/errors"
    "github.com/hashicorp/terraform-plugin-log/tflog"
    "golang.org/x/exp/rand"
)

// ThrottleScope represents the scope of throttling from x-ms-throttle-scope header
type ThrottleScope struct {
    Scope         string
    Limit         string
    ApplicationID string
    ResourceID    string
}

// parseThrottleScope parses the x-ms-throttle-scope header
func parseThrottleScope(scope string) ThrottleScope {
    parts := strings.Split(scope, "/")
    if len(parts) != 4 {
        return ThrottleScope{}
    }
    return ThrottleScope{
        Scope:         parts[0],
        Limit:         parts[1],
        ApplicationID: parts[2],
        ResourceID:    parts[3],
    }
}

// RetryableOperation executes an operation with automatic retry on rate limit errors
func RetryableOperation(ctx context.Context, operation string, fn func() error) error {
    var attempt int
    r := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

    for {
        err := fn()
        if err == nil {
            return nil
        }

        graphError := errors.GraphError(ctx, err)
        if graphError.StatusCode != 429 {
            return err
        }

        // Parse throttle scope and throttle information if the response
        // included headers; guard against a nil header set to avoid a panic.
        var throttleScope ThrottleScope
        var throttleInfo string
        if graphError.Headers != nil {
            if scope := graphError.Headers.Get("x-ms-throttle-scope"); len(scope) > 0 {
                throttleScope = parseThrottleScope(scope[0])
            }
            if info := graphError.Headers.Get("x-ms-throttle-information"); len(info) > 0 {
                throttleInfo = info[0]
            }
        }

        const maxBackoff = 10 * time.Second
        baseDelay := 2 * time.Second

        // Use Retry-After if provided, otherwise fall back to exponential
        // backoff. Retry-After arrives as whole seconds, so appending "s"
        // makes it a valid time.ParseDuration input.
        var backoffDelay time.Duration
        if graphError.RetryAfter != "" {
            if seconds, err := time.ParseDuration(graphError.RetryAfter + "s"); err == nil {
                backoffDelay = seconds
            }
        }

        if backoffDelay == 0 {
            shift := attempt
            if shift > 5 {
                shift = 5 // cap the exponent; the delay is clamped to maxBackoff anyway
            }
            backoffDelay = baseDelay * time.Duration(1<<shift)
            if backoffDelay > maxBackoff {
                backoffDelay = maxBackoff
            }
        }

        // Add jitter: randomly between 50-100% of calculated delay
        jitterDelay := backoffDelay/2 + time.Duration(r.Int63n(int64(backoffDelay/2)))
        attempt++

        logDetails := map[string]interface{}{
            "operation":     operation,
            "attempt":       attempt,
            "delay_seconds": jitterDelay.Seconds(),
            "status_code":   graphError.StatusCode,
        }

        if throttleInfo != "" {
            logDetails["throttle_reason"] = throttleInfo
        }
        if throttleScope != (ThrottleScope{}) {
            logDetails["throttle_scope"] = throttleScope.Scope
            logDetails["throttle_limit"] = throttleScope.Limit
        }

        tflog.Info(ctx, "Microsoft Graph rate limit encountered", logDetails)

        // Stop the timer inline rather than with defer; defers would pile up
        // across retry iterations for the lifetime of the call.
        timer := time.NewTimer(jitterDelay)
        select {
        case <-ctx.Done():
            timer.Stop()
            return ctx.Err()
        case <-timer.C:
            continue
        }
    }
}
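
A hypothetical in-package test illustrating the header shape parseThrottleScope expects: four '/'-separated segments (scope, limit policy, application ID, resource ID). The policy name and GUID below are placeholders, not values observed from Graph:

package retry

import "testing"

func TestParseThrottleScope(t *testing.T) {
    ts := parseThrottleScope("Tenant/HighLimit/00000000-0000-0000-0000-000000000000/users")
    if ts.Scope != "Tenant" || ts.Limit != "HighLimit" {
        t.Fatalf("unexpected parse result: %+v", ts)
    }
    // Anything that does not split into exactly four segments yields the zero value.
    if parseThrottleScope("malformed") != (ThrottleScope{}) {
        t.Fatal("expected zero value for a malformed header")
    }
}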
121 changes: 121 additions & 0 deletions internal/resources/common/retry/retry_intune.go
@@ -0,0 +1,121 @@
// REF: https://learn.microsoft.com/en-us/graph/throttling-limits#intune-service-limits

package retry

import (
    "context"
    "time"

    "github.com/deploymenttheory/terraform-provider-microsoft365/internal/resources/common/errors"
    "github.com/hashicorp/terraform-plugin-log/tflog"
    "golang.org/x/exp/rand"
)

// IntuneOperationType defines the type of Intune operation
type IntuneOperationType string

const (
    IntuneWrite IntuneOperationType = "Write" // POST, PUT, DELETE, PATCH
    IntuneRead  IntuneOperationType = "Read"  // GET and others
)

// RetryableIntuneOperation executes an Intune operation with specific rate limiting
func RetryableIntuneOperation(ctx context.Context, operation string, opType IntuneOperationType, fn func() error) error {
    var attempt int
    r := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

    const (
        // Write operations (POST, PUT, DELETE, PATCH)
        writePerAppLimit = 100 // requests per 20 seconds
        writeTenantLimit = 200 // requests per 20 seconds

        // General operations
        generalPerAppLimit = 1000 // requests per 20 seconds
        generalTenantLimit = 2000 // requests per 20 seconds

        maxBackoff = 10 * time.Second
        baseDelay  = 2 * time.Second
    )

    for {
        err := fn()
        if err == nil {
            return nil
        }

        graphError := errors.GraphError(ctx, err)
        if graphError.StatusCode != 429 {
            return err
        }

        // Parse throttle scope and throttle information if the response
        // included headers; guard against a nil header set to avoid a panic.
        var throttleScope ThrottleScope
        var throttleInfo string
        if graphError.Headers != nil {
            if scope := graphError.Headers.Get("x-ms-throttle-scope"); len(scope) > 0 {
                throttleScope = parseThrottleScope(scope[0])
            }
            if info := graphError.Headers.Get("x-ms-throttle-information"); len(info) > 0 {
                throttleInfo = info[0]
            }
        }

        // Use Retry-After if provided, otherwise fall back to exponential
        // backoff. Retry-After arrives as whole seconds, so appending "s"
        // makes it a valid time.ParseDuration input.
        var backoffDelay time.Duration
        if graphError.RetryAfter != "" {
            if seconds, err := time.ParseDuration(graphError.RetryAfter + "s"); err == nil {
                backoffDelay = seconds
            }
        }

        if backoffDelay == 0 {
            shift := attempt
            if shift > 5 {
                shift = 5 // cap the exponent; the delay is clamped to maxBackoff anyway
            }
            backoffDelay = baseDelay * time.Duration(1<<shift)
            if backoffDelay > maxBackoff {
                backoffDelay = maxBackoff
            }
        }

        // Add jitter: randomly between 50-100% of calculated delay
        jitterDelay := backoffDelay/2 + time.Duration(r.Int63n(int64(backoffDelay/2)))
        attempt++

        // Enhanced logging with rate limit context
        logDetails := map[string]interface{}{
            "operation":      operation,
            "attempt":        attempt,
            "delay_seconds":  jitterDelay.Seconds(),
            "status_code":    graphError.StatusCode,
            "operation_type": string(opType),
        }

        if opType == IntuneWrite {
            logDetails["rate_limit_per_app"] = writePerAppLimit
            logDetails["rate_limit_tenant"] = writeTenantLimit
        } else {
            logDetails["rate_limit_per_app"] = generalPerAppLimit
            logDetails["rate_limit_tenant"] = generalTenantLimit
        }
        logDetails["window_seconds"] = 20

        if throttleInfo != "" {
            logDetails["throttle_reason"] = throttleInfo
        }
        if throttleScope != (ThrottleScope{}) {
            logDetails["throttle_scope"] = throttleScope.Scope
            logDetails["throttle_limit"] = throttleScope.Limit
        }

        tflog.Info(ctx, "Intune service rate limit encountered", logDetails)

        // Stop the timer inline rather than with defer; defers would pile up
        // across retry iterations for the lifetime of the call.
        timer := time.NewTimer(jitterDelay)
        select {
        case <-ctx.Done():
            timer.Stop()
            return ctx.Err()
        case <-timer.C:
            continue
        }
    }
}
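
To make the shared backoff schedule concrete: with baseDelay = 2s and maxBackoff = 10s, the pre-jitter delays grow 2s, 4s, 8s, then stay clamped at 10s, and jitter draws each wait uniformly from [delay/2, delay). A standalone sketch reproducing the calculation used by all three helpers:

package main

import (
    "fmt"
    "time"

    "golang.org/x/exp/rand"
)

func main() {
    r := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
    const (
        baseDelay  = 2 * time.Second
        maxBackoff = 10 * time.Second
    )
    for attempt := 0; attempt < 5; attempt++ {
        // Exponential growth capped at maxBackoff, then 50-100% jitter.
        delay := baseDelay * time.Duration(1<<attempt)
        if delay > maxBackoff {
            delay = maxBackoff
        }
        jittered := delay/2 + time.Duration(r.Int63n(int64(delay/2)))
        fmt.Printf("attempt %d: base %v, jittered %v\n", attempt+1, delay, jittered)
    }
}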
(diffs for the remaining 5 changed files not shown)
