From b1fe361a4ea34ac00f3bcc0574e259abfb471e74 Mon Sep 17 00:00:00 2001 From: Zoltan Giber Date: Fri, 1 Aug 2025 10:39:42 +0100 Subject: [PATCH 1/2] Implement precise lock lease renewal for storage backends that support lease renewal. - Added:`LockLeaseRenewer` interface to support renewing lock leases. `renewLockLease` function to extend lease duration based on retry attempts. - Updated certificate management functions to renew lock leases during long-running retry loops. - Added tests for lease renewal functionality in `config_test.go` to ensure correct behavior with and without lease support. --- config.go | 44 ++++++++++++++++++++++++++++++++ config_test.go | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ storage.go | 10 ++++++++ 3 files changed, 122 insertions(+) diff --git a/config.go b/config.go index 1e1e840e..15f28e09 100644 --- a/config.go +++ b/config.go @@ -490,6 +490,31 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool) return renew() } +// renewLockLease extends the lease duration on an existing lock if the storage +// backend supports it. The lease duration is calculated based on the retry attempt +// number and includes the certificate obtain timeout. This prevents locks from +// expiring during long-running certificate operations with retries. +func renewLockLease(ctx context.Context, storage Storage, lockKey string, attempt int) error { + l, ok := storage.(LockLeaseRenewer) + if !ok { + return nil + } + + leaseDuration := maxRetryDuration + if attempt < len(retryIntervals) && attempt >= 0 { + leaseDuration = retryIntervals[attempt] + } + leaseDuration = leaseDuration + DefaultACME.CertObtainTimeout + + err := l.RenewLockLease(ctx, lockKey, leaseDuration) + if err == nil { + locksMu.Lock() + locks[lockKey] = storage + locksMu.Unlock() + } + return err +} + // ObtainCertSync generates a new private key and obtains a certificate for // name using cfg in the foreground; i.e. interactively and without retries. // It stows the renewed certificate and its assets in storage if successful. @@ -546,6 +571,15 @@ func (cfg *Config) obtainCert(ctx context.Context, name string, interactive bool log.Info("lock acquired", zap.String("identifier", name)) f := func(ctx context.Context) error { + // renew lease on the lock if the certificate store supports it + attempt, ok := ctx.Value(AttemptsCtxKey).(*int) + if ok { + err = renewLockLease(ctx, cfg.Storage, lockKey, *attempt) + if err != nil { + return fmt.Errorf("unable to renew lock lease '%s': %v", lockKey, err) + } + } + // check if obtain is still needed -- might have been obtained during lock if cfg.storageHasCertResourcesAnyIssuer(ctx, name) { log.Info("certificate already exists in storage", zap.String("identifier", name)) @@ -805,6 +839,16 @@ func (cfg *Config) renewCert(ctx context.Context, name string, force, interactiv log.Info("lock acquired", zap.String("identifier", name)) f := func(ctx context.Context) error { + // renew lease on the certificate store lock if the store implementation supports it; + // prevents the lock from being acquired by another process/instance while we're renewing + attempt, ok := ctx.Value(AttemptsCtxKey).(*int) + if ok { + err = renewLockLease(ctx, cfg.Storage, lockKey, *attempt) + if err != nil { + return fmt.Errorf("unable to renew lock lease '%s': %v", lockKey, err) + } + } + // prepare for renewal (load PEM cert, key, and meta) certRes, err := cfg.loadCertResourceAnyIssuer(ctx, name) if err != nil { diff --git a/config_test.go b/config_test.go index 8084d7d9..f9ff0b17 100644 --- a/config_test.go +++ b/config_test.go @@ -21,7 +21,9 @@ import ( "os" "reflect" "testing" + "time" + "github.com/caddyserver/certmagic/internal/testutil" "github.com/mholt/acmez/v3/acme" ) @@ -76,6 +78,72 @@ func TestSaveCertResource(t *testing.T) { } } +type mockStorageWithLease struct { + *FileStorage + renewCalled bool + renewError error + lastLockKey string + lastDuration time.Duration +} + +func (m *mockStorageWithLease) RenewLockLease(ctx context.Context, lockKey string, leaseDuration time.Duration) error { + m.renewCalled = true + m.lastLockKey = lockKey + m.lastDuration = leaseDuration + return m.renewError +} + +func TestRenewLockLeaseDuration(t *testing.T) { + ctx := context.Background() + tmpDir, err := os.MkdirTemp(os.TempDir(), "certmagic-test*") + testutil.RequireNoError(t, err, "allocating tmp dir") + defer os.RemoveAll(tmpDir) + + mockStorage := &mockStorageWithLease{ + FileStorage: &FileStorage{Path: tmpDir}, + } + + // Test attempt 0 + renewLockLease(ctx, mockStorage, "test-lock", 0) + expected := retryIntervals[0] + DefaultACME.CertObtainTimeout + testutil.RequireEqual(t, expected, mockStorage.lastDuration) + + // Test attempt beyond array bounds + renewLockLease(ctx, mockStorage, "test-lock", 999) + expected = maxRetryDuration + DefaultACME.CertObtainTimeout + testutil.RequireEqual(t, expected, mockStorage.lastDuration) +} + +// Test that lease renewal works when storage supports it +func TestRenewLockLeaseWithInterface(t *testing.T) { + ctx := context.Background() + tmpDir, err := os.MkdirTemp(os.TempDir(), "certmagic-test*") + testutil.RequireNoError(t, err, "allocating tmp dir") + defer os.RemoveAll(tmpDir) + + mockStorage := &mockStorageWithLease{ + FileStorage: &FileStorage{Path: tmpDir}, + } + + err = renewLockLease(ctx, mockStorage, "test-lock", 0) + testutil.RequireNoError(t, err) + + testutil.RequireEqual(t, true, mockStorage.renewCalled) +} + +// Test that no error occurs when storage doesn't support lease renewal +func TestRenewLockLeaseWithoutInterface(t *testing.T) { + ctx := context.Background() + tmpDir, err := os.MkdirTemp(os.TempDir(), "certmagic-test*") + testutil.RequireNoError(t, err, "allocating tmp dir") + defer os.RemoveAll(tmpDir) + + storage := &FileStorage{Path: tmpDir} + + err = renewLockLease(ctx, storage, "test-lock", 0) + testutil.RequireNoError(t, err) +} + func mustJSON(val any) []byte { result, err := json.Marshal(val) if err != nil { diff --git a/storage.go b/storage.go index faf73153..fcff7c02 100644 --- a/storage.go +++ b/storage.go @@ -149,6 +149,16 @@ type Locker interface { Unlock(ctx context.Context, name string) error } +// LockLeaseRenewer is an optional interface that can be implemented by a Storage +// implementation to support renewing the lease on a lock. This is useful for +// long-running operations that need to be synchronized across a cluster. +type LockLeaseRenewer interface { + // RenewLockLease renews the lease on the lock for the given lockKey for the + // given leaseDuration. This is used to prevent the lock from being acquired + // by another process. + RenewLockLease(ctx context.Context, lockKey string, leaseDuration time.Duration) error +} + // KeyInfo holds information about a key in storage. // Key and IsTerminal are required; Modified and Size // are optional if the storage implementation is not From 0412a9bedfa895d18d322e407fffffab46ff878f Mon Sep 17 00:00:00 2001 From: Zoltan Giber Date: Thu, 14 Aug 2025 08:27:13 +0100 Subject: [PATCH 2/2] Log renew lock lease --- config.go | 8 +++++--- config_test.go | 11 +++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/config.go b/config.go index 15f28e09..cf88853c 100644 --- a/config.go +++ b/config.go @@ -494,7 +494,7 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool) // backend supports it. The lease duration is calculated based on the retry attempt // number and includes the certificate obtain timeout. This prevents locks from // expiring during long-running certificate operations with retries. -func renewLockLease(ctx context.Context, storage Storage, lockKey string, attempt int) error { +func (cfg *Config) renewLockLease(ctx context.Context, storage Storage, lockKey string, attempt int) error { l, ok := storage.(LockLeaseRenewer) if !ok { return nil @@ -505,6 +505,8 @@ func renewLockLease(ctx context.Context, storage Storage, lockKey string, attemp leaseDuration = retryIntervals[attempt] } leaseDuration = leaseDuration + DefaultACME.CertObtainTimeout + log := cfg.Logger.Named("renewLockLease") + log.Debug("renewing lock lease", zap.String("lockKey", lockKey), zap.Int("attempt", attempt)) err := l.RenewLockLease(ctx, lockKey, leaseDuration) if err == nil { @@ -574,7 +576,7 @@ func (cfg *Config) obtainCert(ctx context.Context, name string, interactive bool // renew lease on the lock if the certificate store supports it attempt, ok := ctx.Value(AttemptsCtxKey).(*int) if ok { - err = renewLockLease(ctx, cfg.Storage, lockKey, *attempt) + err = cfg.renewLockLease(ctx, cfg.Storage, lockKey, *attempt) if err != nil { return fmt.Errorf("unable to renew lock lease '%s': %v", lockKey, err) } @@ -843,7 +845,7 @@ func (cfg *Config) renewCert(ctx context.Context, name string, force, interactiv // prevents the lock from being acquired by another process/instance while we're renewing attempt, ok := ctx.Value(AttemptsCtxKey).(*int) if ok { - err = renewLockLease(ctx, cfg.Storage, lockKey, *attempt) + err = cfg.renewLockLease(ctx, cfg.Storage, lockKey, *attempt) if err != nil { return fmt.Errorf("unable to renew lock lease '%s': %v", lockKey, err) } diff --git a/config_test.go b/config_test.go index f9ff0b17..89a63224 100644 --- a/config_test.go +++ b/config_test.go @@ -104,12 +104,13 @@ func TestRenewLockLeaseDuration(t *testing.T) { } // Test attempt 0 - renewLockLease(ctx, mockStorage, "test-lock", 0) + cfg := &Config{Logger: defaultTestLogger} + cfg.renewLockLease(ctx, mockStorage, "test-lock", 0) expected := retryIntervals[0] + DefaultACME.CertObtainTimeout testutil.RequireEqual(t, expected, mockStorage.lastDuration) // Test attempt beyond array bounds - renewLockLease(ctx, mockStorage, "test-lock", 999) + cfg.renewLockLease(ctx, mockStorage, "test-lock", 999) expected = maxRetryDuration + DefaultACME.CertObtainTimeout testutil.RequireEqual(t, expected, mockStorage.lastDuration) } @@ -125,7 +126,8 @@ func TestRenewLockLeaseWithInterface(t *testing.T) { FileStorage: &FileStorage{Path: tmpDir}, } - err = renewLockLease(ctx, mockStorage, "test-lock", 0) + cfg := &Config{Logger: defaultTestLogger} + err = cfg.renewLockLease(ctx, mockStorage, "test-lock", 0) testutil.RequireNoError(t, err) testutil.RequireEqual(t, true, mockStorage.renewCalled) @@ -140,7 +142,8 @@ func TestRenewLockLeaseWithoutInterface(t *testing.T) { storage := &FileStorage{Path: tmpDir} - err = renewLockLease(ctx, storage, "test-lock", 0) + cfg := &Config{Logger: defaultTestLogger} + err = cfg.renewLockLease(ctx, storage, "test-lock", 0) testutil.RequireNoError(t, err) }