diff --git a/config.go b/config.go index 1e1e840e..cf88853c 100644 --- a/config.go +++ b/config.go @@ -490,6 +490,33 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool) return renew() } +// renewLockLease extends the lease duration on an existing lock if the storage +// backend supports it. The lease duration is calculated based on the retry attempt +// number and includes the certificate obtain timeout. This prevents locks from +// expiring during long-running certificate operations with retries. +func (cfg *Config) renewLockLease(ctx context.Context, storage Storage, lockKey string, attempt int) error { + l, ok := storage.(LockLeaseRenewer) + if !ok { + return nil + } + + leaseDuration := maxRetryDuration + if attempt < len(retryIntervals) && attempt >= 0 { + leaseDuration = retryIntervals[attempt] + } + leaseDuration = leaseDuration + DefaultACME.CertObtainTimeout + log := cfg.Logger.Named("renewLockLease") + log.Debug("renewing lock lease", zap.String("lockKey", lockKey), zap.Int("attempt", attempt)) + + err := l.RenewLockLease(ctx, lockKey, leaseDuration) + if err == nil { + locksMu.Lock() + locks[lockKey] = storage + locksMu.Unlock() + } + return err +} + // ObtainCertSync generates a new private key and obtains a certificate for // name using cfg in the foreground; i.e. interactively and without retries. // It stows the renewed certificate and its assets in storage if successful. @@ -546,6 +573,15 @@ func (cfg *Config) obtainCert(ctx context.Context, name string, interactive bool log.Info("lock acquired", zap.String("identifier", name)) f := func(ctx context.Context) error { + // renew lease on the lock if the certificate store supports it + attempt, ok := ctx.Value(AttemptsCtxKey).(*int) + if ok { + err = cfg.renewLockLease(ctx, cfg.Storage, lockKey, *attempt) + if err != nil { + return fmt.Errorf("unable to renew lock lease '%s': %v", lockKey, err) + } + } + // check if obtain is still needed -- might have been obtained during lock if cfg.storageHasCertResourcesAnyIssuer(ctx, name) { log.Info("certificate already exists in storage", zap.String("identifier", name)) @@ -805,6 +841,16 @@ func (cfg *Config) renewCert(ctx context.Context, name string, force, interactiv log.Info("lock acquired", zap.String("identifier", name)) f := func(ctx context.Context) error { + // renew lease on the certificate store lock if the store implementation supports it; + // prevents the lock from being acquired by another process/instance while we're renewing + attempt, ok := ctx.Value(AttemptsCtxKey).(*int) + if ok { + err = cfg.renewLockLease(ctx, cfg.Storage, lockKey, *attempt) + if err != nil { + return fmt.Errorf("unable to renew lock lease '%s': %v", lockKey, err) + } + } + // prepare for renewal (load PEM cert, key, and meta) certRes, err := cfg.loadCertResourceAnyIssuer(ctx, name) if err != nil { diff --git a/config_test.go b/config_test.go index 8084d7d9..89a63224 100644 --- a/config_test.go +++ b/config_test.go @@ -21,7 +21,9 @@ import ( "os" "reflect" "testing" + "time" + "github.com/caddyserver/certmagic/internal/testutil" "github.com/mholt/acmez/v3/acme" ) @@ -76,6 +78,75 @@ func TestSaveCertResource(t *testing.T) { } } +type mockStorageWithLease struct { + *FileStorage + renewCalled bool + renewError error + lastLockKey string + lastDuration time.Duration +} + +func (m *mockStorageWithLease) RenewLockLease(ctx context.Context, lockKey string, leaseDuration time.Duration) error { + m.renewCalled = true + m.lastLockKey = lockKey + m.lastDuration = leaseDuration + return m.renewError +} + +func TestRenewLockLeaseDuration(t *testing.T) { + ctx := context.Background() + tmpDir, err := os.MkdirTemp(os.TempDir(), "certmagic-test*") + testutil.RequireNoError(t, err, "allocating tmp dir") + defer os.RemoveAll(tmpDir) + + mockStorage := &mockStorageWithLease{ + FileStorage: &FileStorage{Path: tmpDir}, + } + + // Test attempt 0 + cfg := &Config{Logger: defaultTestLogger} + cfg.renewLockLease(ctx, mockStorage, "test-lock", 0) + expected := retryIntervals[0] + DefaultACME.CertObtainTimeout + testutil.RequireEqual(t, expected, mockStorage.lastDuration) + + // Test attempt beyond array bounds + cfg.renewLockLease(ctx, mockStorage, "test-lock", 999) + expected = maxRetryDuration + DefaultACME.CertObtainTimeout + testutil.RequireEqual(t, expected, mockStorage.lastDuration) +} + +// Test that lease renewal works when storage supports it +func TestRenewLockLeaseWithInterface(t *testing.T) { + ctx := context.Background() + tmpDir, err := os.MkdirTemp(os.TempDir(), "certmagic-test*") + testutil.RequireNoError(t, err, "allocating tmp dir") + defer os.RemoveAll(tmpDir) + + mockStorage := &mockStorageWithLease{ + FileStorage: &FileStorage{Path: tmpDir}, + } + + cfg := &Config{Logger: defaultTestLogger} + err = cfg.renewLockLease(ctx, mockStorage, "test-lock", 0) + testutil.RequireNoError(t, err) + + testutil.RequireEqual(t, true, mockStorage.renewCalled) +} + +// Test that no error occurs when storage doesn't support lease renewal +func TestRenewLockLeaseWithoutInterface(t *testing.T) { + ctx := context.Background() + tmpDir, err := os.MkdirTemp(os.TempDir(), "certmagic-test*") + testutil.RequireNoError(t, err, "allocating tmp dir") + defer os.RemoveAll(tmpDir) + + storage := &FileStorage{Path: tmpDir} + + cfg := &Config{Logger: defaultTestLogger} + err = cfg.renewLockLease(ctx, storage, "test-lock", 0) + testutil.RequireNoError(t, err) +} + func mustJSON(val any) []byte { result, err := json.Marshal(val) if err != nil { diff --git a/storage.go b/storage.go index faf73153..fcff7c02 100644 --- a/storage.go +++ b/storage.go @@ -149,6 +149,16 @@ type Locker interface { Unlock(ctx context.Context, name string) error } +// LockLeaseRenewer is an optional interface that can be implemented by a Storage +// implementation to support renewing the lease on a lock. This is useful for +// long-running operations that need to be synchronized across a cluster. +type LockLeaseRenewer interface { + // RenewLockLease renews the lease on the lock for the given lockKey for the + // given leaseDuration. This is used to prevent the lock from being acquired + // by another process. + RenewLockLease(ctx context.Context, lockKey string, leaseDuration time.Duration) error +} + // KeyInfo holds information about a key in storage. // Key and IsTerminal are required; Modified and Size // are optional if the storage implementation is not