From c51b3b6b299d203b6609c7fa0ef9b1a7bc567682 Mon Sep 17 00:00:00 2001 From: Polyscone Date: Wed, 22 Oct 2025 11:52:18 +0900 Subject: [PATCH 1/2] Implement `TryLock` for use with optional tasks like ARI updates to reduce lock contention --- filestorage.go | 51 ++++++++++++++++++++++++++++++++++++++++---------- maintain.go | 12 +++++++++++- storage.go | 32 +++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 11 deletions(-) diff --git a/filestorage.go b/filestorage.go index 78ba2393..77158c1c 100644 --- a/filestorage.go +++ b/filestorage.go @@ -168,9 +168,10 @@ func (s *FileStorage) Filename(key string) string { return filepath.Join(s.Path, filepath.FromSlash(key)) } -// Lock obtains a lock named by the given name. It blocks -// until the lock can be obtained or an error is returned. -func (s *FileStorage) Lock(ctx context.Context, name string) error { +// obtainLock will attempt to obtain a lock for the given name up to the +// number of attempts given. +// if attempts is negative then it will try forever. +func (s *FileStorage) obtainLock(ctx context.Context, name string, attempts int) (bool, error) { filename := s.lockFilename(name) // sometimes the lockfiles read as empty (size 0) - this is either a stale lock or it @@ -179,14 +180,26 @@ func (s *FileStorage) Lock(ctx context.Context, name string) error { var emptyCount int for { + // if attempts is negative then we should allow the loop + // to retry until the context is done, otherwise we decrement + // the remaining attempts if there are any here to ensure we + // don't miss it due to continue statements throughout the loop + switch { + case attempts == 0: + return false, nil + + case attempts > 0: + attempts-- + } + err := createLockfile(filename) if err == nil { // got the lock, yay - return nil + return true, nil } if !os.IsExist(err) { // unexpected error - return fmt.Errorf("creating lock file: %v", err) + return false, fmt.Errorf("creating lock file: %v", err) } // lock file already exists @@ -204,7 +217,7 @@ func (s *FileStorage) Lock(ctx context.Context, name string) error { select { case <-time.After(250 * time.Millisecond): case <-ctx.Done(): - return ctx.Err() + return false, ctx.Err() } continue } else { @@ -215,7 +228,7 @@ func (s *FileStorage) Lock(ctx context.Context, name string) error { defaultLogger.Sugar().Infof("[%s] %s: Empty lockfile (%v) - likely previous process crashed or storage medium failure; treating as stale", s, filename, err2) } } else if err2 != nil { - return fmt.Errorf("decoding lockfile contents: %w", err2) + return false, fmt.Errorf("decoding lockfile contents: %w", err2) } } @@ -226,7 +239,7 @@ func (s *FileStorage) Lock(ctx context.Context, name string) error { case err != nil: // unexpected error - return fmt.Errorf("accessing lock file: %v", err) + return false, fmt.Errorf("accessing lock file: %v", err) case fileLockIsStale(meta): // lock file is stale - delete it and try again to obtain lock @@ -237,7 +250,7 @@ func (s *FileStorage) Lock(ctx context.Context, name string) error { defaultLogger.Sugar().Infof("[%s] Lock for '%s' is stale (created: %s, last update: %s); removing then retrying: %s", s, name, meta.Created, meta.Updated, filename) if err = os.Remove(filename); err != nil { // hopefully we can replace the lock file quickly! if !errors.Is(err, fs.ErrNotExist) { - return fmt.Errorf("unable to delete stale lockfile; deadlocked: %w", err) + return false, fmt.Errorf("unable to delete stale lockfile; deadlocked: %w", err) } } continue @@ -249,12 +262,30 @@ func (s *FileStorage) Lock(ctx context.Context, name string) error { select { case <-time.After(fileLockPollInterval): case <-ctx.Done(): - return ctx.Err() + return false, ctx.Err() } } } } +// Lock obtains a lock named by the given name. It blocks +// until the lock can be obtained or an error is returned. +func (s *FileStorage) Lock(ctx context.Context, name string) error { + ok, err := s.obtainLock(ctx, name, -1) + if !ok && err == nil { + return errors.New("unable to obtain lock") + } + + return err +} + +// TryLock attempts to obtain a lock named by the given name. +// If the lock was obtained it will return true, otherwise it will +// return false along with any errors that may have occurred. +func (s *FileStorage) TryLock(ctx context.Context, name string) (bool, error) { + return s.obtainLock(ctx, name, 2) +} + // Unlock releases the lock for name. func (s *FileStorage) Unlock(_ context.Context, name string) error { return os.Remove(s.lockFilename(name)) diff --git a/maintain.go b/maintain.go index a20a07f5..30de28de 100644 --- a/maintain.go +++ b/maintain.go @@ -468,7 +468,17 @@ func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap. // synchronize ARI fetching; see #297 lockName := "ari_" + cert.ari.UniqueIdentifier - if err := acquireLock(ctx, cfg.Storage, lockName); err != nil { + if _, ok := cfg.Storage.(TryLocker); ok { + ok, err := tryAcquireLock(ctx, cfg.Storage, lockName) + if err != nil { + return cert, false, fmt.Errorf("unable to obtain ARI lock: %v", err) + } + if !ok { + logger.Debug("attempted to obtain ARI lock but it was already taken") + + return cert, false, nil + } + } else if err := acquireLock(ctx, cfg.Storage, lockName); err != nil { return cert, false, fmt.Errorf("unable to obtain ARI lock: %v", err) } defer func() { diff --git a/storage.go b/storage.go index fcff7c02..a4ae1fc6 100644 --- a/storage.go +++ b/storage.go @@ -16,6 +16,7 @@ package certmagic import ( "context" + "fmt" "path" "regexp" "strings" @@ -149,6 +150,22 @@ type Locker interface { Unlock(ctx context.Context, name string) error } +type TryLocker interface { + // TryLock attempts to acquire the lock for name, and returns a + // boolean that reports whether the lock was successfully aquired + // or not along with any errors that may have occurred. + // + // Implementations should honor context cancellation. + TryLock(ctx context.Context, name string) (bool, error) + + // Unlock releases named lock. This method must ONLY be called + // after a successful call to TryLock, and only after the critical + // section is finished, even if it errored or timed out. Unlock + // cleans up any resources allocated during TryLock. Unlock should + // only return an error if the lock was unable to be released. + Unlock(ctx context.Context, name string) error +} + // LockLeaseRenewer is an optional interface that can be implemented by a Storage // implementation to support renewing the lease on a lock. This is useful for // long-running operations that need to be synchronized across a cluster. @@ -298,6 +315,21 @@ func acquireLock(ctx context.Context, storage Storage, lockKey string) error { return err } +func tryAcquireLock(ctx context.Context, storage Storage, lockKey string) (bool, error) { + locker, ok := storage.(TryLocker) + if !ok { + return false, fmt.Errorf("%T does not implement TryLocker", storage) + } + + ok, err := locker.TryLock(ctx, lockKey) + if ok && err == nil { + locksMu.Lock() + locks[lockKey] = storage + locksMu.Unlock() + } + return ok, err +} + func releaseLock(ctx context.Context, storage Storage, lockKey string) error { err := storage.Unlock(context.WithoutCancel(ctx), lockKey) if err == nil { From cd1f51f5a7a016eece57b2a02cb69fe37f1a532d Mon Sep 17 00:00:00 2001 From: Matt Holt Date: Thu, 23 Oct 2025 13:24:21 -0600 Subject: [PATCH 2/2] Update maintain.go --- maintain.go | 1 - 1 file changed, 1 deletion(-) diff --git a/maintain.go b/maintain.go index 30de28de..bda4a93f 100644 --- a/maintain.go +++ b/maintain.go @@ -475,7 +475,6 @@ func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap. } if !ok { logger.Debug("attempted to obtain ARI lock but it was already taken") - return cert, false, nil } } else if err := acquireLock(ctx, cfg.Storage, lockName); err != nil {