Merged
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,20 @@

All notable changes to Library Manager will be documented in this file.

## [0.9.0-beta.126] - 2026-02-16

### Fixed

- **Issue #155: API key not sent on /search requests** - Requests to all Skaldleita API endpoints now
include authentication headers. GET /search requests were missing the X-API-Key header, causing 403
Forbidden errors after Skaldleita added auth requirements to all endpoints.
- **Issue #154: Rate limit handling** - Centralized rate limit handling in `handle_rate_limit_response()`
with exponential backoff (30s/60s/120s), Retry-After header parsing, and circuit breaker
integration. Applied to bookdb.py and fingerprint.py providers. Frontend displays rate limit
warnings with retry countdown on library, queue, and history pages.

---

## [0.9.0-beta.125] - 2026-02-14

### Fixed
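The fix for issue #155 amounts to attaching the API key (plus any signed headers) to every outgoing Skaldleita request. Below is a minimal sketch of the pattern the diff applies in `app.py`; `BOOKDB_API_URL`, `BOOKDB_PUBLIC_KEY`, `get_signed_headers`, and `load_secrets` are the project's own names, while the wrapper function itself is illustrative and not part of this PR.

```python
import requests

from library_manager.providers import BOOKDB_API_URL, BOOKDB_PUBLIC_KEY, get_signed_headers


def skaldleita_get(path, params, load_secrets):
    """Illustrative wrapper: every Skaldleita request now carries auth headers."""
    secrets = load_secrets()
    # Prefer a user-configured key, fall back to the bundled public key
    api_key = secrets.get('bookdb_api_key') or BOOKDB_PUBLIC_KEY
    headers = get_signed_headers() or {}
    headers['X-API-Key'] = api_key
    return requests.get(f"{BOOKDB_API_URL}{path}", params=params, headers=headers, timeout=60)
```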
2 changes: 1 addition & 1 deletion README.md
@@ -4,7 +4,7 @@

**Smart Audiobook Library Organizer with Multi-Source Metadata & AI Verification**

[![Version](https://img.shields.io/badge/version-0.9.0--beta.125-blue.svg)](CHANGELOG.md)
[![Version](https://img.shields.io/badge/version-0.9.0--beta.126-blue.svg)](CHANGELOG.md)
[![Docker](https://img.shields.io/badge/docker-ghcr.io-blue.svg)](https://ghcr.io/deucebucket/library-manager)
[![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)

59 changes: 39 additions & 20 deletions app.py
@@ -11,7 +11,7 @@
- Multi-provider AI (Gemini, OpenRouter, Ollama)
"""

APP_VERSION = "0.9.0-beta.125"
APP_VERSION = "0.9.0-beta.126"
GITHUB_REPO = "deucebucket/library-manager" # Your GitHub repo

# Versioning Guide:
@@ -73,9 +73,10 @@
)
from library_manager.providers import (
rate_limit_wait, is_circuit_open, record_api_failure, record_api_success,
handle_rate_limit_response,
API_RATE_LIMITS, API_CIRCUIT_BREAKER,
search_audnexus, search_openlibrary, search_google_books, search_hardcover,
BOOKDB_API_URL, BOOKDB_PUBLIC_KEY,
BOOKDB_API_URL, BOOKDB_PUBLIC_KEY, get_signed_headers,
search_bookdb as _search_bookdb_raw, identify_audio_with_bookdb,
call_ollama as _call_ollama_raw, call_ollama_simple as _call_ollama_simple_raw,
get_ollama_models, test_ollama_connection,
@@ -700,7 +701,7 @@
try:
with open(ERROR_REPORTS_PATH, 'r') as f:
reports = json.load(f)
except:

[GitHub Actions / lint] ruff check failure: app.py:704:13: E722 Do not use bare `except`
reports = []

# Add new report (keep last 100 reports to avoid file bloat)
@@ -724,7 +725,7 @@
try:
with open(ERROR_REPORTS_PATH, 'r') as f:
return json.load(f)
except:

[GitHub Actions / lint] ruff check failure: app.py:728:9: E722 Do not use bare `except`
return []
return []

@@ -1679,7 +1680,7 @@
continue
result = call_gemini(prompt, merged_config)
if result:
logger.info(f"[PROVIDER CHAIN] Success with gemini")

[GitHub Actions / lint] ruff check failure: app.py:1683:33: F541 f-string without any placeholders
return result

elif provider == 'openrouter':
@@ -1688,13 +1689,13 @@
continue
result = call_openrouter(prompt, merged_config)
if result:
logger.info(f"[PROVIDER CHAIN] Success with openrouter")

[GitHub Actions / lint] ruff check failure: app.py:1692:33: F541 f-string without any placeholders
return result

elif provider == 'ollama':
result = call_ollama(prompt, merged_config)
if result:
logger.info(f"[PROVIDER CHAIN] Success with ollama")

[GitHub Actions / lint] ruff check failure: app.py:1698:33: F541 f-string without any placeholders
return result

else:
@@ -1796,7 +1797,7 @@
return result
elif result and result.get('transcript'):
# Got transcript but no match - still useful, return for potential AI fallback
logger.info(f"[AUDIO CHAIN] BookDB returned transcript only")

[GitHub Actions / lint] ruff check failure: app.py:1800:37: F541 f-string without any placeholders
return result
elif result is None and attempt < max_retries - 1:
# Connection might be down, wait and retry
@@ -2128,11 +2129,11 @@
device = "cuda"
# int8 works on all CUDA devices including GTX 1080 (compute 6.1)
# float16 only works on newer GPUs (compute 7.0+)
logger.info(f"[WHISPER] Using CUDA GPU acceleration (10x faster)")

[GitHub Actions / lint] ruff check failure: app.py:2132:29: F541 f-string without any placeholders
else:
logger.info(f"[WHISPER] Using CPU (no CUDA GPU detected)")

[GitHub Actions / lint] ruff check failure: app.py:2134:29: F541 f-string without any placeholders
except ImportError:
logger.info(f"[WHISPER] Using CPU (ctranslate2 not available)")

[GitHub Actions / lint] ruff check failure: app.py:2136:25: F541 f-string without any placeholders

_whisper_model = WhisperModel(model_name, device=device, compute_type=compute_type)
_whisper_model_name = model_name
@@ -2339,7 +2340,7 @@
if sample_path and os.path.exists(sample_path):
try:
os.unlink(sample_path)
except:

[GitHub Actions / lint] ruff check failure: app.py:2343:13: E722 Do not use bare `except`
pass

return result
@@ -3021,7 +3022,7 @@

def search_bookdb_api(title, author=None, retry_count=0):
"""
Search the BookBucket API for a book (public endpoint, no auth needed).
Search the Skaldleita API for a book.
Uses Qdrant vector search - fast even with 50M books.
Returns dict with author, title, series if found.
Filters garbage matches using title similarity.
@@ -3039,6 +3040,12 @@

rate_limit_wait('bookdb') # 3.6s delay = max 1000/hr, never skips

# Build headers with auth (Skaldleita requires auth on all endpoints)
secrets = load_secrets()
api_key = secrets.get('bookdb_api_key') or BOOKDB_PUBLIC_KEY
headers = get_signed_headers() or {}
headers['X-API-Key'] = api_key

try:
# Use longer timeout for cold start (embedding model can take 45-60s to load)
# Retry once on timeout
@@ -3047,6 +3054,7 @@
response = requests.get(
f"{BOOKDB_API_URL}/search",
params={"q": search_title, "limit": 5},
headers=headers,
timeout=60 if attempt == 0 else 30
)
break
@@ -3056,16 +3064,13 @@
continue
raise

# Handle rate limiting - respect Retry-After header from server
if response.status_code == 429 and retry_count < 3:
retry_after = response.headers.get('Retry-After', '60')
try:
wait_time = min(int(retry_after), 300) # Cap at 5 minutes
except ValueError:
wait_time = 60 * (retry_count + 1) # Fallback: 60s, 120s, 180s
logger.info(f"BookDB API rate limited, waiting {wait_time}s (Retry-After: {retry_after})...")
time.sleep(wait_time)
return search_bookdb_api(title, retry_count + 1)
# Handle rate limiting with exponential backoff
if response.status_code == 429:
rl = handle_rate_limit_response(response, 'bookdb', retry_count)
if rl['should_retry']:
time.sleep(rl['wait_seconds'])
return search_bookdb_api(title, author, retry_count + 1)
return None

if response.status_code == 200:
results = response.json()
@@ -10894,13 +10899,9 @@

# ============== MANUAL BOOK MATCHING ==============

# Use the public BookBucket API - same as metadata pipeline
# No API key required - the search endpoints are public

@app.route('/api/search_bookdb')
def api_search_bookdb():
"""Search BookBucket for books/series to manually match.
Uses the public /search endpoint - no API key required.
"""Search Skaldleita for books/series to manually match.
Falls back to Google Books if BookDB is unavailable or returns no results.
"""
query = request.args.get('q', '').strip()
@@ -10950,14 +10951,32 @@
if author:
params['author'] = author

# Use public /search endpoint (no auth required)
# Build headers with auth (Skaldleita requires auth on all endpoints)
secrets = load_secrets()
api_key = secrets.get('bookdb_api_key') or BOOKDB_PUBLIC_KEY
headers = get_signed_headers() or {}
headers['X-API-Key'] = api_key

if search_type == 'all':
endpoint = f"{BOOKDB_API_URL}/search"
else:
endpoint = f"{BOOKDB_API_URL}/search/{search_type}"

# Longer timeout for cold start (embedding model can take 45-60s to load)
resp = requests.get(endpoint, params=params, timeout=60)
resp = requests.get(endpoint, params=params, headers=headers, timeout=60)

if resp.status_code == 429:
retry_after = resp.headers.get('Retry-After', '60')
try:
wait_seconds = int(retry_after)
except ValueError:
wait_seconds = 60
return jsonify({
'error': f'Skaldleita rate limited. Try again in {wait_seconds}s.',
'rate_limited': True,
'retry_after': wait_seconds,
'results': []
}), 429

if resp.status_code == 200:
results = resp.json()
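For callers of the `/api/search_bookdb` route above, the 429 contract is the structured JSON payload (`error`, `rate_limited`, `retry_after`, `results`). Here is a hedged client-side sketch of honoring it; the base URL, retry policy, and helper function are placeholders for illustration, not part of the PR.

```python
import time

import requests


def search_with_retry(query, base_url="http://localhost:5000", max_attempts=2):
    """Illustrative caller for /api/search_bookdb that honors the 429 payload."""
    for _ in range(max_attempts):
        resp = requests.get(f"{base_url}/api/search_bookdb",
                            params={"q": query, "limit": 20}, timeout=90)
        data = resp.json()
        if resp.status_code == 429 and data.get("rate_limited"):
            # Wait the server-suggested interval before one more attempt
            time.sleep(data.get("retry_after", 60))
            continue
        if data.get("error"):
            return []
        return data.get("results", [])
    return []
```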
4 changes: 4 additions & 0 deletions library_manager/providers/__init__.py
@@ -19,6 +19,7 @@
is_circuit_open,
record_api_failure,
record_api_success,
handle_rate_limit_response,
API_RATE_LIMITS,
API_CIRCUIT_BREAKER,
)
@@ -29,6 +30,7 @@
from library_manager.providers.bookdb import (
BOOKDB_API_URL,
BOOKDB_PUBLIC_KEY,
get_signed_headers,
search_bookdb,
identify_audio_with_bookdb,
)
@@ -82,6 +84,7 @@
'is_circuit_open',
'record_api_failure',
'record_api_success',
'handle_rate_limit_response',
'API_RATE_LIMITS',
'API_CIRCUIT_BREAKER',
# API providers
@@ -92,6 +95,7 @@
# Skaldleita (legacy name: BookDB)
'BOOKDB_API_URL',
'BOOKDB_PUBLIC_KEY',
'get_signed_headers',
'search_bookdb',
'identify_audio_with_bookdb',
# Ollama
27 changes: 6 additions & 21 deletions library_manager/providers/bookdb.py
@@ -23,6 +23,7 @@
is_circuit_open,
record_api_failure,
record_api_success,
handle_rate_limit_response,
API_CIRCUIT_BREAKER,
)
from library_manager.utils.voice_embedding import (
@@ -148,30 +149,14 @@ def search_bookdb(title, author=None, api_key=None, retry_count=0, bookdb_url=No
timeout=10
)

# Handle rate limiting - respect Retry-After header from server
# Handle rate limiting with exponential backoff
if resp.status_code == 429:
# Increment circuit breaker failures
if 'bookdb' in API_CIRCUIT_BREAKER:
cb = API_CIRCUIT_BREAKER['bookdb']
cb['failures'] = cb.get('failures', 0) + 1
if cb['failures'] >= cb.get('max_failures', 5):
cb['circuit_open_until'] = time.time() + cb.get('cooldown', 120)
logger.warning(f"Skaldleita: Circuit OPEN after {cb['failures']} rate limits, backing off for {cb['cooldown']}s")
return None

if retry_count < 2: # Reduced retries since we have circuit breaker now
retry_after = resp.headers.get('Retry-After', '60')
try:
wait_time = min(int(retry_after), 120) # Cap at 2 minutes
except ValueError:
wait_time = 30 * (retry_count + 1) # Fallback: 30s, 60s
logger.info(f"Skaldleita rate limited, waiting {wait_time}s (Retry-After: {retry_after})...")
time.sleep(wait_time)
rl = handle_rate_limit_response(resp, 'bookdb', retry_count)
if rl['should_retry']:
time.sleep(rl['wait_seconds'])
return search_bookdb(title, author, api_key, retry_count + 1, bookdb_url,
config, data_dir, cache_getter)
else:
logger.warning("Skaldleita rate limited, max retries reached")
return None
return None

if resp.status_code != 200:
logger.debug(f"Skaldleita returned status {resp.status_code}")
21 changes: 21 additions & 0 deletions library_manager/providers/fingerprint.py
@@ -18,6 +18,8 @@

import requests

from library_manager.providers.rate_limiter import handle_rate_limit_response

logger = logging.getLogger(__name__)

# Skaldleita fingerprint endpoints
@@ -142,6 +144,13 @@ def lookup_fingerprint(
timeout=10
)

if response.status_code == 429:
# Fingerprint lookups are supplementary - fail fast, don't retry.
# The circuit breaker will back off future requests automatically.
rl = handle_rate_limit_response(response, 'bookdb')
logger.warning(f"[FINGERPRINT] Rate limited (retry_after: {rl['retry_after']})")
return None

if response.status_code == 200:
data = response.json()
if data.get('match'):
@@ -220,6 +229,12 @@ def contribute_fingerprint(
timeout=10
)

if response.status_code == 429:
# Contributions are best-effort - fail fast, don't retry
rl = handle_rate_limit_response(response, 'bookdb')
logger.warning(f"[FINGERPRINT] Contribution rate limited (retry_after: {rl['retry_after']})")
return False

if response.status_code in (200, 201):
data = response.json()
if data.get('is_new'):
@@ -507,6 +522,12 @@ def lookup_narrator(
timeout=10
)

if response.status_code == 429:
# Narrator lookups are supplementary - fail fast, don't retry
handle_rate_limit_response(response, 'bookdb')
logger.warning("[NARRATOR] Lookup rate limited")
return None

if response.status_code == 200:
data = response.json()
if data.get('match'):
62 changes: 62 additions & 0 deletions library_manager/providers/rate_limiter.py
@@ -86,6 +86,67 @@ def record_api_success(api_name):
API_CIRCUIT_BREAKER[api_name]['failures'] = 0


def handle_rate_limit_response(response, api_name: str, retry_count: int = 0, max_retries: int = 2) -> dict:
"""
Handle a 429 response with exponential backoff and circuit breaker.

Args:
response: The requests.Response object (must be status 429)
api_name: API name for circuit breaker tracking (e.g. 'bookdb')
retry_count: Current retry attempt (0-based)
max_retries: Maximum number of retries before giving up

Returns:
dict with:
'should_retry': bool - whether caller should retry the request
'wait_seconds': int - how long to wait before retrying (0 if not retrying)
'circuit_open': bool - whether circuit breaker tripped
'retry_after': str - raw Retry-After header value
"""
retry_after_raw = response.headers.get('Retry-After', '')

result = {
'should_retry': False,
'wait_seconds': 0,
'circuit_open': False,
'retry_after': retry_after_raw,
}

# Update circuit breaker
record_api_failure(api_name)

cb = API_CIRCUIT_BREAKER.get(api_name, {})
if cb.get('circuit_open_until', 0) > time.time():
result['circuit_open'] = True
logger.warning(f"[RATE LIMIT] {api_name}: Circuit breaker tripped, backing off")
return result

if retry_count >= max_retries:
logger.warning(f"[RATE LIMIT] {api_name}: Max retries ({max_retries}) reached")
return result

# Calculate wait time: use Retry-After header, with exponential backoff fallback
try:
wait_time = int(retry_after_raw) if retry_after_raw else 0
except ValueError:
wait_time = 0

if wait_time <= 0:
# Exponential backoff: 30s, 60s, 120s...
wait_time = 30 * (2 ** retry_count)

# Cap at 5 minutes
wait_time = min(wait_time, 300)

result['should_retry'] = True
result['wait_seconds'] = wait_time

logger.info(f"[RATE LIMIT] {api_name}: Rate limited, waiting {wait_time}s "
f"(attempt {retry_count + 1}/{max_retries}, Retry-After: {retry_after_raw or 'none'})")

return result


__all__ = [
'API_RATE_LIMITS',
'API_RATE_LOCK',
@@ -94,4 +155,5 @@ def record_api_success(api_name):
'is_circuit_open',
'record_api_failure',
'record_api_success',
'handle_rate_limit_response',
]
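As a usage note, `handle_rate_limit_response()` is meant to sit directly behind a `status_code == 429` check, as bookdb.py does above. Below is a minimal retry-loop sketch, assuming a caller-supplied `fetch()` that returns a `requests.Response`; the helper function is illustrative only, not part of the PR.

```python
import time

from library_manager.providers.rate_limiter import handle_rate_limit_response


def get_with_backoff(fetch, api_name='bookdb', max_retries=2):
    """Illustrative retry loop built on handle_rate_limit_response()."""
    for attempt in range(max_retries + 1):
        resp = fetch()
        if resp.status_code != 429:
            return resp
        rl = handle_rate_limit_response(resp, api_name, retry_count=attempt,
                                        max_retries=max_retries)
        if not rl['should_retry']:
            # Circuit breaker tripped or retries exhausted
            return None
        time.sleep(rl['wait_seconds'])
    return None
```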
9 changes: 7 additions & 2 deletions templates/history.html
@@ -343,8 +343,13 @@ <h5 class="modal-title"><i class="bi bi-pencil"></i> Edit Book Metadata</h5>
resultsDiv.style.display = 'block';

fetch(`/api/search_bookdb?q=${encodeURIComponent(query)}&limit=20`)
.then(r => r.json())
.then(data => {
.then(r => r.json().then(data => ({status: r.status, data})))
.then(({status, data}) => {
if (data.rate_limited) {
resultsList.innerHTML = `<div class="list-group-item bg-dark text-warning">Rate limited. Try again in ${data.retry_after || 60}s.</div>`;
if (typeof showToast === 'function') showToast(`Skaldleita rate limited. Retry in ${data.retry_after || 60}s.`, 'warning');
return;
}
if (data.error) {
resultsList.innerHTML = `<div class="list-group-item bg-dark text-danger">${data.error}</div>`;
return;
9 changes: 7 additions & 2 deletions templates/library.html
@@ -1256,8 +1256,13 @@ <h5 class="modal-title"><i class="bi bi-pencil"></i> Edit Book Metadata</h5>
resultsDiv.style.display = 'block';

fetch(`/api/search_bookdb?q=${encodeURIComponent(query)}&limit=20`)
.then(r => r.json())
.then(data => {
.then(r => r.json().then(data => ({status: r.status, data})))
.then(({status, data}) => {
if (data.rate_limited) {
resultsList.innerHTML = `<div class="list-group-item bg-dark text-warning">Rate limited. Try again in ${data.retry_after || 60}s.</div>`;
if (typeof showToast === 'function') showToast(`Skaldleita rate limited. Retry in ${data.retry_after || 60}s.`, 'warning');
return;
}
if (data.error) {
resultsList.innerHTML = `<div class="list-group-item bg-dark text-danger">${data.error}</div>`;
return;