From e00252d9d7e8b4287725867124ab13f046cb921f Mon Sep 17 00:00:00 2001 From: deucebucket Date: Mon, 16 Feb 2026 00:18:21 -0600 Subject: [PATCH 1/5] Fix #155 #154: Add API key to /search requests + rate limit handling --- app.py | 57 ++++++++++++++------- library_manager/providers/__init__.py | 4 ++ library_manager/providers/bookdb.py | 27 +++------- library_manager/providers/fingerprint.py | 17 +++++++ library_manager/providers/rate_limiter.py | 62 +++++++++++++++++++++++ templates/history.html | 9 +++- templates/library.html | 9 +++- templates/queue.html | 9 +++- 8 files changed, 148 insertions(+), 46 deletions(-) diff --git a/app.py b/app.py index f215e6c..d144c60 100644 --- a/app.py +++ b/app.py @@ -73,9 +73,10 @@ ) from library_manager.providers import ( rate_limit_wait, is_circuit_open, record_api_failure, record_api_success, + handle_rate_limit_response, API_RATE_LIMITS, API_CIRCUIT_BREAKER, search_audnexus, search_openlibrary, search_google_books, search_hardcover, - BOOKDB_API_URL, BOOKDB_PUBLIC_KEY, + BOOKDB_API_URL, BOOKDB_PUBLIC_KEY, get_signed_headers, search_bookdb as _search_bookdb_raw, identify_audio_with_bookdb, call_ollama as _call_ollama_raw, call_ollama_simple as _call_ollama_simple_raw, get_ollama_models, test_ollama_connection, @@ -3021,7 +3022,7 @@ def group_loose_files(files): def search_bookdb_api(title, author=None, retry_count=0): """ - Search the BookBucket API for a book (public endpoint, no auth needed). + Search the Skaldleita API for a book. Uses Qdrant vector search - fast even with 50M books. Returns dict with author, title, series if found. Filters garbage matches using title similarity. @@ -3040,6 +3041,12 @@ def search_bookdb_api(title, author=None, retry_count=0): rate_limit_wait('bookdb') # 3.6s delay = max 1000/hr, never skips try: + # Build headers with auth (Skaldleita requires auth on all endpoints) + secrets = load_secrets() + api_key = secrets.get('bookdb_api_key') or BOOKDB_PUBLIC_KEY + headers = get_signed_headers() + headers['X-API-Key'] = api_key + # Use longer timeout for cold start (embedding model can take 45-60s to load) # Retry once on timeout for attempt in range(2): @@ -3047,6 +3054,7 @@ def search_bookdb_api(title, author=None, retry_count=0): response = requests.get( f"{BOOKDB_API_URL}/search", params={"q": search_title, "limit": 5}, + headers=headers, timeout=60 if attempt == 0 else 30 ) break @@ -3056,16 +3064,13 @@ def search_bookdb_api(title, author=None, retry_count=0): continue raise - # Handle rate limiting - respect Retry-After header from server - if response.status_code == 429 and retry_count < 3: - retry_after = response.headers.get('Retry-After', '60') - try: - wait_time = min(int(retry_after), 300) # Cap at 5 minutes - except ValueError: - wait_time = 60 * (retry_count + 1) # Fallback: 60s, 120s, 180s - logger.info(f"BookDB API rate limited, waiting {wait_time}s (Retry-After: {retry_after})...") - time.sleep(wait_time) - return search_bookdb_api(title, retry_count + 1) + # Handle rate limiting with exponential backoff + if response.status_code == 429: + rl = handle_rate_limit_response(response, 'bookdb', retry_count) + if rl['should_retry']: + time.sleep(rl['wait_seconds']) + return search_bookdb_api(title, author, retry_count + 1) + return None if response.status_code == 200: results = response.json() @@ -10894,13 +10899,9 @@ def api_abs_remove_exclude(): # ============== MANUAL BOOK MATCHING ============== -# Use the public BookBucket API - same as metadata pipeline -# No API key required - the search endpoints are public - @app.route('/api/search_bookdb') def api_search_bookdb(): - """Search BookBucket for books/series to manually match. - Uses the public /search endpoint - no API key required. + """Search Skaldleita for books/series to manually match. Falls back to Google Books if BookDB is unavailable or returns no results. """ query = request.args.get('q', '').strip() @@ -10950,14 +10951,32 @@ def api_search_bookdb(): if author: params['author'] = author - # Use public /search endpoint (no auth required) + # Build headers with auth (Skaldleita requires auth on all endpoints) + secrets = load_secrets() + sl_api_key = secrets.get('bookdb_api_key') or BOOKDB_PUBLIC_KEY + sl_headers = get_signed_headers() + sl_headers['X-API-Key'] = sl_api_key + if search_type == 'all': endpoint = f"{BOOKDB_API_URL}/search" else: endpoint = f"{BOOKDB_API_URL}/search/{search_type}" # Longer timeout for cold start (embedding model can take 45-60s to load) - resp = requests.get(endpoint, params=params, timeout=60) + resp = requests.get(endpoint, params=params, headers=sl_headers, timeout=60) + + if resp.status_code == 429: + retry_after = resp.headers.get('Retry-After', '60') + try: + wait_seconds = int(retry_after) + except ValueError: + wait_seconds = 60 + return jsonify({ + 'error': f'Skaldleita rate limited. Try again in {wait_seconds}s.', + 'rate_limited': True, + 'retry_after': wait_seconds, + 'results': [] + }), 429 if resp.status_code == 200: results = resp.json() diff --git a/library_manager/providers/__init__.py b/library_manager/providers/__init__.py index a5fa835..514ae04 100644 --- a/library_manager/providers/__init__.py +++ b/library_manager/providers/__init__.py @@ -19,6 +19,7 @@ is_circuit_open, record_api_failure, record_api_success, + handle_rate_limit_response, API_RATE_LIMITS, API_CIRCUIT_BREAKER, ) @@ -29,6 +30,7 @@ from library_manager.providers.bookdb import ( BOOKDB_API_URL, BOOKDB_PUBLIC_KEY, + get_signed_headers, search_bookdb, identify_audio_with_bookdb, ) @@ -82,6 +84,7 @@ 'is_circuit_open', 'record_api_failure', 'record_api_success', + 'handle_rate_limit_response', 'API_RATE_LIMITS', 'API_CIRCUIT_BREAKER', # API providers @@ -92,6 +95,7 @@ # Skaldleita (legacy name: BookDB) 'BOOKDB_API_URL', 'BOOKDB_PUBLIC_KEY', + 'get_signed_headers', 'search_bookdb', 'identify_audio_with_bookdb', # Ollama diff --git a/library_manager/providers/bookdb.py b/library_manager/providers/bookdb.py index 7d593f6..83e7345 100644 --- a/library_manager/providers/bookdb.py +++ b/library_manager/providers/bookdb.py @@ -23,6 +23,7 @@ is_circuit_open, record_api_failure, record_api_success, + handle_rate_limit_response, API_CIRCUIT_BREAKER, ) from library_manager.utils.voice_embedding import ( @@ -148,30 +149,14 @@ def search_bookdb(title, author=None, api_key=None, retry_count=0, bookdb_url=No timeout=10 ) - # Handle rate limiting - respect Retry-After header from server + # Handle rate limiting with exponential backoff if resp.status_code == 429: - # Increment circuit breaker failures - if 'bookdb' in API_CIRCUIT_BREAKER: - cb = API_CIRCUIT_BREAKER['bookdb'] - cb['failures'] = cb.get('failures', 0) + 1 - if cb['failures'] >= cb.get('max_failures', 5): - cb['circuit_open_until'] = time.time() + cb.get('cooldown', 120) - logger.warning(f"Skaldleita: Circuit OPEN after {cb['failures']} rate limits, backing off for {cb['cooldown']}s") - return None - - if retry_count < 2: # Reduced retries since we have circuit breaker now - retry_after = resp.headers.get('Retry-After', '60') - try: - wait_time = min(int(retry_after), 120) # Cap at 2 minutes - except ValueError: - wait_time = 30 * (retry_count + 1) # Fallback: 30s, 60s - logger.info(f"Skaldleita rate limited, waiting {wait_time}s (Retry-After: {retry_after})...") - time.sleep(wait_time) + rl = handle_rate_limit_response(resp, 'bookdb', retry_count) + if rl['should_retry']: + time.sleep(rl['wait_seconds']) return search_bookdb(title, author, api_key, retry_count + 1, bookdb_url, config, data_dir, cache_getter) - else: - logger.warning("Skaldleita rate limited, max retries reached") - return None + return None if resp.status_code != 200: logger.debug(f"Skaldleita returned status {resp.status_code}") diff --git a/library_manager/providers/fingerprint.py b/library_manager/providers/fingerprint.py index 9fb712c..afbd1c8 100644 --- a/library_manager/providers/fingerprint.py +++ b/library_manager/providers/fingerprint.py @@ -18,6 +18,8 @@ import requests +from library_manager.providers.rate_limiter import handle_rate_limit_response + logger = logging.getLogger(__name__) # Skaldleita fingerprint endpoints @@ -142,6 +144,11 @@ def lookup_fingerprint( timeout=10 ) + if response.status_code == 429: + rl = handle_rate_limit_response(response, 'bookdb') + logger.warning(f"[FINGERPRINT] Rate limited (retry_after: {rl['retry_after']})") + return None + if response.status_code == 200: data = response.json() if data.get('match'): @@ -220,6 +227,11 @@ def contribute_fingerprint( timeout=10 ) + if response.status_code == 429: + rl = handle_rate_limit_response(response, 'bookdb') + logger.warning(f"[FINGERPRINT] Contribution rate limited (retry_after: {rl['retry_after']})") + return False + if response.status_code in (200, 201): data = response.json() if data.get('is_new'): @@ -507,6 +519,11 @@ def lookup_narrator( timeout=10 ) + if response.status_code == 429: + handle_rate_limit_response(response, 'bookdb') + logger.warning("[NARRATOR] Lookup rate limited") + return None + if response.status_code == 200: data = response.json() if data.get('match'): diff --git a/library_manager/providers/rate_limiter.py b/library_manager/providers/rate_limiter.py index 479a6fd..53d1c2f 100644 --- a/library_manager/providers/rate_limiter.py +++ b/library_manager/providers/rate_limiter.py @@ -86,6 +86,67 @@ def record_api_success(api_name): API_CIRCUIT_BREAKER[api_name]['failures'] = 0 +def handle_rate_limit_response(response, api_name, retry_count=0, max_retries=2): + """ + Handle a 429 response with exponential backoff and circuit breaker. + + Args: + response: The requests.Response object (must be status 429) + api_name: API name for circuit breaker tracking (e.g. 'bookdb') + retry_count: Current retry attempt (0-based) + max_retries: Maximum number of retries before giving up + + Returns: + dict with: + 'should_retry': bool - whether caller should retry the request + 'wait_seconds': int - how long to wait before retrying (0 if not retrying) + 'circuit_open': bool - whether circuit breaker tripped + 'retry_after': str - raw Retry-After header value + """ + retry_after_raw = response.headers.get('Retry-After', '') + + result = { + 'should_retry': False, + 'wait_seconds': 0, + 'circuit_open': False, + 'retry_after': retry_after_raw, + } + + # Update circuit breaker + record_api_failure(api_name) + + cb = API_CIRCUIT_BREAKER.get(api_name, {}) + if cb.get('circuit_open_until', 0) > time.time(): + result['circuit_open'] = True + logger.warning(f"[RATE LIMIT] {api_name}: Circuit breaker tripped, backing off") + return result + + if retry_count >= max_retries: + logger.warning(f"[RATE LIMIT] {api_name}: Max retries ({max_retries}) reached") + return result + + # Calculate wait time: use Retry-After header, with exponential backoff fallback + try: + wait_time = int(retry_after_raw) if retry_after_raw else 0 + except ValueError: + wait_time = 0 + + if wait_time <= 0: + # Exponential backoff: 30s, 60s, 120s... + wait_time = 30 * (2 ** retry_count) + + # Cap at 5 minutes + wait_time = min(wait_time, 300) + + result['should_retry'] = True + result['wait_seconds'] = wait_time + + logger.info(f"[RATE LIMIT] {api_name}: Rate limited, waiting {wait_time}s " + f"(attempt {retry_count + 1}/{max_retries}, Retry-After: {retry_after_raw or 'none'})") + + return result + + __all__ = [ 'API_RATE_LIMITS', 'API_RATE_LOCK', @@ -94,4 +155,5 @@ def record_api_success(api_name): 'is_circuit_open', 'record_api_failure', 'record_api_success', + 'handle_rate_limit_response', ] diff --git a/templates/history.html b/templates/history.html index c80b8a1..17797bc 100644 --- a/templates/history.html +++ b/templates/history.html @@ -343,8 +343,13 @@ resultsDiv.style.display = 'block'; fetch(`/api/search_bookdb?q=${encodeURIComponent(query)}&limit=20`) - .then(r => r.json()) - .then(data => { + .then(r => r.json().then(data => ({status: r.status, data}))) + .then(({status, data}) => { + if (data.rate_limited) { + resultsList.innerHTML = `
Rate limited. Try again in ${data.retry_after || 60}s.
`; + if (typeof showToast === 'function') showToast(`Skaldleita rate limited. Retry in ${data.retry_after || 60}s.`, 'warning'); + return; + } if (data.error) { resultsList.innerHTML = `
${data.error}
`; return; diff --git a/templates/library.html b/templates/library.html index f498c75..92ec03f 100644 --- a/templates/library.html +++ b/templates/library.html @@ -1256,8 +1256,13 @@ resultsDiv.style.display = 'block'; fetch(`/api/search_bookdb?q=${encodeURIComponent(query)}&limit=20`) - .then(r => r.json()) - .then(data => { + .then(r => r.json().then(data => ({status: r.status, data}))) + .then(({status, data}) => { + if (data.rate_limited) { + resultsList.innerHTML = `
Rate limited. Try again in ${data.retry_after || 60}s.
`; + if (typeof showToast === 'function') showToast(`Skaldleita rate limited. Retry in ${data.retry_after || 60}s.`, 'warning'); + return; + } if (data.error) { resultsList.innerHTML = `
${data.error}
`; return; diff --git a/templates/queue.html b/templates/queue.html index 65ae970..7bf5779 100644 --- a/templates/queue.html +++ b/templates/queue.html @@ -558,8 +558,13 @@