diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ad1008..74ec034 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,33 @@
All notable changes to Library Manager will be documented in this file.
+## [0.9.0-beta.124] - 2026-02-11
+
+### Added
+
+- **Issue #110 Part 2: Folder triage** - New `library_manager/folder_triage.py` module that
+ categorizes folder names as clean/messy/garbage before processing. Clean folders use path hints
+ normally. Messy folders (scene release tags, torrent markers, quality indicators) skip path
+ parsing and rely on audio/metadata only. Garbage folders (hash names, numbers-only, generic
+ placeholders) also skip path hints and get a confidence penalty. Triage results stored in DB
+ and logged during scans. Integrated into Whisper transcription hints, AI identification
+ prompts, and the processing pipeline queue.
+
+---
+
+## [0.9.0-beta.123] - 2026-02-11
+
+### Added
+
+- **Issue #103: In-app hints and tooltips** - New `library_manager/hints.py` module with contextual
+ documentation for all features and settings. Hover over the (?) icon next to any setting to see a
+ plain-language explanation of what it does. Tooltips added to: all identification layers, AI
+ providers, confidence threshold, trust modes, safety toggles, watch folder, ebook management,
+ metadata embedding, community features, and more. Library page filter chips and action buttons also
+ show helpful tooltips on hover. Users never need to ask "what does this do?" again.
+
+---
+
## [0.9.0-beta.122] - 2026-02-11
### Added
diff --git a/app.py b/app.py
index d31e6f0..71a3244 100644
--- a/app.py
+++ b/app.py
@@ -11,7 +11,7 @@
- Multi-provider AI (Gemini, OpenRouter, Ollama)
"""
-APP_VERSION = "0.9.0-beta.122"
+APP_VERSION = "0.9.0-beta.124"
GITHUB_REPO = "deucebucket/library-manager" # Your GitHub repo
# Versioning Guide:
@@ -111,6 +111,8 @@
get_instance_data,
save_instance_data,
)
+from library_manager.hints import get_all_hints
+from library_manager.folder_triage import triage_folder, triage_book_path, should_use_path_hints, confidence_modifier
# Try to import P2P cache (optional - gracefully degrades if not available)
try:
@@ -3196,31 +3198,39 @@ def search_book_searxng(query, duration_hours=None):
return []
-def calculate_input_quality(folder_name, filenames, info):
+def calculate_input_quality(folder_name, filenames, info, folder_triage='clean'):
"""
Score the quality of input data for AI identification.
Returns a score 0-100 and list of usable clues found.
Low quality inputs (random numbers, 'unknown', no words) should not be
trusted to AI as it will hallucinate famous books.
+
+ Issue #110: folder_triage controls whether folder name is trusted as input.
"""
score = 0
clues = []
- # Check folder name for useful info
- folder_clean = re.sub(r'[_\-\d\.\[\]\(\)]', ' ', folder_name or '').strip()
- words = [w for w in folder_clean.split() if len(w) > 2 and w.lower() not in ('unknown', 'audiobook', 'audio', 'book', 'mp3', 'the', 'and', 'part')]
+ # Issue #110: Only trust folder name for clean folders
+ use_folder = should_use_path_hints(folder_triage)
+
+ if use_folder:
+ # Check folder name for useful info
+ folder_clean = re.sub(r'[_\-\d\.\[\]\(\)]', ' ', folder_name or '').strip()
+ words = [w for w in folder_clean.split() if len(w) > 2 and w.lower() not in ('unknown', 'audiobook', 'audio', 'book', 'mp3', 'the', 'and', 'part')]
- if words:
- score += min(40, len(words) * 10) # Up to 40 points for meaningful words
- clues.append(f"folder_words: {words[:5]}")
+ if words:
+ score += min(40, len(words) * 10) # Up to 40 points for meaningful words
+ clues.append(f"folder_words: {words[:5]}")
- # Check for author-title pattern (e.g., "Author - Title")
- if ' - ' in (folder_name or ''):
- score += 20
- clues.append("has_author_title_separator")
+ # Check for author-title pattern (e.g., "Author - Title")
+ if ' - ' in (folder_name or ''):
+ score += 20
+ clues.append("has_author_title_separator")
+ else:
+ clues.append(f"folder_skipped: triage={folder_triage}")
- # Check metadata tags
+ # Check metadata tags (always trusted regardless of folder triage)
if info.get('title') and info.get('title') not in ('none', 'Unknown', ''):
score += 25
clues.append(f"has_title_tag: {info.get('title')[:30]}")
@@ -3239,6 +3249,12 @@ def calculate_input_quality(folder_name, filenames, info):
score = max(0, score - 50) # Heavy penalty for "unknown_123" type names
clues.append("PENALTY: numeric_garbage_name")
+ # Issue #110: Apply confidence modifier for garbage folders
+ modifier = confidence_modifier(folder_triage)
+ if modifier:
+ score = max(0, score + modifier)
+ clues.append(f"triage_modifier: {modifier}")
+
return min(100, score), clues
@@ -3348,11 +3364,14 @@ def identify_book_with_ai(file_group, config):
info = file_group.get('detected_info', {})
folder_name = file_group.get('folder_name', '')
+ # Issue #110: Determine folder triage for this book
+ ft = file_group.get('folder_triage') or triage_folder(folder_name)
+
# Build context for AI
filenames = [Path(f).name if isinstance(f, str) else f.name for f in files[:20]]
# === HALLUCINATION PREVENTION: Input quality check ===
- input_quality, clues = calculate_input_quality(folder_name, filenames, info)
+ input_quality, clues = calculate_input_quality(folder_name, filenames, info, folder_triage=ft)
if input_quality < 25:
# Input is garbage - don't even try AI, it will hallucinate
@@ -3386,7 +3405,7 @@ def identify_book_with_ai(file_group, config):
- Or are you GUESSING based on a generic title? (If guessing, return null!)
Input information:
-- Folder name: {folder_name}
+- Folder name: {folder_name if should_use_path_hints(ft) else '[UNRELIABLE - ignore folder name]'}
- Files ({len(files)} total): {', '.join(filenames[:10])}{'...' if len(filenames) > 10 else ''}
- Duration: {info.get('duration_hours', 'unknown')} hours
- Album tag: {info.get('title', 'none')}
@@ -4815,6 +4834,7 @@ def deep_scan_library(config):
scanned = 0 # New books added to tracking
queued = 0 # Books added to fix queue
issues_found = {} # path -> list of issues
+ triage_counts = {'clean': 0, 'messy': 0, 'garbage': 0} # Issue #110: Folder triage stats
# Track files for duplicate detection
file_signatures = {} # signature -> list of paths
@@ -4972,6 +4992,8 @@ def deep_scan_library(config):
flat_author, flat_title = extract_author_title(author)
# Issue #132: Resolve path to prevent duplicates
flat_path = str(author_dir.resolve())
+ # Issue #110: Triage folder name quality
+ flat_triage = triage_folder(author)
checked += 1
@@ -4987,13 +5009,15 @@ def deep_scan_library(config):
if has_profile:
continue
flat_book_id = existing_flat['id']
+ c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
+ (flat_triage, flat_book_id))
else:
- c.execute('''INSERT INTO books (path, current_author, current_title, status)
- VALUES (?, ?, ?, 'pending')''', (flat_path, flat_author, flat_title))
+ c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
+ VALUES (?, ?, ?, 'pending', ?)''', (flat_path, flat_author, flat_title, flat_triage))
conn.commit()
flat_book_id = c.lastrowid
scanned += 1
- logger.info(f"Added flat book: {flat_author} - {flat_title}")
+ logger.info(f"Added flat book: {flat_author} - {flat_title} (triage: {flat_triage})")
# Queue for processing
c.execute('SELECT id FROM queue WHERE book_id = ?', (flat_book_id,))
@@ -5136,6 +5160,8 @@ def deep_scan_library(config):
continue # No audio files, skip
checked += 1
+ # Issue #110: Triage folder name quality
+ series_book_triage = triage_folder(book_title)
# Check if already tracked
c.execute('SELECT id, status, profile, user_locked FROM books WHERE path = ?', (book_path,))
@@ -5149,9 +5175,11 @@ def deep_scan_library(config):
if has_profile:
continue
book_id = existing_book['id']
+ c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
+ (series_book_triage, book_id))
else:
- c.execute('''INSERT INTO books (path, current_author, current_title, status)
- VALUES (?, ?, ?, 'pending')''', (book_path, author, book_title))
+ c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
+ VALUES (?, ?, ?, 'pending', ?)''', (book_path, author, book_title, series_book_triage))
conn.commit()
book_id = c.lastrowid
scanned += 1
@@ -5195,6 +5223,12 @@ def deep_scan_library(config):
# This is a valid book folder - count it
checked += 1
+ # Issue #110: Triage folder name quality
+ folder_triage_result = triage_folder(title)
+ triage_counts[folder_triage_result] = triage_counts.get(folder_triage_result, 0) + 1
+ if folder_triage_result != 'clean':
+ logger.info(f"Folder triage: {folder_triage_result} - {title[:60]}")
+
# Analyze title
title_issues = analyze_title(title, author)
cleaned_title, clean_issues = clean_title(title)
@@ -5257,9 +5291,12 @@ def deep_scan_library(config):
queued += 1
continue
book_id = existing['id']
+ # Update triage for existing books (backfill)
+ c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
+ (folder_triage_result, book_id))
else:
- c.execute('''INSERT INTO books (path, current_author, current_title, status)
- VALUES (?, ?, ?, 'pending')''', (path, author, title))
+ c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
+ VALUES (?, ?, ?, 'pending', ?)''', (path, author, title, folder_triage_result))
conn.commit()
book_id = c.lastrowid
scanned += 1
@@ -5319,6 +5356,7 @@ def deep_scan_library(config):
logger.info(f"Scanned: {scanned} new books added to tracking")
logger.info(f"Queued: {queued} books need fixing")
logger.info(f"Already correct: {checked - queued} books")
+ logger.info(f"Folder triage: {triage_counts['clean']} clean, {triage_counts['messy']} messy, {triage_counts['garbage']} garbage")
return checked, scanned, queued
@@ -5512,19 +5550,26 @@ def transcribe_audio_intro(file_path, duration_seconds=45):
initial_prompt = "This is an audiobook introduction. The narrator typically announces the book title, author name, and narrator."
# Add folder hints to the prompt if available
+ # Issue #110: Only use folder hints for clean triage folders
folder_path = Path(file_path).parent
folder_name = folder_path.name
parent_name = folder_path.parent.name if folder_path.parent else ""
- # Extract potential author/title from folder structure for spelling hints
- hints = []
- if parent_name and parent_name not in ['audiobooks', 'Unknown', '']:
- hints.append(parent_name)
- if folder_name and folder_name not in ['audiobooks', 'Unknown', '']:
- hints.append(folder_name)
+ # Check folder triage before trusting folder names as hints
+ folder_triage_result = triage_folder(folder_name)
+
+ if should_use_path_hints(folder_triage_result):
+ # Extract potential author/title from folder structure for spelling hints
+ hints = []
+ if parent_name and parent_name not in ['audiobooks', 'Unknown', '']:
+ hints.append(parent_name)
+ if folder_name and folder_name not in ['audiobooks', 'Unknown', '']:
+ hints.append(folder_name)
- if hints:
- initial_prompt += f" Possible names: {', '.join(hints)}."
+ if hints:
+ initial_prompt += f" Possible names: {', '.join(hints)}."
+ else:
+ logger.info(f"[LAYER 1/AUDIO] Skipping folder hints (triage: {folder_triage_result}): {folder_name[:40]}")
# Transcribe with better settings for accuracy
segments, info = whisper_model.transcribe(
@@ -6761,6 +6806,12 @@ def inject_worker_status():
"""Inject worker_running into all templates automatically."""
return {'worker_running': is_worker_running()}
+
+@app.context_processor
+def inject_hints():
+ """Template context processor: expose the mapping returned by get_all_hints() to every template as ``hints`` (used for tooltips)."""
+ return {'hints': get_all_hints()}
+
# ============== ROUTES ==============
@app.route('/')
@@ -9111,7 +9162,8 @@ def build_order_by(sort_cols, default_order):
# Issue #36: Filter out series_folder and multi_book_files - they should never appear in queue
order = build_order_by(QUEUE_SORT_COLS, 'q.priority, q.added_at')
c.execute('''SELECT q.id as queue_id, q.reason, q.added_at, q.priority,
- b.id as book_id, b.path, b.current_author, b.current_title, b.status
+ b.id as book_id, b.path, b.current_author, b.current_title, b.status,
+ b.folder_triage
FROM queue q
JOIN books b ON q.book_id = b.id
WHERE b.status NOT IN ('series_folder', 'multi_book_files', 'verified', 'fixed')
@@ -9128,7 +9180,8 @@ def build_order_by(sort_cols, default_order):
'status': 'in_queue',
'reason': row['reason'],
'priority': row['priority'],
- 'added_at': row['added_at']
+ 'added_at': row['added_at'],
+ 'folder_triage': row['folder_triage'] or 'clean'
})
elif status_filter == 'fixed':
diff --git a/library_manager/database.py b/library_manager/database.py
index 7a176be..eb12340 100644
--- a/library_manager/database.py
+++ b/library_manager/database.py
@@ -126,6 +126,13 @@ def init_db(db_path=None):
except:
pass # Column already exists
+ # Add folder_triage column - categorizes folder name quality (clean/messy/garbage)
+ # Issue #110: Used to decide whether to trust path-derived hints
+ try:
+ c.execute("ALTER TABLE books ADD COLUMN folder_triage TEXT DEFAULT 'clean'")
+ except:
+ pass # Column already exists
+
# Stats table - daily stats
c.execute('''CREATE TABLE IF NOT EXISTS stats (
id INTEGER PRIMARY KEY,
diff --git a/library_manager/folder_triage.py b/library_manager/folder_triage.py
new file mode 100644
index 0000000..bfd103a
--- /dev/null
+++ b/library_manager/folder_triage.py
@@ -0,0 +1,98 @@
+"""
+Folder triage - categorize folder names by cleanliness.
+
+Determines processing strategy per-folder:
+- CLEAN: Use folder name as hints for author/title parsing
+- MESSY: Skip path parsing, rely on audio/metadata only
+- GARBAGE: Skip path parsing, expect harder match, lower confidence
+
+Issue #110 Part 2
+"""
+import os
+import re
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Scene release tags, torrent markers, quality indicators (matched with search(), case-insensitively)
+MESSY_PATTERNS = [
+    r'\{[a-z]+\}',                       # {mb}, {cbt}
+    r'\[[A-Z0-9]+\]',                    # [FLAC], [MP3]
+    r'\([A-Za-z]+\)',                    # (Thorne), (narrator)
+    r'^\d{4}\s*-',                       # 2023 -
+    r'\d{2}\.\d{2}\.\d{2}',              # 01.10.42
+    r'\d+k\b',                           # 62k, 128k
+    r'\d+kbps',                          # 64kbps
+    r'\bHQ\b|\bLQ\b',                    # Quality markers
+    r'(?-i:-[A-Z]{2,4}$)',               # -TEAM suffix; kept case-sensitive so "Spider-Man" / "Sci-Fi" stay clean
+    r'\.com\b',                          # Website in name
+    r'\bwww\.',                          # Website prefix
+    r'\b(rip|ripped|scene)\b',           # Rip indicators
+    r'\b(x264|aac|mp3|flac|ogg|m4b)\b',  # Codec in name
+]
+
+# Completely useless folder names (anchored; matched with match(), case-insensitively)
+GARBAGE_PATTERNS = [
+    r'^[a-f0-9]{12,}$',                  # Hash-only names (12+ hex chars)
+    r'^[\d\s\-\.]+$',                    # Numbers only
+    r'^(New Folder|tmp|downloads?|torrents?|audiobooks?|untitled)$',
+    r'^(CD|Disc|Track)\s*\d+$',          # Disc/track folders
+    r'^Unknown\s*(Artist|Author|Album)?$',  # Generic unknowns
+]
+
+# Compiled once at import time; IGNORECASE is global, patterns opt out locally with (?-i:...)
+_MESSY_COMPILED = [re.compile(p, re.IGNORECASE) for p in MESSY_PATTERNS]
+_GARBAGE_COMPILED = [re.compile(p, re.IGNORECASE) for p in GARBAGE_PATTERNS]
+
+
+def triage_folder(folder_name):
+    """
+    Classify a folder name as 'clean', 'messy' or 'garbage'.
+
+    Returns:
+        'garbage' - empty/blank name, or the name matches a garbage
+                    pattern (hash, numbers, generic placeholder)
+        'messy'   - a messy pattern (scene tag, codec, ...) occurs in it
+        'clean'   - neither of the above
+    """
+    # None, '' and whitespace-only names carry no information at all.
+    name = (folder_name or '').strip()
+    if not name:
+        return 'garbage'
+
+    # Garbage takes precedence over messy, so it is tested first.
+    if any(rx.match(name) for rx in _GARBAGE_COMPILED):
+        return 'garbage'
+
+    # Messy markers may sit anywhere in the name, hence search().
+    if any(rx.search(name) for rx in _MESSY_COMPILED):
+        return 'messy'
+
+    # No pattern fired: the name is treated as trustworthy.
+    return 'clean'
+
+
+def triage_book_path(book_path):
+    """
+    Triage the book folder from a full book path.
+
+    For a path like /audiobooks/Author Name/Book Title (with or without a
+    trailing separator), triages the final path component (Book Title).
+
+    Returns:
+        tuple: (triage_result, folder_name); folder_name is '' for an empty path
+    """
+    folder_name = os.path.basename(os.path.normpath(book_path)) if book_path else ''
+    return triage_folder(folder_name), folder_name
+
+
+def should_use_path_hints(triage_result):
+ """Return True only when the triage category is exactly 'clean'; any other value ('messy', 'garbage', None, ...) yields False."""
+ return triage_result == 'clean'
+
+
+def confidence_modifier(triage_result):
+    """Return the score adjustment for a folder triage category."""
+    # Only 'garbage' folders are penalised; every other value is neutral.
+    is_garbage = triage_result == 'garbage'
+    return -10 if is_garbage else 0
diff --git a/library_manager/hints.py b/library_manager/hints.py
new file mode 100644
index 0000000..462873f
--- /dev/null
+++ b/library_manager/hints.py
@@ -0,0 +1,118 @@
+"""
+In-app documentation hints for Library Manager.
+Provides contextual help text for UI tooltips and hover explanations.
+"""
+
+HINTS = {
+ # === Identification Layers ===
+ 'layer_1': 'Database Lookups: Searches Skaldleita, Audnexus, OpenLibrary, Google Books, and Hardcover for metadata matches. Free, fast, no API key needed.',
+ 'layer_2': 'AI Verification: When databases return uncertain matches, AI (Gemini, OpenRouter, or Ollama) cross-checks the results. Uses your configured AI provider.',
+ 'layer_3': 'Audio Analysis: Extracts the first 90 seconds of audio to identify the book from narrator intros and title announcements. Can use Skaldleita GPU or your own Gemini API.',
+ 'layer_4': 'Content Analysis: Last resort. Transcribes story text with Whisper and sends it to AI to identify the book. Slowest but catches edge cases other layers miss.',
+
+ # === AI Providers ===
+ 'skaldleita': 'Free GPU-powered audio identification service. Transcribes your audiobook intro and matches it against 50M+ books. Does not use your API quota.',
+ 'gemini': 'Google Gemini AI. Free tier offers 14,400 calls/day with Gemma 3 models. Handles both text verification and native audio analysis.',
+ 'openrouter': 'API gateway to multiple AI models. Free models available (Llama, Gemma). Used as fallback when Gemini is unavailable or for Layer 4 content analysis.',
+ 'ollama': 'Self-hosted AI. Run models locally with no API costs or rate limits. Requires separate Ollama installation.',
+
+ # === Confidence & Verification ===
+ 'confidence_threshold': 'Minimum confidence percentage before a book is considered identified. Higher values mean more certainty but slower processing. Lower values accept weaker matches faster.',
+ 'confidence_percentage': 'How certain the system is about this identification. Built from multiple sources: audio analysis (85 weight), ID3 tags (80), metadata files (75), database lookups (65), AI (60), path analysis (40). Multiple agreeing sources boost confidence.',
+ 'deep_scan_mode': 'Runs ALL enabled identification layers for every book, even if an earlier layer already found a confident match. Slower but more thorough.',
+
+ # === Status Meanings ===
+ 'status_pending': 'A rename has been proposed. Review the suggested author/title and click Apply to rename, or Reject to dismiss.',
+ 'status_verified': 'This book is already in the correct Author/Title folder. No changes needed.',
+ 'status_fixed': 'This book was successfully renamed and moved to its new Author/Title location.',
+ 'status_queued': 'Waiting to be identified. Will be processed automatically when the worker runs, or click Process Queue to start now.',
+ 'status_error': 'Something went wrong during identification or renaming. Check the error message for details.',
+ 'status_attention': 'Could not be auto-identified with enough confidence. Needs manual review - click Edit to set the correct author and title.',
+ 'status_orphan': 'Loose audio files without a proper folder structure. Click Organize to move them into an Author/Title folder.',
+ 'status_locked': 'Protected from automatic changes. Unlock to allow the system to process this book again.',
+ 'status_duplicate': 'Multiple copies of the same book detected in your library.',
+ 'status_reversed': 'Author and title folders appear swapped (e.g., Title/Author instead of Author/Title).',
+
+ # === Settings - Library Tab ===
+ 'library_paths': 'Folders containing your audiobook library. Each path is scanned for book folders. Supports multiple paths (one per line).',
+ 'naming_format': 'How renamed folders are structured. Author/Title works with Audiobookshelf, Plex, and Jellyfin. Custom templates let you include series, narrator, year, and more.',
+ 'series_grouping': 'Groups series books under a shared folder: Author/Series Name/1 - Title. Keeps multi-book series organized together.',
+ 'standardize_initials': 'Normalizes author initials to a consistent format (e.g., "JRR Tolkien" and "J.R.R. Tolkien" both become "J. R. R. Tolkien"). Prevents duplicate author folders.',
+ 'strip_unabridged': 'Removes "(Unabridged)", "[Unabridged]", and similar markers from book titles during rename.',
+ 'multilang_naming': 'Controls how non-English books are named. Native keeps the original language title. Preferred translates to your language. Tagged adds a language indicator.',
+
+ # === Settings - Watch Folder ===
+ 'watch_folder': 'Monitors a folder for new audiobooks and automatically organizes them into your library. Great for processing downloads or imports.',
+ 'watch_interval': 'How often (in seconds) to check the watch folder for new files.',
+ 'watch_min_age': 'Minimum file age before processing. Prevents picking up files still being downloaded or copied.',
+ 'watch_hard_links': 'Use hard links instead of moving files. Only works when watch folder and library are on the same filesystem. Saves disk space during processing.',
+
+ # === Settings - Processing Tab ===
+ 'background_processing': 'Automatically processes queue items without manual intervention. Disable to only process when you click Process Queue.',
+ 'scan_interval': 'Hours between automatic library scans. The system checks for new or changed books at this interval.',
+ 'batch_size': 'Number of books processed in each batch. Higher values process faster but use more API calls at once.',
+ 'max_requests_per_hour': 'Rate limit for API calls. Prevents hitting provider rate limits. Range: 10-500.',
+
+ # === Settings - AI Setup Tab ===
+ 'gemini_api_key': 'Free API key from Google AI Studio (aistudio.google.com). Enables Gemini AI for text verification and audio analysis. 14,400 free calls per day.',
+ 'openrouter_api_key': 'API key from openrouter.ai. Provides access to free AI models as fallback, and enables Layer 4 content analysis.',
+ 'bookdb_api_key': 'Optional Skaldleita API key. Increases your rate limit from 500 to 1000 requests per hour. Free to register.',
+ 'google_books_api_key': 'Optional Google Books API key for higher rate limits on book lookups.',
+ 'ai_provider': 'Which AI to try first for text verification. Falls back to other configured providers automatically if the primary fails.',
+ 'provider_chain': 'Order in which providers are tried. If the first one fails or is unavailable, the next one is used automatically.',
+
+ # === Settings - Safety Tab ===
+ 'auto_fix': 'Automatically applies safe renames without asking. Only applies non-drastic changes (e.g., fixing capitalization). Drastic author changes still require approval.',
+ 'protect_author_changes': 'When the author changes completely (e.g., "Unknown" to "Stephen King"), the fix is sent to Pending for manual review instead of auto-applying.',
+ 'trust_the_process': 'YOLO mode. Auto-applies ALL changes when AI and audio analysis agree, including drastic author changes. No safety net. Back up your library first.',
+ 'skip_confirmations': 'Removes "Are you sure?" popups when clicking Apply, Reject, or Undo. Faster workflow but no second chances.',
+
+ # === Settings - Advanced Tab ===
+ 'metadata_embedding': 'Writes metadata tags (title, author, narrator, series) directly into audio files when fixes are applied. Supports MP3, M4B, FLAC, and Ogg.',
+ 'ebook_management': 'Enables scanning and organizing ebook files (.epub, .mobi, .azw3, .pdf). Can merge ebooks into the same Author/Title folders as audiobooks or keep them separate.',
+ 'isbn_lookup': 'Extracts ISBN from EPUB/PDF metadata for more accurate book matching.',
+ 'error_reporting': 'Shares anonymous error reports to help improve Library Manager. Never includes file paths, API keys, or personal data.',
+ 'community_contributions': 'Shares extracted metadata (author, title, narrator) with other Library Manager users. When 2+ users agree on metadata, it becomes verified for everyone.',
+ 'p2p_cache': 'Shares book lookup results via a decentralized peer-to-peer network. Helps when Skaldleita is temporarily unavailable.',
+ 'language_detection': 'Uses Gemini to detect the spoken language of audiobooks from audio samples.',
+ 'strict_language_matching': 'Only matches books in your preferred language. Prevents cross-language mismatches (e.g., a Russian audiobook matching an English database entry).',
+ 'preserve_original_titles': 'Keeps foreign language titles as-is instead of translating them to your preferred language.',
+ 'deep_verification': 'Re-verifies your entire library against APIs, even books that look correctly named. Use when you suspect misattributed books in an imported collection.',
+
+ # === Trust Mode ===
+ 'sl_trust_full': 'Accepts Skaldleita matches at 80%+ confidence and skips AI verification. Recommended - GPU Whisper with 50M book database is usually accurate.',
+ 'sl_trust_boost': 'Uses Skaldleita results as a strong hint, then verifies with database APIs. Skips AI. Good middle ground.',
+ 'sl_trust_legacy': 'Uses AI to verify uncertain Skaldleita matches. Most thorough but uses more API quota.',
+
+ # === Source Icons ===
+ 'source_bookdb': 'Identified via Skaldleita - GPU-powered audio fingerprinting matched against 50M+ book database.',
+ 'source_audio': 'Identified from audio analysis - narrator intro or title announcement detected.',
+ 'source_ai': 'Verified by AI - an AI model confirmed the identification.',
+ 'source_id3': 'Metadata from embedded ID3/audio tags in the file itself.',
+ 'source_json': 'Metadata from a JSON sidecar file (e.g., metadata.json, info.json).',
+ 'source_path': 'Inferred from the folder path and filename structure.',
+ 'source_googlebooks': 'Matched via Google Books API.',
+ 'source_openlibrary': 'Matched via OpenLibrary API.',
+ 'source_audnexus': 'Matched via Audnexus (Audible metadata).',
+ 'source_hardcover': 'Matched via Hardcover API (indie/modern books).',
+ 'source_user': 'Manually set by user - overrides all other sources.',
+
+ # === Voice ID ===
+ 'voice_id': 'Identifies narrators by voice fingerprint - like Shazam for audiobooks. Builds a community narrator library that improves over time.',
+
+ # === Misc UI ===
+ 'free_badge': 'This feature is completely free - no API key or payment required.',
+ 'uses_tokens_badge': 'This feature uses API calls from your configured provider. Check your provider dashboard for usage.',
+ 'scan_library': 'Scans your library paths for new or changed audiobook folders. Does not process them - just discovers what needs to be identified.',
+ 'process_queue': 'Starts processing all queued books through the identification pipeline (Layer 1 through Layer 4, depending on your settings).',
+}
+
+
+def get_hint(key: str, default: str = '') -> str:
+ """Get a hint by key; returns `default` (an empty string unless overridden) when the key is not in HINTS."""
+ return HINTS.get(key, default)
+
+
+def get_all_hints() -> dict:
+    """Return a shallow copy of HINTS so callers cannot mutate the module-level mapping."""
+    return dict(HINTS)
diff --git a/library_manager/pipeline/layer_ai_queue.py b/library_manager/pipeline/layer_ai_queue.py
index cea2803..bae9c70 100644
--- a/library_manager/pipeline/layer_ai_queue.py
+++ b/library_manager/pipeline/layer_ai_queue.py
@@ -125,7 +125,7 @@ def process_queue(
# Process items at specified layer (or layer 4 for folder fallback)
c.execute('''SELECT q.id as queue_id, q.book_id, q.reason,
b.path, b.current_author, b.current_title,
- b.confidence, b.profile
+ b.confidence, b.profile, b.folder_triage
FROM queue q
JOIN books b ON q.book_id = b.id
WHERE b.verification_layer = ?
@@ -137,7 +137,7 @@ def process_queue(
# API disabled - process all queue items directly with AI
c.execute('''SELECT q.id as queue_id, q.book_id, q.reason,
b.path, b.current_author, b.current_title,
- b.confidence, b.profile
+ b.confidence, b.profile, b.folder_triage
FROM queue q
JOIN books b ON q.book_id = b.id
WHERE b.status NOT IN ('verified', 'fixed', 'series_folder', 'multi_book_files', 'needs_attention')
@@ -205,7 +205,18 @@ def process_queue(
return len(garbage_batch), 0 # (processed, fixed)
# Build messy names for AI
- messy_names = [f"{row['current_author']} - {row['current_title']}" for row in batch]
+ # Issue #110: For messy/garbage triage folders, mark the folder name as unreliable
+ messy_names = []
+ for row in batch:
+ triage = row.get('folder_triage') or 'clean'
+ name = f"{row['current_author']} - {row['current_title']}"
+ if triage == 'garbage':
+ name += " [FOLDER NAME UNRELIABLE - use audio/metadata only]"
+ logger.info(f"[{layer_name}] Garbage triage folder, suppressing path hints: {row['current_title'][:40]}")
+ elif triage == 'messy':
+ name += " [FOLDER NAME MAY BE UNRELIABLE]"
+ logger.info(f"[{layer_name}] Messy triage folder: {row['current_title'][:40]}")
+ messy_names.append(name)
logger.info(f"[DEBUG] Processing batch of {len(batch)} items:")
for i, name in enumerate(messy_names):
diff --git a/templates/library.html b/templates/library.html
index 120bc24..f498c75 100644
--- a/templates/library.html
+++ b/templates/library.html
@@ -85,6 +85,61 @@
opacity: 1;
color: #00d9ff;
}
+ .hint-icon {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ width: 14px;
+ height: 14px;
+ border-radius: 50%;
+ border: 1px solid rgba(0, 217, 255, 0.4);
+ color: rgba(0, 217, 255, 0.6);
+ font-size: 9px;
+ font-weight: bold;
+ font-style: normal;
+ cursor: help;
+ margin-left: 3px;
+ position: relative;
+ vertical-align: middle;
+ line-height: 1;
+ }
+ .hint-icon:hover {
+ border-color: #00d9ff;
+ color: #00d9ff;
+ }
+ .hint-icon .hint-text {
+ display: none;
+ position: absolute;
+ bottom: calc(100% + 8px);
+ left: 50%;
+ transform: translateX(-50%);
+ background: rgba(15, 52, 96, 0.95);
+ border: 1px solid rgba(0, 217, 255, 0.3);
+ color: #eee;
+ padding: 8px 12px;
+ border-radius: 6px;
+ font-size: 0.8rem;
+ font-weight: normal;
+ font-style: normal;
+ line-height: 1.4;
+ white-space: normal;
+ width: 260px;
+ z-index: 1000;
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4);
+ pointer-events: none;
+ }
+ .hint-icon .hint-text::after {
+ content: '';
+ position: absolute;
+ top: 100%;
+ left: 50%;
+ transform: translateX(-50%);
+ border: 6px solid transparent;
+ border-top-color: rgba(0, 217, 255, 0.3);
+ }
+ .hint-icon:hover .hint-text {
+ display: block;
+ }
@@ -95,42 +150,42 @@
All0
-
+ Pending0
-
+ Orphans0
-
+ Queue0
-
+ Fixed0
-
+ Verified0
-
+ Errors0
-
+ Attention0
-
+ Locked0
@@ -177,10 +232,10 @@