deucebucket · deucebucket · Feb 11, 2026 · Feb 11, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,33 @@
 
 All notable changes to Library Manager will be documented in this file.
 
+## [0.9.0-beta.124] - 2026-02-11
+
+### Added
+
+- **Issue #110 Part 2: Folder triage** - New `library_manager/folder_triage.py` module that
+  categorizes folder names as clean/messy/garbage before processing. Clean folders use path hints
+  normally. Messy folders (scene release tags, torrent markers, quality indicators) skip path
+  parsing and rely on audio/metadata only. Garbage folders (hash names, numbers-only, generic
+  placeholders) also skip path hints and get a confidence penalty. Triage results are stored in
+  the database and logged during scans. Triage is integrated into Whisper transcription hints,
+  AI identification prompts, and the processing pipeline queue.
+
+---
+
+## [0.9.0-beta.123] - 2026-02-11
+
+### Added
+
+- **Issue #103: In-app hints and tooltips** - New `library_manager/hints.py` module with contextual
+  documentation for all features and settings. Hover over the (?) icon next to any setting to see a
+  plain-language explanation of what it does. Tooltips added to: all identification layers, AI
+  providers, confidence threshold, trust modes, safety toggles, watch folder, ebook management,
+  metadata embedding, community features, and more. Library page filter chips and action buttons also
+  show helpful tooltips on hover. Users never need to ask "what does this do?" again.
+
+---
+
 ## [0.9.0-beta.122] - 2026-02-11
 
 ### Added

diff --git a/app.py b/app.py
@@ -11,7 +11,7 @@
 - Multi-provider AI (Gemini, OpenRouter, Ollama)
 """
 
-APP_VERSION = "0.9.0-beta.122"
+APP_VERSION = "0.9.0-beta.124"
 GITHUB_REPO = "deucebucket/library-manager"  # Your GitHub repo
 
 # Versioning Guide:
@@ -111,6 +111,8 @@
     get_instance_data,
     save_instance_data,
 )
+from library_manager.hints import get_all_hints
+from library_manager.folder_triage import triage_folder, triage_book_path, should_use_path_hints, confidence_modifier
 
 # Try to import P2P cache (optional - gracefully degrades if not available)
 try:
@@ -698,7 +700,7 @@
            try:
                with open(ERROR_REPORTS_PATH, 'r') as f:
                    reports = json.load(f)
            except:
                reports = []

        # Add new report (keep last 100 reports to avoid file bloat)
@@ -722,7 +724,7 @@
        try:
            with open(ERROR_REPORTS_PATH, 'r') as f:
                return json.load(f)
        except:
            return []
    return []

@@ -1677,7 +1679,7 @@
                    continue
                result = call_gemini(prompt, merged_config)
                if result:
                    logger.info(f"[PROVIDER CHAIN] Success with gemini")
                    return result

            elif provider == 'openrouter':
@@ -1686,13 +1688,13 @@
                    continue
                result = call_openrouter(prompt, merged_config)
                if result:
                    logger.info(f"[PROVIDER CHAIN] Success with openrouter")
                    return result

            elif provider == 'ollama':
                result = call_ollama(prompt, merged_config)
                if result:
                    logger.info(f"[PROVIDER CHAIN] Success with ollama")
                    return result

            else:
@@ -1794,7 +1796,7 @@
                        return result
                    elif result and result.get('transcript'):
                        # Got transcript but no match - still useful, return for potential AI fallback
                        logger.info(f"[AUDIO CHAIN] BookDB returned transcript only")
                        return result
                    elif result is None and attempt < max_retries - 1:
                        # Connection might be down, wait and retry
@@ -2126,11 +2128,11 @@
                device = "cuda"
                # int8 works on all CUDA devices including GTX 1080 (compute 6.1)
                # float16 only works on newer GPUs (compute 7.0+)
                logger.info(f"[WHISPER] Using CUDA GPU acceleration (10x faster)")
            else:
                logger.info(f"[WHISPER] Using CPU (no CUDA GPU detected)")
        except ImportError:
            logger.info(f"[WHISPER] Using CPU (ctranslate2 not available)")

        _whisper_model = WhisperModel(model_name, device=device, compute_type=compute_type)
        _whisper_model_name = model_name
@@ -2337,7 +2339,7 @@
        if sample_path and os.path.exists(sample_path):
            try:
                os.unlink(sample_path)
            except:
                pass

    return result
@@ -3196,31 +3198,39 @@
     return []
 
 
-def calculate_input_quality(folder_name, filenames, info):
+def calculate_input_quality(folder_name, filenames, info, folder_triage='clean'):
     """
     Score the quality of input data for AI identification.
     Returns a score 0-100 and list of usable clues found.
 
     Low quality inputs (random numbers, 'unknown', no words) should not be
     trusted to AI as it will hallucinate famous books.
+
+    Issue #110: folder_triage controls whether folder name is trusted as input.
     """
     score = 0
     clues = []
 
-    # Check folder name for useful info
-    folder_clean = re.sub(r'[_\-\d\.\[\]\(\)]', ' ', folder_name or '').strip()
-    words = [w for w in folder_clean.split() if len(w) > 2 and w.lower() not in ('unknown', 'audiobook', 'audio', 'book', 'mp3', 'the', 'and', 'part')]
+    # Issue #110: Only trust folder name for clean folders
+    use_folder = should_use_path_hints(folder_triage)
+
+    if use_folder:
+        # Check folder name for useful info
+        folder_clean = re.sub(r'[_\-\d\.\[\]\(\)]', ' ', folder_name or '').strip()
+        words = [w for w in folder_clean.split() if len(w) > 2 and w.lower() not in ('unknown', 'audiobook', 'audio', 'book', 'mp3', 'the', 'and', 'part')]
 
-    if words:
-        score += min(40, len(words) * 10)  # Up to 40 points for meaningful words
-        clues.append(f"folder_words: {words[:5]}")
+        if words:
+            score += min(40, len(words) * 10)  # Up to 40 points for meaningful words
+            clues.append(f"folder_words: {words[:5]}")
 
-    # Check for author-title pattern (e.g., "Author - Title")
-    if ' - ' in (folder_name or ''):
-        score += 20
-        clues.append("has_author_title_separator")
+        # Check for author-title pattern (e.g., "Author - Title")
+        if ' - ' in (folder_name or ''):
+            score += 20
+            clues.append("has_author_title_separator")
+    else:
+        clues.append(f"folder_skipped: triage={folder_triage}")
 
-    # Check metadata tags
+    # Check metadata tags (always trusted regardless of folder triage)
     if info.get('title') and info.get('title') not in ('none', 'Unknown', ''):
         score += 25
         clues.append(f"has_title_tag: {info.get('title')[:30]}")
@@ -3239,6 +3249,12 @@
         score = max(0, score - 50)  # Heavy penalty for "unknown_123" type names
         clues.append("PENALTY: numeric_garbage_name")
 
+    # Issue #110: Apply confidence modifier for garbage folders
+    modifier = confidence_modifier(folder_triage)
+    if modifier:
+        score = max(0, score + modifier)
+        clues.append(f"triage_modifier: {modifier}")
+
     return min(100, score), clues
 
 
@@ -3348,11 +3364,14 @@
     info = file_group.get('detected_info', {})
     folder_name = file_group.get('folder_name', '')
 
+    # Issue #110: Determine folder triage for this book
+    ft = file_group.get('folder_triage') or triage_folder(folder_name)
+
     # Build context for AI
     filenames = [Path(f).name if isinstance(f, str) else f.name for f in files[:20]]
 
     # === HALLUCINATION PREVENTION: Input quality check ===
-    input_quality, clues = calculate_input_quality(folder_name, filenames, info)
+    input_quality, clues = calculate_input_quality(folder_name, filenames, info, folder_triage=ft)
 
     if input_quality < 25:
         # Input is garbage - don't even try AI, it will hallucinate
@@ -3386,7 +3405,7 @@
 - Or are you GUESSING based on a generic title? (If guessing, return null!)
 
 Input information:
-- Folder name: {folder_name}
+- Folder name: {folder_name if should_use_path_hints(ft) else '[UNRELIABLE - ignore folder name]'}
 - Files ({len(files)} total): {', '.join(filenames[:10])}{'...' if len(filenames) > 10 else ''}
 - Duration: {info.get('duration_hours', 'unknown')} hours
 - Album tag: {info.get('title', 'none')}
@@ -4815,6 +4834,7 @@
     scanned = 0  # New books added to tracking
     queued = 0   # Books added to fix queue
     issues_found = {}  # path -> list of issues
+    triage_counts = {'clean': 0, 'messy': 0, 'garbage': 0}  # Issue #110: Folder triage stats
 
     # Track files for duplicate detection
     file_signatures = {}  # signature -> list of paths
@@ -4972,6 +4992,8 @@
                 flat_author, flat_title = extract_author_title(author)
                 # Issue #132: Resolve path to prevent duplicates
                 flat_path = str(author_dir.resolve())
+                # Issue #110: Triage folder name quality
+                flat_triage = triage_folder(author)
 
                 checked += 1
 
@@ -4987,13 +5009,15 @@
                         if has_profile:
                             continue
                     flat_book_id = existing_flat['id']
+                    c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
+                             (flat_triage, flat_book_id))
                 else:
-                    c.execute('''INSERT INTO books (path, current_author, current_title, status)
-                                 VALUES (?, ?, ?, 'pending')''', (flat_path, flat_author, flat_title))
+                    c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
+                                 VALUES (?, ?, ?, 'pending', ?)''', (flat_path, flat_author, flat_title, flat_triage))
                     conn.commit()
                     flat_book_id = c.lastrowid
                     scanned += 1
-                    logger.info(f"Added flat book: {flat_author} - {flat_title}")
+                    logger.info(f"Added flat book: {flat_author} - {flat_title} (triage: {flat_triage})")
 
                 # Queue for processing
                 c.execute('SELECT id FROM queue WHERE book_id = ?', (flat_book_id,))
@@ -5136,6 +5160,8 @@
                                 continue  # No audio files, skip
 
                             checked += 1
+                            # Issue #110: Triage folder name quality
+                            series_book_triage = triage_folder(book_title)
 
                             # Check if already tracked
                             c.execute('SELECT id, status, profile, user_locked FROM books WHERE path = ?', (book_path,))
@@ -5149,9 +5175,11 @@
                                     if has_profile:
                                         continue
                                 book_id = existing_book['id']
+                                c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
+                                         (series_book_triage, book_id))
                             else:
-                                c.execute('''INSERT INTO books (path, current_author, current_title, status)
-                                             VALUES (?, ?, ?, 'pending')''', (book_path, author, book_title))
+                                c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
+                                             VALUES (?, ?, ?, 'pending', ?)''', (book_path, author, book_title, series_book_triage))
                                 conn.commit()
                                 book_id = c.lastrowid
                                 scanned += 1
@@ -5195,6 +5223,12 @@
                 # This is a valid book folder - count it
                 checked += 1
 
+                # Issue #110: Triage folder name quality
+                folder_triage_result = triage_folder(title)
+                triage_counts[folder_triage_result] = triage_counts.get(folder_triage_result, 0) + 1
+                if folder_triage_result != 'clean':
+                    logger.info(f"Folder triage: {folder_triage_result} - {title[:60]}")
+
                 # Analyze title
                 title_issues = analyze_title(title, author)
                 cleaned_title, clean_issues = clean_title(title)
@@ -5257,9 +5291,12 @@
                             queued += 1
                             continue
                     book_id = existing['id']
+                    # Update triage for existing books (backfill)
+                    c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
+                             (folder_triage_result, book_id))
                 else:
-                    c.execute('''INSERT INTO books (path, current_author, current_title, status)
-                                 VALUES (?, ?, ?, 'pending')''', (path, author, title))
+                    c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
+                                 VALUES (?, ?, ?, 'pending', ?)''', (path, author, title, folder_triage_result))
                     conn.commit()
                     book_id = c.lastrowid
                     scanned += 1
@@ -5319,6 +5356,7 @@
     logger.info(f"Scanned: {scanned} new books added to tracking")
     logger.info(f"Queued: {queued} books need fixing")
     logger.info(f"Already correct: {checked - queued} books")
+    logger.info(f"Folder triage: {triage_counts['clean']} clean, {triage_counts['messy']} messy, {triage_counts['garbage']} garbage")
 
     return checked, scanned, queued
 
@@ -5512,19 +5550,26 @@
                 initial_prompt = "This is an audiobook introduction. The narrator typically announces the book title, author name, and narrator."
 
                 # Add folder hints to the prompt if available
+                # Issue #110: Only use folder hints for clean triage folders
                 folder_path = Path(file_path).parent
                 folder_name = folder_path.name
                 parent_name = folder_path.parent.name if folder_path.parent else ""
 
-                # Extract potential author/title from folder structure for spelling hints
-                hints = []
-                if parent_name and parent_name not in ['audiobooks', 'Unknown', '']:
-                    hints.append(parent_name)
-                if folder_name and folder_name not in ['audiobooks', 'Unknown', '']:
-                    hints.append(folder_name)
+                # Check folder triage before trusting folder names as hints
+                folder_triage_result = triage_folder(folder_name)
+
+                if should_use_path_hints(folder_triage_result):
+                    # Extract potential author/title from folder structure for spelling hints
+                    hints = []
+                    if parent_name and parent_name not in ['audiobooks', 'Unknown', '']:
+                        hints.append(parent_name)
+                    if folder_name and folder_name not in ['audiobooks', 'Unknown', '']:
+                        hints.append(folder_name)
 
-                if hints:
-                    initial_prompt += f" Possible names: {', '.join(hints)}."
+                    if hints:
+                        initial_prompt += f" Possible names: {', '.join(hints)}."
+                else:
+                    logger.info(f"[LAYER 1/AUDIO] Skipping folder hints (triage: {folder_triage_result}): {folder_name[:40]}")
 
                 # Transcribe with better settings for accuracy
                 segments, info = whisper_model.transcribe(
@@ -6761,6 +6806,12 @@
     """Inject worker_running into all templates automatically."""
     return {'worker_running': is_worker_running()}
 
+
+@app.context_processor
+def inject_hints():
+    """Inject hints dictionary into all templates for tooltips."""
+    return {'hints': get_all_hints()}
+
 # ============== ROUTES ==============
 
 @app.route('/')
@@ -9111,7 +9162,8 @@
         # Issue #36: Filter out series_folder and multi_book_files - they should never appear in queue
         order = build_order_by(QUEUE_SORT_COLS, 'q.priority, q.added_at')
         c.execute('''SELECT q.id as queue_id, q.reason, q.added_at, q.priority,
-                            b.id as book_id, b.path, b.current_author, b.current_title, b.status
+                            b.id as book_id, b.path, b.current_author, b.current_title, b.status,
+                            b.folder_triage
                      FROM queue q
                      JOIN books b ON q.book_id = b.id
                      WHERE b.status NOT IN ('series_folder', 'multi_book_files', 'verified', 'fixed')
@@ -9128,7 +9180,8 @@
                 'status': 'in_queue',
                 'reason': row['reason'],
                 'priority': row['priority'],
-                'added_at': row['added_at']
+                'added_at': row['added_at'],
+                'folder_triage': row['folder_triage'] or 'clean'
             })
 
     elif status_filter == 'fixed':

diff --git a/library_manager/database.py b/library_manager/database.py
@@ -126,6 +126,13 @@ def init_db(db_path=None):
         except:
             pass  # Column already exists
 
+    # Add folder_triage column - categorizes folder name quality (clean/messy/garbage)
+    # Issue #110: Used to decide whether to trust path-derived hints
+    try:
+        c.execute("ALTER TABLE books ADD COLUMN folder_triage TEXT DEFAULT 'clean'")
+    except:
+        pass  # Column already exists
+
     # Stats table - daily stats
     c.execute('''CREATE TABLE IF NOT EXISTS stats (
         id INTEGER PRIMARY KEY,