Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,33 @@

All notable changes to Library Manager will be documented in this file.

## [0.9.0-beta.124] - 2026-02-11

### Added

- **Issue #110 Part 2: Folder triage** - New `library_manager/folder_triage.py` module that
categorizes folder names as clean/messy/garbage before processing. Clean folders use path hints
normally. Messy folders (scene release tags, torrent markers, quality indicators) skip path
parsing and rely on audio/metadata only. Garbage folders (hash names, numbers-only, generic
placeholders) also skip path hints and get a confidence penalty. Triage results are stored in the DB
and logged during scans. Triage is integrated into Whisper transcription hints, AI identification
prompts, and the processing pipeline queue.

---

## [0.9.0-beta.123] - 2026-02-11

### Added

- **Issue #103: In-app hints and tooltips** - New `library_manager/hints.py` module with contextual
documentation for all features and settings. Hover over the (?) icon next to any setting to see a
plain-language explanation of what it does. Tooltips added to: all identification layers, AI
providers, confidence threshold, trust modes, safety toggles, watch folder, ebook management,
metadata embedding, community features, and more. Library page filter chips and action buttons also
show helpful tooltips on hover. Users never need to ask "what does this do?" again.

---

## [0.9.0-beta.122] - 2026-02-11

### Added
Expand Down
117 changes: 85 additions & 32 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
- Multi-provider AI (Gemini, OpenRouter, Ollama)
"""

APP_VERSION = "0.9.0-beta.122"
APP_VERSION = "0.9.0-beta.124"
GITHUB_REPO = "deucebucket/library-manager" # Your GitHub repo

# Versioning Guide:
Expand Down Expand Up @@ -111,6 +111,8 @@
get_instance_data,
save_instance_data,
)
from library_manager.hints import get_all_hints
from library_manager.folder_triage import triage_folder, triage_book_path, should_use_path_hints, confidence_modifier

# Try to import P2P cache (optional - gracefully degrades if not available)
try:
Expand Down Expand Up @@ -698,7 +700,7 @@
try:
with open(ERROR_REPORTS_PATH, 'r') as f:
reports = json.load(f)
except:

Check failure on line 703 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (E722)

app.py:703:13: E722 Do not use bare `except`
reports = []

# Add new report (keep last 100 reports to avoid file bloat)
Expand All @@ -722,7 +724,7 @@
try:
with open(ERROR_REPORTS_PATH, 'r') as f:
return json.load(f)
except:

Check failure on line 727 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (E722)

app.py:727:9: E722 Do not use bare `except`
return []
return []

Expand Down Expand Up @@ -1677,7 +1679,7 @@
continue
result = call_gemini(prompt, merged_config)
if result:
logger.info(f"[PROVIDER CHAIN] Success with gemini")

Check failure on line 1682 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F541)

app.py:1682:33: F541 f-string without any placeholders
return result

elif provider == 'openrouter':
Expand All @@ -1686,13 +1688,13 @@
continue
result = call_openrouter(prompt, merged_config)
if result:
logger.info(f"[PROVIDER CHAIN] Success with openrouter")

Check failure on line 1691 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F541)

app.py:1691:33: F541 f-string without any placeholders
return result

elif provider == 'ollama':
result = call_ollama(prompt, merged_config)
if result:
logger.info(f"[PROVIDER CHAIN] Success with ollama")

Check failure on line 1697 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F541)

app.py:1697:33: F541 f-string without any placeholders
return result

else:
Expand Down Expand Up @@ -1794,7 +1796,7 @@
return result
elif result and result.get('transcript'):
# Got transcript but no match - still useful, return for potential AI fallback
logger.info(f"[AUDIO CHAIN] BookDB returned transcript only")

Check failure on line 1799 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F541)

app.py:1799:37: F541 f-string without any placeholders
return result
elif result is None and attempt < max_retries - 1:
# Connection might be down, wait and retry
Expand Down Expand Up @@ -2126,11 +2128,11 @@
device = "cuda"
# int8 works on all CUDA devices including GTX 1080 (compute 6.1)
# float16 only works on newer GPUs (compute 7.0+)
logger.info(f"[WHISPER] Using CUDA GPU acceleration (10x faster)")

Check failure on line 2131 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F541)

app.py:2131:29: F541 f-string without any placeholders
else:
logger.info(f"[WHISPER] Using CPU (no CUDA GPU detected)")

Check failure on line 2133 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F541)

app.py:2133:29: F541 f-string without any placeholders
except ImportError:
logger.info(f"[WHISPER] Using CPU (ctranslate2 not available)")

Check failure on line 2135 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F541)

app.py:2135:25: F541 f-string without any placeholders

_whisper_model = WhisperModel(model_name, device=device, compute_type=compute_type)
_whisper_model_name = model_name
Expand Down Expand Up @@ -2337,7 +2339,7 @@
if sample_path and os.path.exists(sample_path):
try:
os.unlink(sample_path)
except:

Check failure on line 2342 in app.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (E722)

app.py:2342:13: E722 Do not use bare `except`
pass

return result
Expand Down Expand Up @@ -3196,31 +3198,39 @@
return []


def calculate_input_quality(folder_name, filenames, info):
def calculate_input_quality(folder_name, filenames, info, folder_triage='clean'):
"""
Score the quality of input data for AI identification.
Returns a score 0-100 and list of usable clues found.

Low quality inputs (random numbers, 'unknown', no words) should not be
trusted to AI as it will hallucinate famous books.

Issue #110: folder_triage controls whether folder name is trusted as input.
"""
score = 0
clues = []

# Check folder name for useful info
folder_clean = re.sub(r'[_\-\d\.\[\]\(\)]', ' ', folder_name or '').strip()
words = [w for w in folder_clean.split() if len(w) > 2 and w.lower() not in ('unknown', 'audiobook', 'audio', 'book', 'mp3', 'the', 'and', 'part')]
# Issue #110: Only trust folder name for clean folders
use_folder = should_use_path_hints(folder_triage)

if use_folder:
# Check folder name for useful info
folder_clean = re.sub(r'[_\-\d\.\[\]\(\)]', ' ', folder_name or '').strip()
words = [w for w in folder_clean.split() if len(w) > 2 and w.lower() not in ('unknown', 'audiobook', 'audio', 'book', 'mp3', 'the', 'and', 'part')]

if words:
score += min(40, len(words) * 10) # Up to 40 points for meaningful words
clues.append(f"folder_words: {words[:5]}")
if words:
score += min(40, len(words) * 10) # Up to 40 points for meaningful words
clues.append(f"folder_words: {words[:5]}")

# Check for author-title pattern (e.g., "Author - Title")
if ' - ' in (folder_name or ''):
score += 20
clues.append("has_author_title_separator")
# Check for author-title pattern (e.g., "Author - Title")
if ' - ' in (folder_name or ''):
score += 20
clues.append("has_author_title_separator")
else:
clues.append(f"folder_skipped: triage={folder_triage}")

# Check metadata tags
# Check metadata tags (always trusted regardless of folder triage)
if info.get('title') and info.get('title') not in ('none', 'Unknown', ''):
score += 25
clues.append(f"has_title_tag: {info.get('title')[:30]}")
Expand All @@ -3239,6 +3249,12 @@
score = max(0, score - 50) # Heavy penalty for "unknown_123" type names
clues.append("PENALTY: numeric_garbage_name")

# Issue #110: Apply confidence modifier for garbage folders
modifier = confidence_modifier(folder_triage)
if modifier:
score = max(0, score + modifier)
clues.append(f"triage_modifier: {modifier}")

return min(100, score), clues


Expand Down Expand Up @@ -3348,11 +3364,14 @@
info = file_group.get('detected_info', {})
folder_name = file_group.get('folder_name', '')

# Issue #110: Determine folder triage for this book
ft = file_group.get('folder_triage') or triage_folder(folder_name)

# Build context for AI
filenames = [Path(f).name if isinstance(f, str) else f.name for f in files[:20]]

# === HALLUCINATION PREVENTION: Input quality check ===
input_quality, clues = calculate_input_quality(folder_name, filenames, info)
input_quality, clues = calculate_input_quality(folder_name, filenames, info, folder_triage=ft)

if input_quality < 25:
# Input is garbage - don't even try AI, it will hallucinate
Expand Down Expand Up @@ -3386,7 +3405,7 @@
- Or are you GUESSING based on a generic title? (If guessing, return null!)

Input information:
- Folder name: {folder_name}
- Folder name: {folder_name if should_use_path_hints(ft) else '[UNRELIABLE - ignore folder name]'}
- Files ({len(files)} total): {', '.join(filenames[:10])}{'...' if len(filenames) > 10 else ''}
- Duration: {info.get('duration_hours', 'unknown')} hours
- Album tag: {info.get('title', 'none')}
Expand Down Expand Up @@ -4815,6 +4834,7 @@
scanned = 0 # New books added to tracking
queued = 0 # Books added to fix queue
issues_found = {} # path -> list of issues
triage_counts = {'clean': 0, 'messy': 0, 'garbage': 0} # Issue #110: Folder triage stats

# Track files for duplicate detection
file_signatures = {} # signature -> list of paths
Expand Down Expand Up @@ -4972,6 +4992,8 @@
flat_author, flat_title = extract_author_title(author)
# Issue #132: Resolve path to prevent duplicates
flat_path = str(author_dir.resolve())
# Issue #110: Triage folder name quality
flat_triage = triage_folder(author)

checked += 1

Expand All @@ -4987,13 +5009,15 @@
if has_profile:
continue
flat_book_id = existing_flat['id']
c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
(flat_triage, flat_book_id))
else:
c.execute('''INSERT INTO books (path, current_author, current_title, status)
VALUES (?, ?, ?, 'pending')''', (flat_path, flat_author, flat_title))
c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
VALUES (?, ?, ?, 'pending', ?)''', (flat_path, flat_author, flat_title, flat_triage))
conn.commit()
flat_book_id = c.lastrowid
scanned += 1
logger.info(f"Added flat book: {flat_author} - {flat_title}")
logger.info(f"Added flat book: {flat_author} - {flat_title} (triage: {flat_triage})")

# Queue for processing
c.execute('SELECT id FROM queue WHERE book_id = ?', (flat_book_id,))
Expand Down Expand Up @@ -5136,6 +5160,8 @@
continue # No audio files, skip

checked += 1
# Issue #110: Triage folder name quality
series_book_triage = triage_folder(book_title)

# Check if already tracked
c.execute('SELECT id, status, profile, user_locked FROM books WHERE path = ?', (book_path,))
Expand All @@ -5149,9 +5175,11 @@
if has_profile:
continue
book_id = existing_book['id']
c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
(series_book_triage, book_id))
else:
c.execute('''INSERT INTO books (path, current_author, current_title, status)
VALUES (?, ?, ?, 'pending')''', (book_path, author, book_title))
c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
VALUES (?, ?, ?, 'pending', ?)''', (book_path, author, book_title, series_book_triage))
conn.commit()
book_id = c.lastrowid
scanned += 1
Expand Down Expand Up @@ -5195,6 +5223,12 @@
# This is a valid book folder - count it
checked += 1

# Issue #110: Triage folder name quality
folder_triage_result = triage_folder(title)
triage_counts[folder_triage_result] = triage_counts.get(folder_triage_result, 0) + 1
if folder_triage_result != 'clean':
logger.info(f"Folder triage: {folder_triage_result} - {title[:60]}")

# Analyze title
title_issues = analyze_title(title, author)
cleaned_title, clean_issues = clean_title(title)
Expand Down Expand Up @@ -5257,9 +5291,12 @@
queued += 1
continue
book_id = existing['id']
# Update triage for existing books (backfill)
c.execute('UPDATE books SET folder_triage = ? WHERE id = ?',
(folder_triage_result, book_id))
else:
c.execute('''INSERT INTO books (path, current_author, current_title, status)
VALUES (?, ?, ?, 'pending')''', (path, author, title))
c.execute('''INSERT INTO books (path, current_author, current_title, status, folder_triage)
VALUES (?, ?, ?, 'pending', ?)''', (path, author, title, folder_triage_result))
conn.commit()
book_id = c.lastrowid
scanned += 1
Expand Down Expand Up @@ -5319,6 +5356,7 @@
logger.info(f"Scanned: {scanned} new books added to tracking")
logger.info(f"Queued: {queued} books need fixing")
logger.info(f"Already correct: {checked - queued} books")
logger.info(f"Folder triage: {triage_counts['clean']} clean, {triage_counts['messy']} messy, {triage_counts['garbage']} garbage")

return checked, scanned, queued

Expand Down Expand Up @@ -5512,19 +5550,26 @@
initial_prompt = "This is an audiobook introduction. The narrator typically announces the book title, author name, and narrator."

# Add folder hints to the prompt if available
# Issue #110: Only use folder hints for clean triage folders
folder_path = Path(file_path).parent
folder_name = folder_path.name
parent_name = folder_path.parent.name if folder_path.parent else ""

# Extract potential author/title from folder structure for spelling hints
hints = []
if parent_name and parent_name not in ['audiobooks', 'Unknown', '']:
hints.append(parent_name)
if folder_name and folder_name not in ['audiobooks', 'Unknown', '']:
hints.append(folder_name)
# Check folder triage before trusting folder names as hints
folder_triage_result = triage_folder(folder_name)

if should_use_path_hints(folder_triage_result):
# Extract potential author/title from folder structure for spelling hints
hints = []
if parent_name and parent_name not in ['audiobooks', 'Unknown', '']:
hints.append(parent_name)
if folder_name and folder_name not in ['audiobooks', 'Unknown', '']:
hints.append(folder_name)

if hints:
initial_prompt += f" Possible names: {', '.join(hints)}."
if hints:
initial_prompt += f" Possible names: {', '.join(hints)}."
else:
logger.info(f"[LAYER 1/AUDIO] Skipping folder hints (triage: {folder_triage_result}): {folder_name[:40]}")

# Transcribe with better settings for accuracy
segments, info = whisper_model.transcribe(
Expand Down Expand Up @@ -6761,6 +6806,12 @@
"""Inject worker_running into all templates automatically."""
return {'worker_running': is_worker_running()}


@app.context_processor
def inject_hints():
    """Context processor: expose the result of get_all_hints() as ``hints``.

    Returns a one-entry dict so templates can look up tooltip text via
    the ``hints`` variable.
    """
    template_context = {'hints': get_all_hints()}
    return template_context

# ============== ROUTES ==============

@app.route('/')
Expand Down Expand Up @@ -9111,7 +9162,8 @@
# Issue #36: Filter out series_folder and multi_book_files - they should never appear in queue
order = build_order_by(QUEUE_SORT_COLS, 'q.priority, q.added_at')
c.execute('''SELECT q.id as queue_id, q.reason, q.added_at, q.priority,
b.id as book_id, b.path, b.current_author, b.current_title, b.status
b.id as book_id, b.path, b.current_author, b.current_title, b.status,
b.folder_triage
FROM queue q
JOIN books b ON q.book_id = b.id
WHERE b.status NOT IN ('series_folder', 'multi_book_files', 'verified', 'fixed')
Expand All @@ -9128,7 +9180,8 @@
'status': 'in_queue',
'reason': row['reason'],
'priority': row['priority'],
'added_at': row['added_at']
'added_at': row['added_at'],
'folder_triage': row['folder_triage'] or 'clean'
})

elif status_filter == 'fixed':
Expand Down
7 changes: 7 additions & 0 deletions library_manager/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ def init_db(db_path=None):
except:
pass # Column already exists

# Add folder_triage column - categorizes folder name quality (clean/messy/garbage)
# Issue #110: Used to decide whether to trust path-derived hints
try:
c.execute("ALTER TABLE books ADD COLUMN folder_triage TEXT DEFAULT 'clean'")
except:
pass # Column already exists

# Stats table - daily stats
c.execute('''CREATE TABLE IF NOT EXISTS stats (
id INTEGER PRIMARY KEY,
Expand Down
Loading