From 3e085cacce1ef50bd30177b04be9c6d1005497e5 Mon Sep 17 00:00:00 2001
From: deucebucket <deucebucket@users.noreply.github.com>
Date: Mon, 16 Feb 2026 00:06:38 -0600
Subject: [PATCH 1/4] Fix #127: Use path info to complete partial Skaldleita
 results

When Skaldleita returns a truncated name (e.g., "James S. A" instead of
"James S. A. Corey"), the file path often contains the full name. This
adds _complete_result_from_path() which:

- Completes truncated author names when path has a longer version that
  starts with the SL result
- Completes truncated titles using the same starts-with logic
- Extracts series info from path components when SL returned none
- Gives a small confidence boost when path corroborates SL results
- Never replaces a longer SL result with a shorter path fragment
- Requires minimum 4 characters to avoid false matches on trivial prefixes

Applied at three points in the pipeline:
1. SL requeue with partial ID
2. SL full identification after sanity check
3. AI fallback after sanity check
---
 library_manager/pipeline/layer_audio_id.py | 132 ++++++++++++++++++++-
 1 file changed, 131 insertions(+), 1 deletion(-)

diff --git a/library_manager/pipeline/layer_audio_id.py b/library_manager/pipeline/layer_audio_id.py
index afd2641..36474b0 100644
--- a/library_manager/pipeline/layer_audio_id.py
+++ b/library_manager/pipeline/layer_audio_id.py
@@ -14,6 +14,7 @@
 import json
 import logging
 import os
+import re
 import time
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -146,7 +147,6 @@ def _validate_ai_result_against_path(result: Dict, folder_hint: str, book_path:
 
     # Clean up - remove common noise
     def clean_text(text):
-        import re
         # Remove brackets, hashes, special chars
         text = re.sub(r'\[[^\]]*\]', ' ', text)
         text = re.sub(r'[^a-z0-9\s]', ' ', text)
@@ -188,6 +188,129 @@ def clean_text(text):
     return result
 
 
+def _complete_result_from_path(result: Dict, folder_hint: str, book_path: str) -> Dict:
+    """
+    Use path info to complete partial/truncated Skaldleita results.
+
+    When SL returns a truncated name (e.g., "James S. A" instead of "James S. A. Corey"),
+    the file path often contains the full name. If the path version starts with the SL
+    version, use the longer path version.
+
+    Only completes - never replaces a longer SL result with a shorter path fragment.
+
+    Args:
+        result: The identification result dict (author, title, series, etc.)
+        folder_hint: "current_author - current_title" string from path parsing
+        book_path: Full filesystem path to the book folder
+
+    Returns:
+        The result dict with potentially completed fields.
+    """
+    if not result or not folder_hint:
+        return result
+
+    # Parse the folder hint into author and title components
+    # folder_hint format: "current_author - current_title"
+    hint_parts = folder_hint.split(' - ', 1)
+    path_author = hint_parts[0].strip() if len(hint_parts) >= 1 else ''
+    path_title = hint_parts[1].strip() if len(hint_parts) >= 2 else ''
+
+    def _is_truncated_version(shorter: str, longer: str) -> bool:
+        """Check if 'shorter' is a truncated prefix of 'longer'.
+
+        Returns True if the shorter string is a prefix of the longer string
+        (case-insensitive), and the longer string has meaningful additional content.
+        Requires the shorter version to be at least 4 characters to avoid false
+        matches on trivial prefixes.
+        """
+        if not shorter or not longer:
+            return False
+        s = shorter.strip()
+        l = longer.strip()
+        if len(s) < 4 or len(s) >= len(l):
+            return False
+        return l.lower().startswith(s.lower())
+
+    completed_any = False
+
+    # Complete author if truncated
+    sl_author = result.get('author', '') or ''
+    if sl_author and path_author and _is_truncated_version(sl_author, path_author):
+        logger.info(f"[PATH COMPLETE] Author: '{sl_author}' -> '{path_author}' (path has full name)")
+        result['author'] = path_author
+        result['path_completed_author'] = True
+        completed_any = True
+
+    # Complete title if truncated
+    sl_title = result.get('title', '') or ''
+    if sl_title and path_title and _is_truncated_version(sl_title, path_title):
+        logger.info(f"[PATH COMPLETE] Title: '{sl_title}' -> '{path_title}' (path has full name)")
+        result['title'] = path_title
+        result['path_completed_title'] = True
+        completed_any = True
+
+    # Try to extract series info from path if SL returned none
+    # Path often has patterns like "The Stormlight Archive 01" or "Series Name/Book 01"
+    sl_series = result.get('series', '') or ''
+    if not sl_series and path_title:
+        # Scan directory names in book_path (not path_title itself) for a
+        # trailing number, e.g. "Stormlight Archive 01" or "Series Name - Book 01".
+        # Parenthesized forms like "Title (Series 01)" do not match these patterns.
+        path_obj = Path(book_path) if book_path else None
+        if path_obj:
+            # Check parent directory names for series info not captured by SL
+            # The folder structure is typically: library/Author/SeriesOrTitle/BookTitle
+            # or: library/Author/Title
+            parts = path_obj.parts
+            # Look at path components between author and book for series folders
+            for part in parts:
+                # Match patterns like "Series Name 01" or "Series Name - Book 01"
+                series_match = re.match(
+                    r'^(.+?)\s*[-–]\s*(?:Book\s+)?(\d+(?:\.\d+)?)\s*$',
+                    part, re.IGNORECASE
+                )
+                if not series_match:
+                    series_match = re.match(
+                        r'^(.+?)\s+(\d+(?:\.\d+)?)\s*$',
+                        part, re.IGNORECASE
+                    )
+                if series_match:
+                    potential_series = series_match.group(1).strip()
+                    potential_num = series_match.group(2).strip()
+                    # Only use if the series name is meaningful (not just the title)
+                    if (len(potential_series) >= 3
+                            and potential_series.lower() != sl_title.lower()
+                            and potential_series.lower() != (result.get('author') or '').lower()):
+                        logger.info(f"[PATH COMPLETE] Series: '{potential_series}' #{potential_num} (extracted from path)")
+                        result['series'] = potential_series
+                        result['series_num'] = potential_num
+                        result['path_completed_series'] = True
+                        completed_any = True
+                        break
+
+    # If we completed anything, give a small confidence boost since path corroborates SL
+    if completed_any:
+        raw_conf = result.get('confidence', 0.7)
+        try:
+            if isinstance(raw_conf, str):
+                # String confidence levels - bump up one tier
+                if raw_conf == 'low':
+                    result['confidence'] = 'medium'
+                elif raw_conf == 'medium':
+                    result['confidence'] = 'high'
+                # 'high' stays high
+            elif isinstance(raw_conf, (int, float)):
+                # Numeric confidence - small boost (5%) for path agreement, cap at 0.95
+                if raw_conf <= 1:
+                    result['confidence'] = min(0.95, raw_conf + 0.05)
+                else:
+                    result['confidence'] = min(95, raw_conf + 5)
+        except (ValueError, TypeError):
+            pass  # Leave confidence unchanged if we can't parse it
+
+    return result
+
+
 def process_layer_1_audio(
     config: Dict,
     get_db: Callable,
@@ -444,6 +567,8 @@ def process_layer_1_audio(
                         'sl_source': sl_source,
                         'requeue_suggested': True
                     }
+                    # Issue #127: Complete truncated SL results using path info
+                    result = _complete_result_from_path(result, folder_hint, book_path)
                     # Continue processing - let the normal flow create pending_fix
                     # The requeue flag will be used to schedule a future recheck
                 else:
@@ -475,6 +600,8 @@ def process_layer_1_audio(
                     transcript = bookdb_result.get('transcript')  # Keep transcript for AI
                     result = None  # Clear to trigger AI fallback
                 else:
+                    # Issue #127: Complete truncated SL results using path info
+                    bookdb_result = _complete_result_from_path(bookdb_result, folder_hint, book_path)
                     result = bookdb_result  # Passed sanity check
             else:
                 # Skaldleita didn't get a full match - might have a transcript though
@@ -513,6 +640,9 @@ def process_layer_1_audio(
             # This catches cases where AI completely misparses (e.g., narrator name as author)
             if result:
                 result = _validate_ai_result_against_path(result, folder_hint, book_path)
+                # Issue #127: Complete truncated AI results using path info
+                if result and not result.get('sanity_failed'):
+                    result = _complete_result_from_path(result, folder_hint, book_path)
 
         if result and result.get('author') and result.get('title') and result.get('confidence') != 'none':
             # Got identification from audio!

From ddd1db0b0257e178ef438f120cb8810d6891d904 Mon Sep 17 00:00:00 2001
From: deucebucket <deucebucket@users.noreply.github.com>
Date: Mon, 16 Feb 2026 01:21:33 -0600
Subject: [PATCH 2/4] Address vibe-check review: limit path iteration scope,
 add CHANGELOG

---
 CHANGELOG.md                               | 11 +++++++++++
 library_manager/pipeline/layer_audio_id.py |  7 +++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d1a7093..bf38403 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,17 @@
 
 All notable changes to Library Manager will be documented in this file.
 
+## [Unreleased]
+
+### Added
+
+- **Issue #127: Path-based completion for partial results** - When Skaldleita returns truncated
+  names (e.g., "James S. A" instead of "James S. A. Corey"), the system now uses folder path
+  information to complete the full name. Also extracts series information from path structure
+  when missing from audio identification. Requires minimum 4-char prefix match for safety.
+
+---
+
 ## [0.9.0-beta.125] - 2026-02-14
 
 ### Fixed
diff --git a/library_manager/pipeline/layer_audio_id.py b/library_manager/pipeline/layer_audio_id.py
index 36474b0..99976f1 100644
--- a/library_manager/pipeline/layer_audio_id.py
+++ b/library_manager/pipeline/layer_audio_id.py
@@ -259,11 +259,10 @@ def _is_truncated_version(shorter: str, longer: str) -> bool:
         path_obj = Path(book_path) if book_path else None
         if path_obj:
             # Check parent directory names for series info not captured by SL
-            # The folder structure is typically: library/Author/SeriesOrTitle/BookTitle
-            # or: library/Author/Title
+            # Typical structure: library/Author/SeriesOrTitle/BookTitle
+            # Only scan parts[-4:-1]: up to 3 parent dirs, skipping the book folder itself
             parts = path_obj.parts
-            # Look at path components between author and book for series folders
-            for part in parts:
+            for part in parts[-4:-1]:
                 # Match patterns like "Series Name 01" or "Series Name - Book 01"
                 series_match = re.match(
                     r'^(.+?)\s*[-–]\s*(?:Book\s+)?(\d+(?:\.\d+)?)\s*$',

From 2565924c95b426a070ce253d2c1ad425bd9bcb62 Mon Sep 17 00:00:00 2001
From: deucebucket <deucebucket@users.noreply.github.com>
Date: Mon, 16 Feb 2026 01:24:55 -0600
Subject: [PATCH 3/4] Bump version to beta.127

---
 CHANGELOG.md | 2 +-
 README.md    | 2 +-
 app.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf38403..9c4ed71 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,7 @@
 
 All notable changes to Library Manager will be documented in this file.
 
-## [Unreleased]
+## [0.9.0-beta.127] - 2026-02-16
 
 ### Added
 
diff --git a/README.md b/README.md
index 5891303..9719546 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 **Smart Audiobook Library Organizer with Multi-Source Metadata & AI Verification**
 
-[![Version](https://img.shields.io/badge/version-0.9.0--beta.125-blue.svg)](CHANGELOG.md)
+[![Version](https://img.shields.io/badge/version-0.9.0--beta.127-blue.svg)](CHANGELOG.md)
 [![Docker](https://img.shields.io/badge/docker-ghcr.io-blue.svg)](https://ghcr.io/deucebucket/library-manager)
 [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
 
diff --git a/app.py b/app.py
index f215e6c..f023fee 100644
--- a/app.py
+++ b/app.py
@@ -11,7 +11,7 @@
 - Multi-provider AI (Gemini, OpenRouter, Ollama)
 """
 
-APP_VERSION = "0.9.0-beta.125"
+APP_VERSION = "0.9.0-beta.127"
 GITHUB_REPO = "deucebucket/library-manager"  # Your GitHub repo
 
 # Versioning Guide:

From d9380a6034dcf3c430d209ceb41accc29594dcfe Mon Sep 17 00:00:00 2001
From: deucebucket <deucebucket@users.noreply.github.com>
Date: Mon, 16 Feb 2026 01:27:11 -0600
Subject: [PATCH 4/4] Address vibe-check review: handle numeric string
 confidence, add comments

---
 library_manager/pipeline/layer_audio_id.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/library_manager/pipeline/layer_audio_id.py b/library_manager/pipeline/layer_audio_id.py
index 99976f1..ab16f71 100644
--- a/library_manager/pipeline/layer_audio_id.py
+++ b/library_manager/pipeline/layer_audio_id.py
@@ -276,7 +276,7 @@ def _is_truncated_version(shorter: str, longer: str) -> bool:
                 if series_match:
                     potential_series = series_match.group(1).strip()
                     potential_num = series_match.group(2).strip()
-                    # Only use if the series name is meaningful (not just the title)
+                    # Min length 3 to avoid false matches like "The" or "No"
                     if (len(potential_series) >= 3
                             and potential_series.lower() != sl_title.lower()
                             and potential_series.lower() != (result.get('author') or '').lower()):
@@ -292,12 +292,20 @@ def _is_truncated_version(shorter: str, longer: str) -> bool:
         raw_conf = result.get('confidence', 0.7)
         try:
             if isinstance(raw_conf, str):
-                # String confidence levels - bump up one tier
-                if raw_conf == 'low':
-                    result['confidence'] = 'medium'
-                elif raw_conf == 'medium':
-                    result['confidence'] = 'high'
-                # 'high' stays high
+                # SL can return string levels or numeric strings like "0.7"
+                try:
+                    numeric = float(raw_conf)
+                    if numeric <= 1:
+                        result['confidence'] = min(0.95, numeric + 0.05)
+                    else:
+                        result['confidence'] = min(95, numeric + 5)
+                except ValueError:
+                    # Named confidence levels - bump up one tier
+                    if raw_conf == 'low':
+                        result['confidence'] = 'medium'
+                    elif raw_conf == 'medium':
+                        result['confidence'] = 'high'
+                    # 'high' stays high
             elif isinstance(raw_conf, (int, float)):
                 # Numeric confidence - small boost (5%) for path agreement, cap at 0.95
                 if raw_conf <= 1: