From 78d4281051f723006a94c563ae7d1b7537438367 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 24 Jan 2026 04:21:22 +0000 Subject: [PATCH 1/4] fix: refine voice resolution and speed validation --- cmd/speak.go | 69 +++++++++++++++++++++++++++++++----------- cmd/speak_test.go | 76 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 122 insertions(+), 23 deletions(-) diff --git a/cmd/speak.go b/cmd/speak.go index d3ad81d..f1cfcaa 100644 --- a/cmd/speak.go +++ b/cmd/speak.go @@ -62,26 +62,24 @@ func init() { return ensureAPIKey() }, RunE: func(cmd *cobra.Command, args []string) error { - if opts.speed <= 0.5 || opts.speed >= 2.0 { - return errors.New("speed must be between 0.5 and 2.0 (e.g. 1.1 for 10% faster)") - } - if opts.rateWPM > 0 { - // Map macOS `say` rate (words per minute) to ElevenLabs speed multiplier. - opts.speed = float64(opts.rateWPM) / float64(defaultWPM) - if opts.speed <= 0.5 || opts.speed >= 2.0 { - return fmt.Errorf("rate %d wpm maps to speed %.2f, which is outside the allowed 0.5–2.0 range", opts.rateWPM, opts.speed) - } + if err := applyRateAndSpeed(&opts); err != nil { + return err } - if opts.voiceID == "" { - opts.voiceID = os.Getenv("ELEVENLABS_VOICE_ID") - } - if opts.voiceID == "" { - opts.voiceID = os.Getenv("SAG_VOICE_ID") + forceVoiceID := cmd.Flags().Changed("voice-id") + voiceInput := opts.voiceID + if voiceInput == "" { + if env := os.Getenv("ELEVENLABS_VOICE_ID"); env != "" { + voiceInput = env + forceVoiceID = true + } else if env := os.Getenv("SAG_VOICE_ID"); env != "" { + voiceInput = env + forceVoiceID = true + } } client := elevenlabs.NewClient(cfg.APIKey, cfg.BaseURL) - voiceID, err := resolveVoice(cmd.Context(), client, opts.voiceID) + voiceID, err := resolveVoice(cmd.Context(), client, voiceInput, forceVoiceID) if err != nil { return err } @@ -172,6 +170,21 @@ func init() { rootCmd.AddCommand(cmd) } +func applyRateAndSpeed(opts *speakOptions) error { + if opts.rateWPM > 0 { + // Map macOS `say` rate (words per minute) to ElevenLabs speed multiplier. + opts.speed = float64(opts.rateWPM) / float64(defaultWPM) + if opts.speed <= 0.5 || opts.speed >= 2.0 { + return fmt.Errorf("rate %d wpm maps to speed %.2f, which is outside the allowed 0.5–2.0 range", opts.rateWPM, opts.speed) + } + return nil + } + if opts.speed <= 0.5 || opts.speed >= 2.0 { + return errors.New("speed must be between 0.5 and 2.0 (e.g. 1.1 for 10% faster)") + } + return nil +} + func buildTTSRequest(cmd *cobra.Command, opts speakOptions, text string) (elevenlabs.TTSRequest, error) { flags := cmd.Flags() @@ -414,7 +427,7 @@ func convertAndPlay(ctx context.Context, client *elevenlabs.Client, opts speakOp return n, nil } -func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput string) (string, error) { +func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput string, forceID bool) (string, error) { voiceInput = strings.TrimSpace(voiceInput) if voiceInput == "" { ctx, cancel := context.WithTimeout(ctx, 30*time.Second) @@ -451,8 +464,24 @@ func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput str return "", nil } - // If input looks like an ID (UUID-like), use directly. - if len(voiceInput) >= 15 && strings.ContainsAny(voiceInput, "0123456789") { + if forceID { + return voiceInput, nil + } + + if looksLikeVoiceID(voiceInput) { + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + voices, err := client.ListVoices(ctx, voiceInput) + if err != nil { + return "", err + } + voiceInputLower := strings.ToLower(voiceInput) + for _, v := range voices { + if strings.ToLower(v.Name) == voiceInputLower { + fmt.Fprintf(os.Stderr, "using voice %s (%s)\n", v.Name, v.VoiceID) + return v.VoiceID, nil + } + } return voiceInput, nil } @@ -477,6 +506,10 @@ func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput str return "", fmt.Errorf("voice %q not found; try 'sag voices' or -v '?'", voiceInput) } +func looksLikeVoiceID(voiceInput string) bool { + return len(voiceInput) >= 15 && !strings.ContainsRune(voiceInput, ' ') +} + func inferFormatFromExt(path string) string { ext := strings.ToLower(filepath.Ext(path)) switch ext { diff --git a/cmd/speak_test.go b/cmd/speak_test.go index 94ef7b4..57a6460 100644 --- a/cmd/speak_test.go +++ b/cmd/speak_test.go @@ -3,6 +3,7 @@ package cmd import ( "context" "io" + "math" "net/http" "net/http/httptest" "os" @@ -116,6 +117,24 @@ func TestResolveTextEmptyFile(t *testing.T) { } } +func TestApplyRateOverridesInvalidSpeed(t *testing.T) { + opts := &speakOptions{speed: 0.3, rateWPM: 200} + if err := applyRateAndSpeed(opts); err != nil { + t.Fatalf("applyRateAndSpeed error: %v", err) + } + want := float64(200) / float64(defaultWPM) + if math.Abs(opts.speed-want) > 1e-9 { + t.Fatalf("expected speed %.2f, got %.2f", want, opts.speed) + } +} + +func TestApplyRateAndSpeedInvalidSpeed(t *testing.T) { + opts := &speakOptions{speed: 0.3} + if err := applyRateAndSpeed(opts); err == nil { + t.Fatalf("expected speed validation error") + } +} + func TestResolveVoiceDefaultsToFirst(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Alpha","category":"premade"},{"voice_id":"id2","name":"Beta","category":"premade"}]}`)); err != nil { @@ -125,7 +144,7 @@ func TestResolveVoiceDefaultsToFirst(t *testing.T) { defer srv.Close() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "") + id, err := resolveVoice(context.Background(), client, "", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -142,7 +161,7 @@ func TestResolveVoicePassThroughID(t *testing.T) { defer srv.Close() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "abc1234567890123") + id, err := resolveVoice(context.Background(), client, "abc1234567890123", true) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -151,6 +170,53 @@ func TestResolveVoicePassThroughID(t *testing.T) { } } +func TestResolveVoiceLongNameExactMatch(t *testing.T) { + var called bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + called = true + if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id-long","name":"LongVoiceNameAlpha","category":"premade"}]}`)); err != nil { + t.Fatalf("write response: %v", err) + } + })) + defer srv.Close() + + client := elevenlabs.NewClient("key", srv.URL) + id, err := resolveVoice(context.Background(), client, "LongVoiceNameAlpha", false) + if err != nil { + t.Fatalf("resolveVoice error: %v", err) + } + if !called { + t.Fatalf("expected voice lookup for long name") + } + if id != "id-long" { + t.Fatalf("expected id-long, got %q", id) + } +} + +func TestResolveVoiceLooksLikeIDNoMatchPassesThrough(t *testing.T) { + var called bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + called = true + if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Other","category":"premade"}]}`)); err != nil { + t.Fatalf("write response: %v", err) + } + })) + defer srv.Close() + + client := elevenlabs.NewClient("key", srv.URL) + input := "LongVoiceNameAlpha" + id, err := resolveVoice(context.Background(), client, input, false) + if err != nil { + t.Fatalf("resolveVoice error: %v", err) + } + if !called { + t.Fatalf("expected voice lookup for ambiguous input") + } + if id != input { + t.Fatalf("expected %q to pass through, got %q", input, id) + } +} + func TestResolveVoiceClosestMatch(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Near","category":"premade"}]}`)); err != nil { @@ -163,7 +229,7 @@ func TestResolveVoiceClosestMatch(t *testing.T) { defer restore() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "nothing-match") + id, err := resolveVoice(context.Background(), client, "nothing-match", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -187,7 +253,7 @@ func TestResolveVoiceListOutputsTable(t *testing.T) { defer restore() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "?") + id, err := resolveVoice(context.Background(), client, "?", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -367,7 +433,7 @@ func TestResolveVoiceByName(t *testing.T) { defer srv.Close() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "roger") + id, err := resolveVoice(context.Background(), client, "roger", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } From 090ad3b8897988cadadf58f438400032b285d4f5 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 24 Jan 2026 04:26:11 +0000 Subject: [PATCH 2/4] fix: tighten voice id heuristic --- cmd/speak.go | 12 ++++++++++++ cmd/speak_test.go | 24 +++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/cmd/speak.go b/cmd/speak.go index f1cfcaa..7babcc8 100644 --- a/cmd/speak.go +++ b/cmd/speak.go @@ -469,6 +469,9 @@ func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput str } if looksLikeVoiceID(voiceInput) { + if containsDigit(voiceInput) { + return voiceInput, nil + } ctx, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() voices, err := client.ListVoices(ctx, voiceInput) @@ -510,6 +513,15 @@ func looksLikeVoiceID(voiceInput string) bool { return len(voiceInput) >= 15 && !strings.ContainsRune(voiceInput, ' ') } +func containsDigit(s string) bool { + for _, r := range s { + if r >= '0' && r <= '9' { + return true + } + } + return false +} + func inferFormatFromExt(path string) string { ext := strings.ToLower(filepath.Ext(path)) switch ext { diff --git a/cmd/speak_test.go b/cmd/speak_test.go index 57a6460..443a3f9 100644 --- a/cmd/speak_test.go +++ b/cmd/speak_test.go @@ -153,15 +153,15 @@ func TestResolveVoiceDefaultsToFirst(t *testing.T) { } } -func TestResolveVoicePassThroughID(t *testing.T) { - // Should short-circuit without hitting the server when input looks like an ID. +func TestResolveVoicePassThroughIDWithDigits(t *testing.T) { + // Should short-circuit without hitting the server when input looks like an ID with digits. srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { t.Fatalf("server should not be called for ID pass-through") })) defer srv.Close() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "abc1234567890123", true) + id, err := resolveVoice(context.Background(), client, "abc1234567890123", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -170,6 +170,24 @@ func TestResolveVoicePassThroughID(t *testing.T) { } } +func TestResolveVoiceForceIDPassThrough(t *testing.T) { + // Should short-circuit without hitting the server when --voice-id is set. + srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { + t.Fatalf("server should not be called for forced ID pass-through") + })) + defer srv.Close() + + client := elevenlabs.NewClient("key", srv.URL) + input := "OnlyLettersVoiceID" + id, err := resolveVoice(context.Background(), client, input, true) + if err != nil { + t.Fatalf("resolveVoice error: %v", err) + } + if id != input { + t.Fatalf("expected ID to pass through, got %q", id) + } +} + func TestResolveVoiceLongNameExactMatch(t *testing.T) { var called bool srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { From f6635c1de9eac861a2c2f76cb9b32b9efcc5dcc0 Mon Sep 17 00:00:00 2001 From: Joel Davies <114788106+joelbdavies@users.noreply.github.com> Date: Sun, 18 Jan 2026 21:11:02 +0800 Subject: [PATCH 3/4] fix: use consistent default value for --voice flag The --voice flag was using a variable reference (opts.voiceID) as its default value instead of an empty string literal. This could cause issues with cobra's flag parsing when both --voice-id and --voice flags point to the same variable. Changed to use "" for consistency with the --voice-id flag. Co-Authored-By: Claude Sonnet 4.5 --- cmd/speak.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/speak.go b/cmd/speak.go index 7babcc8..ebf3ca8 100644 --- a/cmd/speak.go +++ b/cmd/speak.go @@ -137,7 +137,7 @@ func init() { } cmd.Flags().StringVar(&opts.voiceID, "voice-id", "", "Voice ID to use (ELEVENLABS_VOICE_ID)") - cmd.Flags().StringVarP(&opts.voiceID, "voice", "v", opts.voiceID, "Alias for --voice-id; accepts name or ID; use '?' to list voices") + cmd.Flags().StringVarP(&opts.voiceID, "voice", "v", "", "Alias for --voice-id; accepts name or ID; use '?' to list voices") cmd.Flags().StringVar(&opts.modelID, "model-id", opts.modelID, "Model ID (default: eleven_v3). Common: eleven_multilingual_v2 (stable), eleven_flash_v2_5 (fast/cheap), eleven_turbo_v2_5 (balanced).") cmd.Flags().StringVarP(&opts.outputPath, "output", "o", "", "Write audio to file (disables playback unless --play is also set)") cmd.Flags().StringVar(&opts.outputFmt, "format", opts.outputFmt, "Output format (e.g. mp3_44100_128)") From 6b744dbed6bb2acb5b3c78b89de428e354136c62 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 24 Jan 2026 04:33:54 +0000 Subject: [PATCH 4/4] fix: update changelog for voice resolution (#7) (thanks @joelbdavies) --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6852e5e..439feca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## 0.2.2 - Unreleased +### Fixed +- Voice ID resolution respects `--voice-id` and avoids misclassifying long names; `--rate` now overrides `--speed` validation. (#7, thanks @joelbdavies) ## 0.2.1 - 2026-01-01 ### Fixed