diff --git a/CHANGELOG.md b/CHANGELOG.md index 6852e5e..439feca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## 0.2.2 - Unreleased +### Fixed +- Voice ID resolution respects `--voice-id` and avoids misclassifying long names; `--rate` now overrides `--speed` validation. (#7, thanks @joelbdavies) ## 0.2.1 - 2026-01-01 ### Fixed diff --git a/cmd/speak.go b/cmd/speak.go index d3ad81d..ebf3ca8 100644 --- a/cmd/speak.go +++ b/cmd/speak.go @@ -62,26 +62,24 @@ func init() { return ensureAPIKey() }, RunE: func(cmd *cobra.Command, args []string) error { - if opts.speed <= 0.5 || opts.speed >= 2.0 { - return errors.New("speed must be between 0.5 and 2.0 (e.g. 1.1 for 10% faster)") - } - if opts.rateWPM > 0 { - // Map macOS `say` rate (words per minute) to ElevenLabs speed multiplier. - opts.speed = float64(opts.rateWPM) / float64(defaultWPM) - if opts.speed <= 0.5 || opts.speed >= 2.0 { - return fmt.Errorf("rate %d wpm maps to speed %.2f, which is outside the allowed 0.5–2.0 range", opts.rateWPM, opts.speed) - } + if err := applyRateAndSpeed(&opts); err != nil { + return err } - if opts.voiceID == "" { - opts.voiceID = os.Getenv("ELEVENLABS_VOICE_ID") - } - if opts.voiceID == "" { - opts.voiceID = os.Getenv("SAG_VOICE_ID") + forceVoiceID := cmd.Flags().Changed("voice-id") + voiceInput := opts.voiceID + if voiceInput == "" { + if env := os.Getenv("ELEVENLABS_VOICE_ID"); env != "" { + voiceInput = env + forceVoiceID = true + } else if env := os.Getenv("SAG_VOICE_ID"); env != "" { + voiceInput = env + forceVoiceID = true + } } client := elevenlabs.NewClient(cfg.APIKey, cfg.BaseURL) - voiceID, err := resolveVoice(cmd.Context(), client, opts.voiceID) + voiceID, err := resolveVoice(cmd.Context(), client, voiceInput, forceVoiceID) if err != nil { return err } @@ -139,7 +137,7 @@ func init() { } cmd.Flags().StringVar(&opts.voiceID, "voice-id", "", "Voice ID to use (ELEVENLABS_VOICE_ID)") - cmd.Flags().StringVarP(&opts.voiceID, "voice", "v", opts.voiceID, "Alias for --voice-id; accepts name or ID; use '?' to list voices") + cmd.Flags().StringVarP(&opts.voiceID, "voice", "v", "", "Alias for --voice-id; accepts name or ID; use '?' to list voices") cmd.Flags().StringVar(&opts.modelID, "model-id", opts.modelID, "Model ID (default: eleven_v3). Common: eleven_multilingual_v2 (stable), eleven_flash_v2_5 (fast/cheap), eleven_turbo_v2_5 (balanced).") cmd.Flags().StringVarP(&opts.outputPath, "output", "o", "", "Write audio to file (disables playback unless --play is also set)") cmd.Flags().StringVar(&opts.outputFmt, "format", opts.outputFmt, "Output format (e.g. mp3_44100_128)") @@ -172,6 +170,21 @@ func init() { rootCmd.AddCommand(cmd) } +func applyRateAndSpeed(opts *speakOptions) error { + if opts.rateWPM > 0 { + // Map macOS `say` rate (words per minute) to ElevenLabs speed multiplier. + opts.speed = float64(opts.rateWPM) / float64(defaultWPM) + if opts.speed <= 0.5 || opts.speed >= 2.0 { + return fmt.Errorf("rate %d wpm maps to speed %.2f, which is outside the allowed 0.5–2.0 range", opts.rateWPM, opts.speed) + } + return nil + } + if opts.speed <= 0.5 || opts.speed >= 2.0 { + return errors.New("speed must be between 0.5 and 2.0 (e.g. 1.1 for 10% faster)") + } + return nil +} + func buildTTSRequest(cmd *cobra.Command, opts speakOptions, text string) (elevenlabs.TTSRequest, error) { flags := cmd.Flags() @@ -414,7 +427,7 @@ func convertAndPlay(ctx context.Context, client *elevenlabs.Client, opts speakOp return n, nil } -func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput string) (string, error) { +func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput string, forceID bool) (string, error) { voiceInput = strings.TrimSpace(voiceInput) if voiceInput == "" { ctx, cancel := context.WithTimeout(ctx, 30*time.Second) @@ -451,8 +464,27 @@ func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput str return "", nil } - // If input looks like an ID (UUID-like), use directly. - if len(voiceInput) >= 15 && strings.ContainsAny(voiceInput, "0123456789") { + if forceID { + return voiceInput, nil + } + + if looksLikeVoiceID(voiceInput) { + if containsDigit(voiceInput) { + return voiceInput, nil + } + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + voices, err := client.ListVoices(ctx, voiceInput) + if err != nil { + return "", err + } + voiceInputLower := strings.ToLower(voiceInput) + for _, v := range voices { + if strings.ToLower(v.Name) == voiceInputLower { + fmt.Fprintf(os.Stderr, "using voice %s (%s)\n", v.Name, v.VoiceID) + return v.VoiceID, nil + } + } return voiceInput, nil } @@ -477,6 +509,19 @@ func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput str return "", fmt.Errorf("voice %q not found; try 'sag voices' or -v '?'", voiceInput) } +func looksLikeVoiceID(voiceInput string) bool { + return len(voiceInput) >= 15 && !strings.ContainsRune(voiceInput, ' ') +} + +func containsDigit(s string) bool { + for _, r := range s { + if r >= '0' && r <= '9' { + return true + } + } + return false +} + func inferFormatFromExt(path string) string { ext := strings.ToLower(filepath.Ext(path)) switch ext { diff --git a/cmd/speak_test.go b/cmd/speak_test.go index 94ef7b4..443a3f9 100644 --- a/cmd/speak_test.go +++ b/cmd/speak_test.go @@ -3,6 +3,7 @@ package cmd import ( "context" "io" + "math" "net/http" "net/http/httptest" "os" @@ -116,6 +117,24 @@ func TestResolveTextEmptyFile(t *testing.T) { } } +func TestApplyRateOverridesInvalidSpeed(t *testing.T) { + opts := &speakOptions{speed: 0.3, rateWPM: 200} + if err := applyRateAndSpeed(opts); err != nil { + t.Fatalf("applyRateAndSpeed error: %v", err) + } + want := float64(200) / float64(defaultWPM) + if math.Abs(opts.speed-want) > 1e-9 { + t.Fatalf("expected speed %.2f, got %.2f", want, opts.speed) + } +} + +func TestApplyRateAndSpeedInvalidSpeed(t *testing.T) { + opts := &speakOptions{speed: 0.3} + if err := applyRateAndSpeed(opts); err == nil { + t.Fatalf("expected speed validation error") + } +} + func TestResolveVoiceDefaultsToFirst(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Alpha","category":"premade"},{"voice_id":"id2","name":"Beta","category":"premade"}]}`)); err != nil { @@ -125,7 +144,7 @@ func TestResolveVoiceDefaultsToFirst(t *testing.T) { defer srv.Close() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "") + id, err := resolveVoice(context.Background(), client, "", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -134,15 +153,15 @@ func TestResolveVoiceDefaultsToFirst(t *testing.T) { } } -func TestResolveVoicePassThroughID(t *testing.T) { - // Should short-circuit without hitting the server when input looks like an ID. +func TestResolveVoicePassThroughIDWithDigits(t *testing.T) { + // Should short-circuit without hitting the server when input looks like an ID with digits. srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { t.Fatalf("server should not be called for ID pass-through") })) defer srv.Close() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "abc1234567890123") + id, err := resolveVoice(context.Background(), client, "abc1234567890123", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -151,6 +170,71 @@ func TestResolveVoicePassThroughID(t *testing.T) { } } +func TestResolveVoiceForceIDPassThrough(t *testing.T) { + // Should short-circuit without hitting the server when --voice-id is set. + srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) { + t.Fatalf("server should not be called for forced ID pass-through") + })) + defer srv.Close() + + client := elevenlabs.NewClient("key", srv.URL) + input := "OnlyLettersVoiceID" + id, err := resolveVoice(context.Background(), client, input, true) + if err != nil { + t.Fatalf("resolveVoice error: %v", err) + } + if id != input { + t.Fatalf("expected ID to pass through, got %q", id) + } +} + +func TestResolveVoiceLongNameExactMatch(t *testing.T) { + var called bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + called = true + if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id-long","name":"LongVoiceNameAlpha","category":"premade"}]}`)); err != nil { + t.Fatalf("write response: %v", err) + } + })) + defer srv.Close() + + client := elevenlabs.NewClient("key", srv.URL) + id, err := resolveVoice(context.Background(), client, "LongVoiceNameAlpha", false) + if err != nil { + t.Fatalf("resolveVoice error: %v", err) + } + if !called { + t.Fatalf("expected voice lookup for long name") + } + if id != "id-long" { + t.Fatalf("expected id-long, got %q", id) + } +} + +func TestResolveVoiceLooksLikeIDNoMatchPassesThrough(t *testing.T) { + var called bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + called = true + if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Other","category":"premade"}]}`)); err != nil { + t.Fatalf("write response: %v", err) + } + })) + defer srv.Close() + + client := elevenlabs.NewClient("key", srv.URL) + input := "LongVoiceNameAlpha" + id, err := resolveVoice(context.Background(), client, input, false) + if err != nil { + t.Fatalf("resolveVoice error: %v", err) + } + if !called { + t.Fatalf("expected voice lookup for ambiguous input") + } + if id != input { + t.Fatalf("expected %q to pass through, got %q", input, id) + } +} + func TestResolveVoiceClosestMatch(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Near","category":"premade"}]}`)); err != nil { @@ -163,7 +247,7 @@ func TestResolveVoiceClosestMatch(t *testing.T) { defer restore() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "nothing-match") + id, err := resolveVoice(context.Background(), client, "nothing-match", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -187,7 +271,7 @@ func TestResolveVoiceListOutputsTable(t *testing.T) { defer restore() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "?") + id, err := resolveVoice(context.Background(), client, "?", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) } @@ -367,7 +451,7 @@ func TestResolveVoiceByName(t *testing.T) { defer srv.Close() client := elevenlabs.NewClient("key", srv.URL) - id, err := resolveVoice(context.Background(), client, "roger") + id, err := resolveVoice(context.Background(), client, "roger", false) if err != nil { t.Fatalf("resolveVoice error: %v", err) }