Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Changelog

## 0.2.2 - Unreleased
### Fixed
- Voice ID resolution respects `--voice-id` and avoids misclassifying long names as IDs; when `--rate` is set it now takes precedence over `--speed`, so an out-of-range `--speed` no longer fails validation. (#7, thanks @joelbdavies)

## 0.2.1 - 2026-01-01
### Fixed
Expand Down
83 changes: 64 additions & 19 deletions cmd/speak.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,26 +62,24 @@ func init() {
return ensureAPIKey()
},
RunE: func(cmd *cobra.Command, args []string) error {
if opts.speed <= 0.5 || opts.speed >= 2.0 {
return errors.New("speed must be between 0.5 and 2.0 (e.g. 1.1 for 10% faster)")
}
if opts.rateWPM > 0 {
// Map macOS `say` rate (words per minute) to ElevenLabs speed multiplier.
opts.speed = float64(opts.rateWPM) / float64(defaultWPM)
if opts.speed <= 0.5 || opts.speed >= 2.0 {
return fmt.Errorf("rate %d wpm maps to speed %.2f, which is outside the allowed 0.5–2.0 range", opts.rateWPM, opts.speed)
}
if err := applyRateAndSpeed(&opts); err != nil {
return err
}

if opts.voiceID == "" {
opts.voiceID = os.Getenv("ELEVENLABS_VOICE_ID")
}
if opts.voiceID == "" {
opts.voiceID = os.Getenv("SAG_VOICE_ID")
forceVoiceID := cmd.Flags().Changed("voice-id")
voiceInput := opts.voiceID
if voiceInput == "" {
if env := os.Getenv("ELEVENLABS_VOICE_ID"); env != "" {
voiceInput = env
forceVoiceID = true
} else if env := os.Getenv("SAG_VOICE_ID"); env != "" {
voiceInput = env
forceVoiceID = true
}
}
client := elevenlabs.NewClient(cfg.APIKey, cfg.BaseURL)

voiceID, err := resolveVoice(cmd.Context(), client, opts.voiceID)
voiceID, err := resolveVoice(cmd.Context(), client, voiceInput, forceVoiceID)
if err != nil {
return err
}
Expand Down Expand Up @@ -139,7 +137,7 @@ func init() {
}

cmd.Flags().StringVar(&opts.voiceID, "voice-id", "", "Voice ID to use (ELEVENLABS_VOICE_ID)")
cmd.Flags().StringVarP(&opts.voiceID, "voice", "v", opts.voiceID, "Alias for --voice-id; accepts name or ID; use '?' to list voices")
cmd.Flags().StringVarP(&opts.voiceID, "voice", "v", "", "Alias for --voice-id; accepts name or ID; use '?' to list voices")
cmd.Flags().StringVar(&opts.modelID, "model-id", opts.modelID, "Model ID (default: eleven_v3). Common: eleven_multilingual_v2 (stable), eleven_flash_v2_5 (fast/cheap), eleven_turbo_v2_5 (balanced).")
cmd.Flags().StringVarP(&opts.outputPath, "output", "o", "", "Write audio to file (disables playback unless --play is also set)")
cmd.Flags().StringVar(&opts.outputFmt, "format", opts.outputFmt, "Output format (e.g. mp3_44100_128)")
Expand Down Expand Up @@ -172,6 +170,21 @@ func init() {
rootCmd.AddCommand(cmd)
}

// applyRateAndSpeed validates the playback speed held in opts, deriving it
// from the words-per-minute rate first when one was supplied.
//
// When opts.rateWPM is positive, opts.speed is overwritten with
// rateWPM/defaultWPM and only that derived value is validated; whatever speed
// was set beforehand is ignored. Otherwise opts.speed itself is validated.
// In both cases the speed must lie strictly between 0.5 and 2.0, and a
// non-nil error describing the offending value is returned when it does not.
func applyRateAndSpeed(opts *speakOptions) error {
	fromRate := opts.rateWPM > 0
	if fromRate {
		// Map macOS `say` rate (words per minute) to ElevenLabs speed multiplier.
		opts.speed = float64(opts.rateWPM) / float64(defaultWPM)
	}

	// The valid range is stated positively on purpose: every ordered
	// comparison with NaN is false, so the equivalent "<= 0.5 || >= 2.0"
	// rejection test would let a NaN speed through as valid.
	if opts.speed > 0.5 && opts.speed < 2.0 {
		return nil
	}

	if fromRate {
		return fmt.Errorf("rate %d wpm maps to speed %.2f, which is outside the allowed 0.5–2.0 range", opts.rateWPM, opts.speed)
	}
	return errors.New("speed must be between 0.5 and 2.0 (e.g. 1.1 for 10% faster)")
}

func buildTTSRequest(cmd *cobra.Command, opts speakOptions, text string) (elevenlabs.TTSRequest, error) {
flags := cmd.Flags()

Expand Down Expand Up @@ -414,7 +427,7 @@ func convertAndPlay(ctx context.Context, client *elevenlabs.Client, opts speakOp
return n, nil
}

func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput string) (string, error) {
func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput string, forceID bool) (string, error) {
voiceInput = strings.TrimSpace(voiceInput)
if voiceInput == "" {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
Expand Down Expand Up @@ -451,8 +464,27 @@ func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput str
return "", nil
}

// If input looks like an ID (UUID-like), use directly.
if len(voiceInput) >= 15 && strings.ContainsAny(voiceInput, "0123456789") {
if forceID {
return voiceInput, nil
}

if looksLikeVoiceID(voiceInput) {
if containsDigit(voiceInput) {
return voiceInput, nil
}
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
voices, err := client.ListVoices(ctx, voiceInput)
if err != nil {
return "", err
}
voiceInputLower := strings.ToLower(voiceInput)
for _, v := range voices {
if strings.ToLower(v.Name) == voiceInputLower {
fmt.Fprintf(os.Stderr, "using voice %s (%s)\n", v.Name, v.VoiceID)
return v.VoiceID, nil
}
}
return voiceInput, nil
}

Expand All @@ -477,6 +509,19 @@ func resolveVoice(ctx context.Context, client *elevenlabs.Client, voiceInput str
return "", fmt.Errorf("voice %q not found; try 'sag voices' or -v '?'", voiceInput)
}

// looksLikeVoiceID reports whether voiceInput is shaped like an identifier
// rather than a display name: at least 15 bytes long and containing no space
// character.
func looksLikeVoiceID(voiceInput string) bool {
	const minIDLen = 15
	if len(voiceInput) < minIDLen {
		return false
	}
	hasSpace := strings.ContainsRune(voiceInput, ' ')
	return !hasSpace
}

// containsDigit reports whether s contains at least one ASCII decimal digit
// ('0' through '9'). Digits from other scripts do not count.
func containsDigit(s string) bool {
	// ASCII digits are always encoded as a single byte, so scanning bytes
	// finds exactly the same characters as scanning runes.
	for i := 0; i < len(s); i++ {
		if c := s[i]; '0' <= c && c <= '9' {
			return true
		}
	}
	return false
}

func inferFormatFromExt(path string) string {
ext := strings.ToLower(filepath.Ext(path))
switch ext {
Expand Down
98 changes: 91 additions & 7 deletions cmd/speak_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"context"
"io"
"math"
"net/http"
"net/http/httptest"
"os"
Expand Down Expand Up @@ -116,6 +117,24 @@ func TestResolveTextEmptyFile(t *testing.T) {
}
}

// TestApplyRateOverridesInvalidSpeed checks that a positive rateWPM replaces
// an out-of-range speed instead of tripping the speed validation, and that the
// resulting speed equals rateWPM divided by defaultWPM.
func TestApplyRateOverridesInvalidSpeed(t *testing.T) {
	const wpm = 200
	o := speakOptions{speed: 0.3, rateWPM: wpm}

	err := applyRateAndSpeed(&o)
	if err != nil {
		t.Fatalf("applyRateAndSpeed error: %v", err)
	}

	expected := float64(wpm) / float64(defaultWPM)
	if diff := math.Abs(o.speed - expected); diff > 1e-9 {
		t.Fatalf("expected speed %.2f, got %.2f", expected, o.speed)
	}
}

// TestApplyRateAndSpeedInvalidSpeed checks that an out-of-range speed is
// rejected when no rate is supplied to override it.
func TestApplyRateAndSpeedInvalidSpeed(t *testing.T) {
	o := speakOptions{speed: 0.3}

	err := applyRateAndSpeed(&o)
	if err != nil {
		return
	}
	t.Fatalf("expected speed validation error")
}

func TestResolveVoiceDefaultsToFirst(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Alpha","category":"premade"},{"voice_id":"id2","name":"Beta","category":"premade"}]}`)); err != nil {
Expand All @@ -125,7 +144,7 @@ func TestResolveVoiceDefaultsToFirst(t *testing.T) {
defer srv.Close()

client := elevenlabs.NewClient("key", srv.URL)
id, err := resolveVoice(context.Background(), client, "")
id, err := resolveVoice(context.Background(), client, "", false)
if err != nil {
t.Fatalf("resolveVoice error: %v", err)
}
Expand All @@ -134,15 +153,15 @@ func TestResolveVoiceDefaultsToFirst(t *testing.T) {
}
}

func TestResolveVoicePassThroughID(t *testing.T) {
// Should short-circuit without hitting the server when input looks like an ID.
func TestResolveVoicePassThroughIDWithDigits(t *testing.T) {
// Should short-circuit without hitting the server when input looks like an ID with digits.
srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {
t.Fatalf("server should not be called for ID pass-through")
}))
defer srv.Close()

client := elevenlabs.NewClient("key", srv.URL)
id, err := resolveVoice(context.Background(), client, "abc1234567890123")
id, err := resolveVoice(context.Background(), client, "abc1234567890123", false)
if err != nil {
t.Fatalf("resolveVoice error: %v", err)
}
Expand All @@ -151,6 +170,71 @@ func TestResolveVoicePassThroughID(t *testing.T) {
}
}

// TestResolveVoiceForceIDPassThrough verifies that with forceID set the input
// is returned unchanged and no request reaches the server, even though the
// input contains no digits.
func TestResolveVoiceForceIDPassThrough(t *testing.T) {
	// Should short-circuit without hitting the server when --voice-id is set.
	srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {
		// Handlers run on the server's goroutines, and t.Fatalf (FailNow) may
		// only be called from the goroutine running the test, so the failure
		// is recorded with t.Errorf instead.
		t.Errorf("server should not be called for forced ID pass-through")
	}))
	defer srv.Close()

	client := elevenlabs.NewClient("key", srv.URL)
	input := "OnlyLettersVoiceID"
	id, err := resolveVoice(context.Background(), client, input, true)
	if err != nil {
		t.Fatalf("resolveVoice error: %v", err)
	}
	if id != input {
		t.Fatalf("expected ID to pass through, got %q", id)
	}
}

// TestResolveVoiceLongNameExactMatch verifies that a long input with no spaces
// and no digits is looked up on the server and resolved to the ID of the voice
// whose name equals the input.
func TestResolveVoiceLongNameExactMatch(t *testing.T) {
	var called bool
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		called = true
		if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id-long","name":"LongVoiceNameAlpha","category":"premade"}]}`)); err != nil {
			// Handlers run on the server's goroutines, and t.Fatalf (FailNow)
			// may only be called from the goroutine running the test, so the
			// failure is recorded with t.Errorf instead.
			t.Errorf("write response: %v", err)
		}
	}))
	defer srv.Close()

	client := elevenlabs.NewClient("key", srv.URL)
	id, err := resolveVoice(context.Background(), client, "LongVoiceNameAlpha", false)
	if err != nil {
		t.Fatalf("resolveVoice error: %v", err)
	}
	if !called {
		t.Fatalf("expected voice lookup for long name")
	}
	if id != "id-long" {
		t.Fatalf("expected id-long, got %q", id)
	}
}

// TestResolveVoiceLooksLikeIDNoMatchPassesThrough verifies that a long input
// with no spaces and no digits is still looked up on the server, and is
// returned unchanged when no listed voice has that name.
func TestResolveVoiceLooksLikeIDNoMatchPassesThrough(t *testing.T) {
	var called bool
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		called = true
		if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Other","category":"premade"}]}`)); err != nil {
			// Handlers run on the server's goroutines, and t.Fatalf (FailNow)
			// may only be called from the goroutine running the test, so the
			// failure is recorded with t.Errorf instead.
			t.Errorf("write response: %v", err)
		}
	}))
	defer srv.Close()

	client := elevenlabs.NewClient("key", srv.URL)
	input := "LongVoiceNameAlpha"
	id, err := resolveVoice(context.Background(), client, input, false)
	if err != nil {
		t.Fatalf("resolveVoice error: %v", err)
	}
	if !called {
		t.Fatalf("expected voice lookup for ambiguous input")
	}
	if id != input {
		t.Fatalf("expected %q to pass through, got %q", input, id)
	}
}

func TestResolveVoiceClosestMatch(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := w.Write([]byte(`{"voices":[{"voice_id":"id1","name":"Near","category":"premade"}]}`)); err != nil {
Expand All @@ -163,7 +247,7 @@ func TestResolveVoiceClosestMatch(t *testing.T) {
defer restore()

client := elevenlabs.NewClient("key", srv.URL)
id, err := resolveVoice(context.Background(), client, "nothing-match")
id, err := resolveVoice(context.Background(), client, "nothing-match", false)
if err != nil {
t.Fatalf("resolveVoice error: %v", err)
}
Expand All @@ -187,7 +271,7 @@ func TestResolveVoiceListOutputsTable(t *testing.T) {
defer restore()

client := elevenlabs.NewClient("key", srv.URL)
id, err := resolveVoice(context.Background(), client, "?")
id, err := resolveVoice(context.Background(), client, "?", false)
if err != nil {
t.Fatalf("resolveVoice error: %v", err)
}
Expand Down Expand Up @@ -367,7 +451,7 @@ func TestResolveVoiceByName(t *testing.T) {
defer srv.Close()

client := elevenlabs.NewClient("key", srv.URL)
id, err := resolveVoice(context.Background(), client, "roger")
id, err := resolveVoice(context.Background(), client, "roger", false)
if err != nil {
t.Fatalf("resolveVoice error: %v", err)
}
Expand Down
Loading