Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fixed tts_server.py on macOS #418

Merged
merged 1 commit into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 70 additions & 39 deletions gptme/tools/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
else:
console.log("TTS disabled: server not available")
except (ImportError, OSError):
# will happen if tts extras not installed
# sounddevice may throw OSError("PortAudio library not found")
_available = False
# fmt: on
Expand Down Expand Up @@ -203,6 +204,67 @@ def clean_for_speech(content: str) -> str:
return content.strip()


def get_output_device():
    """Get the best available output device and its sample rate.

    Selection order:
      1. The system default output device (``sd.default.device[1]``), if it
         reports at least one output channel.
      2. The first output-capable device that belongs to the Core Audio
         host API.
      3. The first device of any host API with at least one output channel.

    Returns:
        tuple: (device_index, sample_rate), where sample_rate is the device's
        ``default_samplerate`` converted to ``int``.

    Raises:
        RuntimeError: If no suitable output device is found
    """
    devices = sd.query_devices()
    log.debug("Available audio devices:")
    for i, dev in enumerate(devices):
        log.debug(
            f" [{i}] {dev['name']} (in: {dev['max_input_channels']}, "
            f"out: {dev['max_output_channels']}, hostapi: {dev['hostapi']})"
        )

    # First try: use system default output device
    try:
        default_output = sd.default.device[1]
        if default_output is not None:
            device_info = sd.query_devices(default_output)
            if device_info["max_output_channels"] > 0:
                log.debug(f"Using system default output device: {device_info['name']}")
                return default_output, int(device_info["default_samplerate"])
    except Exception as e:
        # Best effort only: any failure here falls through to the manual search.
        log.debug(f"Could not use default device: {e}")

    # Second try: prefer CoreAudio devices.
    # Host API indices depend on how PortAudio was built and on the platform,
    # so resolve Core Audio by name instead of assuming a fixed index.
    coreaudio_apis = {
        idx
        for idx, api in enumerate(sd.query_hostapis())
        if api["name"].replace(" ", "").lower() == "coreaudio"
    }
    output_device = next(
        (
            i
            for i, d in enumerate(devices)
            if d["max_output_channels"] > 0 and d["hostapi"] in coreaudio_apis
        ),
        None,
    )

    # Third try: any device with output channels
    if output_device is None:
        output_device = next(
            (i for i, d in enumerate(devices) if d["max_output_channels"] > 0),
            None,
        )

    if output_device is None:
        raise RuntimeError(
            "No suitable audio output device found. "
            "Available devices:\n"
            + "\n".join(f" {i}: {d['name']}" for i, d in enumerate(devices))
        )

    device_info = sd.query_devices(output_device)
    device_sr = int(device_info["default_samplerate"])

    log.debug(f"Selected output device: {output_device} ({device_info['name']})")
    log.debug(f"Sample rate: {device_sr}")

    return output_device, device_sr


def audio_player_thread():
"""Background thread for playing audio."""
log.debug("Audio player thread started")
Expand All @@ -221,22 +283,13 @@ def audio_player_thread():
f"Playing audio: shape={data.shape}, sr={sample_rate}, vol={current_volume}"
)

# Play audio using explicit device index
devices = sd.query_devices()
output_device = next(
(
i
for i, d in enumerate(devices)
if d["max_output_channels"] > 0 and d["hostapi"] == 2
),
None,
)
if output_device is None:
log.error("No suitable output device found")
# Get output device
try:
output_device, _ = get_output_device()
log.debug(f"Playing on device: {output_device}")
except RuntimeError as e:
log.error(str(e))
continue

device_info = sd.query_devices(output_device)
log.debug(f"Playing on device: {output_device} ({device_info['name']})")
sd.play(data, sample_rate, device=output_device)
sd.wait() # Wait until audio is finished playing
log.debug("Finished playing audio chunk")
Expand Down Expand Up @@ -300,30 +353,8 @@ def speak(text, block=False, interrupt=True, clean=True):
chunks = [c.replace("gptme", "gpt-me") for c in chunks] # Fix pronunciation

try:
# Find the current output device
devices = sd.query_devices()
output_device = next(
(
i
for i, d in enumerate(devices)
if d["max_output_channels"] > 0 and d["hostapi"] == 2
),
None,
)
if output_device is None:
raise RuntimeError("No suitable output device found")

device_info = sd.query_devices(output_device)
device_sr = int(device_info["default_samplerate"])

log.debug("Available audio devices:")
for i, dev in enumerate(devices):
log.debug(
f" [{i}] {dev['name']} (in: {dev['max_input_channels']}, out: {dev['max_output_channels']}, hostapi: {dev['hostapi']})"
)

log.debug(f"Selected output device: {output_device} ({device_info['name']})")
log.debug(f"Sample rate: {device_sr}")
# Get output device and sample rate
output_device, device_sr = get_output_device()

# Ensure playback thread is running
ensure_playback_thread()
Expand Down
15 changes: 9 additions & 6 deletions scripts/tts_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import glob
import io
import logging
import subprocess
import shutil
import sys
from pathlib import Path

Expand All @@ -35,13 +35,19 @@
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from phonemizer.backend.espeak.wrapper import EspeakWrapper

script_dir = Path(__file__).parent

# Add Kokoro-82M to Python path
kokoro_path = (script_dir / "Kokoro-82M").absolute()
sys.path.insert(0, str(kokoro_path))

# on macOS, use workaround for espeak detection
if sys.platform == "darwin":
_ESPEAK_LIBRARY = "/opt/homebrew/Cellar/espeak/1.48.04_1/lib/libespeak.1.1.48.dylib"
Copy link
Owner Author

@ErikBjare ErikBjare Jan 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might need to search for this path more intelligently in the future as new espeak versions are released

EspeakWrapper.set_library(_ESPEAK_LIBRARY)

from kokoro import generate # fmt: skip
from models import build_model # fmt: skip

Expand Down Expand Up @@ -75,12 +81,9 @@ def load_voice(voice_name: str):


def _check_espeak():
try:
# check that espeak-ng is installed
subprocess.run(["espeak-ng", "--version"], capture_output=True, check=True)
except (subprocess.CalledProcessError, FileNotFoundError):
if not any([shutil.which("espeak"), shutil.which("espeak-ng")]):
raise RuntimeError(
"Failed to run `espeak-ng`. Try to install it using 'sudo apt-get install espeak-ng' or equivalent"
"Failed to find `espeak` or `espeak-ng`. Try to install it using 'sudo apt-get install espeak-ng' or equivalent"
) from None


Expand Down
Loading