diff --git a/docs/get-started/quickstart.mdx b/docs/get-started/quickstart.mdx
index f88bffe0..235d1ec6 100644
--- a/docs/get-started/quickstart.mdx
+++ b/docs/get-started/quickstart.mdx
@@ -4,8 +4,14 @@ pagination_next: null
description: Take your first steps with the Speechmatics API.
---
import { LinkCard } from "@site/src/theme/LinkCard";
+import DevIcon from '@site/src/components/devicon';
import { Flex, Grid } from "@radix-ui/themes";
-import { ChevronsRightIcon, FileAudio, BotMessageSquare, BookOpen, Braces } from "lucide-react";
+import { ChevronsRightIcon, FileAudio, BotMessageSquare, BookOpen, Braces, Speech, GraduationCap } from "lucide-react";
+import Head from '@docusaurus/Head';
+
+
+
+
# Quickstart
@@ -30,9 +36,15 @@ The easiest way to try our products is via the [web portal](https://portal.speec
/>
}
- href="/voice-agents-flow/"
+ href="/voice-agents/overview"
+ />
+ }
+ href="/text-to-speech/quickstart"
/>
@@ -51,4 +63,16 @@ The easiest way to try our products is via the [web portal](https://portal.speec
icon={}
href="https://github.com/speechmatics/speechmatics-js-sdk"
/>
+ }
+ href="https://github.com/speechmatics/speechmatics-python-sdk"
+ />
+ }
+ href="https://github.com/speechmatics/speechmatics-academy"
+ />
\ No newline at end of file
diff --git a/docs/get-started/sidebar.ts b/docs/get-started/sidebar.ts
index 55816967..817736f4 100644
--- a/docs/get-started/sidebar.ts
+++ b/docs/get-started/sidebar.ts
@@ -12,6 +12,7 @@ export default {
{
type: "doc",
id: "get-started/quickstart",
+ label: "Quickstart",
},
{
type: "doc",
diff --git a/docs/index.mdx b/docs/index.mdx
index b32fdfa6..8c79d209 100644
--- a/docs/index.mdx
+++ b/docs/index.mdx
@@ -1,14 +1,14 @@
---
sidebar_position: 1
title: Welcome
-description: Enterprise-grade APIs for speech-to-text and voice AI agents.
+description: Enterprise-grade APIs for speech-to-text, text-to-speech, and voice AI agents.
hide_table_of_contents: true
pagination_prev: null
pagination_next: null
---
import { LinkCard } from "@site/src/theme/LinkCard";
-import { ChevronsRightIcon, FileAudio, BotMessageSquare, } from "lucide-react";
+import { ChevronsRightIcon, FileAudio, BotMessageSquare, Speech } from "lucide-react";
import { Box, Flex, Card, Grid, Inset } from "@radix-ui/themes";
# Welcome to Speechmatics
@@ -31,7 +31,8 @@ With Speechmatics, you can:
- Receive immediate and continuous text transcriptions from live audio streams or calls (real-time transcription)
- Generate complete transcripts from recorded audio files (batch transcription)
-- Build voice AI agents that naturally converse with your users (Flow service)
+- Build and power your applications with responsive, real-time voice AI using our Voice SDK
+- Transform your text into speech using our Text-to-Speech API
- Choose flexible deployment options: use our managed SaaS platform or host Speechmatics APIs within your infrastructure (on-prem)
## Developer quickstart
@@ -54,9 +55,16 @@ With Speechmatics, you can:
}
title="Build a voice agent"
- description="Use our Flow service to build conversational AI agents with ease"
+ description="Use our Voice SDK to build voice agents with ease"
direction="column"
- href="/voice-agents-flow"
+ href="/voice-agents/overview"
+ />
+ }
+ title="Generate speech from text"
+ description="Use our TTS API to generate speech from text"
+ direction="column"
+ href="/text-to-speech/quickstart"
/>
@@ -96,8 +104,8 @@ With Speechmatics, you can:
+
+
diff --git a/docs/integrations-and-sdks/Overview.mdx b/docs/integrations-and-sdks/index.mdx
--- a/docs/integrations-and-sdks/Overview.mdx
+++ b/docs/integrations-and-sdks/index.mdx
-Discover which integrations and SDKs to use to add Speechmatics speech and voice agents to your stack.
+
+# Overview
Use this page to quickly choose how you connect to Speechmatics: through popular voice agent platforms, or directly from your code using our SDKs. Start with the option that best matches your current stack, then follow its quickstart.
@@ -14,92 +21,77 @@ Use this page to quickly choose how you connect to Speechmatics: through popular
Choose an integration to build accurate, low-latency voice agents rapidly with the Speechmatics API available in these frameworks.
-
+
}
+ description="Turnkey voice agent platform. Deploy fast with no code. Rapid prototyping. Best for: non-technical builders."
+ icon={
}
href="/integrations-and-sdks/vapi"
/>
}
+ description="Open-source framework for building agents with LiveKit’s WebRTC infra. Simple setup. Best for: engineers."
+ icon={
}
href="/integrations-and-sdks/livekit"
/>
}
- href="/integrations-and-sdks/pipecat"
- />
- }
- href="/integrations-and-sdks/vapi"
- />
- }
- href="/integrations-and-sdks/livekit"
- />
- }
+ description="Open-source framework with full control of the voice pipeline in code. Complex agents. Best for: power builders."
+ icon={
}
href="/integrations-and-sdks/pipecat"
/>
+
+
+
## SDKs
Use an SDK if you want to call Speechmatics directly from your own services or applications.
### Speech to text
-
+
}
+ title="Voice SDK (Python)"
+ description="Build responsive voice agents"
+ icon={}
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice"
/>
- }
+ }
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/rt"
/>
- }
- href="https://github.com/speechmatics/speechmatics-javascript-sdk/tree/main/sdk/rt"
- />
- }
- href="https://github.com/speechmatics/speechmatics-dotnet-community-sdk"
- />
-}
+ }
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/batch"
/>
}
+ title="Realtime (JavaScript)"
+ description="Stream audio and receive instant transcripts"
+ icon={}
+ href="https://github.com/speechmatics/speechmatics-javascript-sdk/tree/main/sdk/rt"
+ />
+ }
href="https://github.com/speechmatics/speechmatics-javascript-sdk/tree/main/sdk/batch"
/>
}
+ title="Realtime (.NET)"
+ description="Stream audio and receive instant transcripts"
+ icon={}
+ href="https://github.com/speechmatics/speechmatics-dotnet-community-sdk"
+ />
+ }
href="https://github.com/speechmatics/speechmatics-rust-sdk"
/>
@@ -107,22 +99,11 @@ Use an SDK if you want to call Speechmatics directly from your own services or a
### Text to speech
-
+
}
+ title="TTS (Python)"
+ description="Convert text to speech"
+ icon={}
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/tts"
/>
-
-### Voice SDK
-
-
-}
- href="https://github.com/speechmatics/speechmatics-python-sdk/blob/main/sdk/voice/README.md"
- />
-
\ No newline at end of file
diff --git a/docs/integrations-and-sdks/sdks.mdx b/docs/integrations-and-sdks/sdks.mdx
index 0d15cd66..0e18110e 100644
--- a/docs/integrations-and-sdks/sdks.mdx
+++ b/docs/integrations-and-sdks/sdks.mdx
@@ -1,15 +1,18 @@
---
-title: SDKs
+id: sdks
description: Learn how to use the Speechmatics SDKs
---
import { LinkCard } from "@site/src/theme/LinkCard";
+import DevIcon from '@site/src/components/devicon';
import { Flex, Grid } from "@radix-ui/themes";
-import {
- ChevronsRightIcon,
- FileAudio,
- BotMessageSquare,
-} from "lucide-react";
+import Head from '@docusaurus/Head';
+
+
+
+
+
+# SDKs
Find the right Speechmatics SDK for your language and start building fast.
@@ -17,78 +20,76 @@ Each SDK card provides simple installation steps and practical examples to get y
## Speech to text SDKs
-
+
+
}
+ direction="column"
+ title="Voice (Python)"
+ description="Add voice features to your agentic workflows"
+ icon={}
quickstart="/voice-sdk"
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice"
/>
- }
+ }
quickstart="/rt-stt-sdk"
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/rt"
/>
}
- quickstart="/js-rt-stt-sdk"
- href="https://github.com/speechmatics/speechmatics-js-sdk/tree/main/packages/real-time-client"
- />
- }
- quickstart="/dotnet-rt-stt-sdk"
- href="https://github.com/speechmatics/speechmatics-dotnet/blob/main/README.md"
- />
- }
+ direction="column"
+ title="Batch (Python)"
+ description="Input audio files and output transcription"
+ icon={}
quickstart="/python-batch-stt-sdk"
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/batch"
/>
}
+ direction="column"
+ title="Realtime (JavaScript)"
+ description="Stream audio and receive instant transcripts"
+ icon={}
+ quickstart="/js-rt-stt-sdk"
+ href="https://github.com/speechmatics/speechmatics-js-sdk/tree/main/packages/real-time-client"
+ />
+ }
quickstart="/js-batch-stt-sdk"
href="https://github.com/speechmatics/speechmatics-js-sdk/tree/main/packages/batch-client"
+ />
+ }
+ quickstart="/dotnet-rt-stt-sdk"
+ href="https://github.com/speechmatics/speechmatics-dotnet/blob/main/README.md"
/>
}
+ direction="column"
+ title="Realtime and Batch (Rust)"
+ description="Input audio streams or files and receive transcription"
+ icon={}
quickstart="/rust-stt-sdk"
href="https://github.com/speechmatics/speechmatics-rs/blob/main/README.md"
/>
+
## Text to speech SDKs
-
+
}
+ direction="column"
+ title="TTS (Python)"
+ description="Convert text to speech"
+ icon={}
quickstart="/python-tts-sdk"
href="https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/tts"
/>
-
-## Voice SDKs
-
-
- }
- quickstart="/python-voice-sdk"
- href="https://github.com/speechmatics/speechmatics-python-sdk/blob/main/sdk/voice/README.md"
- />
-
\ No newline at end of file
diff --git a/docs/integrations-and-sdks/sidebar.ts b/docs/integrations-and-sdks/sidebar.ts
index 4b9dc1f4..337d99a5 100644
--- a/docs/integrations-and-sdks/sidebar.ts
+++ b/docs/integrations-and-sdks/sidebar.ts
@@ -6,7 +6,7 @@ export default {
items: [
{
type: "doc",
- id: "integrations-and-sdks/Overview",
+ id: "integrations-and-sdks/index",
label: "Overview",
},
{
diff --git a/docs/integrations-and-sdks/vapi.mdx b/docs/integrations-and-sdks/vapi.mdx
index 7b42ffea..3ba41864 100644
--- a/docs/integrations-and-sdks/vapi.mdx
+++ b/docs/integrations-and-sdks/vapi.mdx
@@ -25,7 +25,6 @@ Vapi is perfect for:
- **Speaker diarization**: Speechmatics is the only transcriber on Vapi that provides speaker diarization, which identifies and labels who said what in multi-speaker scenarios.
## Quickstart
-
### Requirements
- [VAPI Account](https://vapi.ai)
- [VAPI Private Key](https://dashboard.vapi.ai)
diff --git a/docs/speech-to-text/batch/input.mdx b/docs/speech-to-text/batch/input.mdx
index fbfa6f5f..ecc961fb 100644
--- a/docs/speech-to-text/batch/input.mdx
+++ b/docs/speech-to-text/batch/input.mdx
@@ -13,7 +13,6 @@ import batchSchema from "!openapi-schema-loader!@site/spec/batch.yaml";
:::info
This page documents audio inputs for transcription by **REST API** (a.k.a. Batch SaaS).
* For Realtime transcription, see the [Realtime Transcription input](/speech-to-text/realtime/input).
-* For Flow Voice AI, see the [Flow Voice AI supported formats and limits](/voice-agents-flow/supported-formats-and-limits).
:::
## Supported file types
diff --git a/docs/speech-to-text/languages.mdx b/docs/speech-to-text/languages.mdx
index ed217a0b..712c4781 100644
--- a/docs/speech-to-text/languages.mdx
+++ b/docs/speech-to-text/languages.mdx
@@ -27,7 +27,7 @@ keywords:
]
---
-# Languages & Models
+# Languages and Models
### Operating points
diff --git a/docs/speech-to-text/realtime/input.mdx b/docs/speech-to-text/realtime/input.mdx
index 06b2ce15..a93047ea 100644
--- a/docs/speech-to-text/realtime/input.mdx
+++ b/docs/speech-to-text/realtime/input.mdx
@@ -14,7 +14,6 @@ import realtimeSchema from "!asyncapi-schema-loader!@site/spec/realtime.yaml"
:::info
This page is about the **Real-time transcription API** (websocket).
* For information on Batch SaaS, see the [Batch SaaS input](/speech-to-text/batch/input).
-* For information on Flow Voice AI, see the [Flow Voice AI input](/voice-agents-flow/supported-formats-and-limits).
:::
## Supported input audio formats
diff --git a/docs/voice-agents/assets/basic-quickstart.py b/docs/voice-agents/assets/basic-quickstart.py
index 00544746..e649dbec 100644
--- a/docs/voice-agents/assets/basic-quickstart.py
+++ b/docs/voice-agents/assets/basic-quickstart.py
@@ -6,7 +6,7 @@
async def main():
# Create client with preset
client = VoiceAgentClient(
- api_key=os.getenv("SPEECHMATICS_API_KEY"),
+ api_key=os.getenv("YOUR_API_KEY"),
preset="scribe"
)
diff --git a/docs/voice-agents/assets/custom-config.py b/docs/voice-agents/assets/custom-config.py
index 87d60cdf..b55e8854 100644
--- a/docs/voice-agents/assets/custom-config.py
+++ b/docs/voice-agents/assets/custom-config.py
@@ -7,4 +7,4 @@
end_of_utterance_mode=EndOfUtteranceMode.ADAPTIVE,
)
-client = VoiceAgentClient(api_key=api_key, config=config)
\ No newline at end of file
+client = VoiceAgentClient(api_key=os.getenv("YOUR_API_KEY"), config=config)
diff --git a/docs/voice-agents/assets/presets.py b/docs/voice-agents/assets/presets.py
index f73a0316..a85b3028 100644
--- a/docs/voice-agents/assets/presets.py
+++ b/docs/voice-agents/assets/presets.py
@@ -1,19 +1,19 @@
# Presets provide optimized configurations for common use cases:
# External end of turn preset - endpointing handled by the client
-client = VoiceAgentClient(api_key=api_key, preset="external")
+client = VoiceAgentClient(api_key=os.getenv("YOUR_API_KEY"), preset="external")
# Scribe preset - for note-taking
-client = VoiceAgentClient(api_key=api_key, preset="scribe")
+client = VoiceAgentClient(api_key=os.getenv("YOUR_API_KEY"), preset="scribe")
# Low latency preset - for fast responses
-client = VoiceAgentClient(api_key=api_key, preset="low_latency")
+client = VoiceAgentClient(api_key=os.getenv("YOUR_API_KEY"), preset="low_latency")
# Conversation preset - for natural dialogue
-client = VoiceAgentClient(api_key=api_key, preset="conversation_adaptive")
+client = VoiceAgentClient(api_key=os.getenv("YOUR_API_KEY"), preset="conversation_adaptive")
# Advanced conversation with ML turn detection
-client = VoiceAgentClient(api_key=api_key, preset="conversation_smart_turn")
+client = VoiceAgentClient(api_key=os.getenv("YOUR_API_KEY"), preset="conversation_smart_turn")
# Captions preset - for live captioning
-client = VoiceAgentClient(api_key=api_key, preset="captions")
\ No newline at end of file
+client = VoiceAgentClient(api_key=os.getenv("YOUR_API_KEY"), preset="captions")
diff --git a/docs/voice-agents/features.mdx b/docs/voice-agents/features.mdx
index 3b432edd..8232cc91 100644
--- a/docs/voice-agents/features.mdx
+++ b/docs/voice-agents/features.mdx
@@ -1,42 +1,65 @@
---
-description: Learn about configuration parameters for the voice SDK
+description: Learn about configuration parameters for the Voice SDK
---
import CodeBlock from '@theme/CodeBlock';
-# Configuration Parameters
+# Features
-### Basic Parameters
-**language** (str, default: "en") Language code for transcription (e.g., "en", "es", "fr"). See supported languages.
+### Basic parameters
+`language` (str, default: "en")
+Language code for transcription (e.g., "en", "es", "fr").
+See [supported languages](/speech-to-text/languages).
-**operating_point** (OperatingPoint, default: ENHANCED) Balance accuracy vs latency. Options: STANDARD or ENHANCED.
+`operating_point` (OperatingPoint, default: ENHANCED)
+Balance accuracy vs. latency.
+Options: `STANDARD` or `ENHANCED`.
-**domain** (str, default: None) Domain-specific model (e.g., "finance", "medical"). See supported languages and domains.
+`domain` (str, default: None)
+Domain-specific model (e.g., "finance", "medical").
+See [supported languages and domains](/speech-to-text/languages).
-**output_locale** (str, default: None) Output locale for formatting (e.g., "en-GB", "en-US"). See supported languages and locales.
+`output_locale` (str, default: None)
+Output locale for formatting (e.g., "en-GB", "en-US").
+See [supported languages and locales](/speech-to-text/languages).
-**enable_diarization** (bool, default: False) Enable speaker diarization to identify and label different speakers.
+`enable_diarization` (bool, default: False)
+Enable speaker diarization to identify and label different speakers.
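+
+As a quick sketch, the basic parameters above combine into a single config object (the import path is an assumption; check the SDK README):
+
+```python
+from speechmatics.voice import OperatingPoint, VoiceAgentConfig  # import path assumed
+
+config = VoiceAgentConfig(
+    language="en",                            # transcription language
+    operating_point=OperatingPoint.ENHANCED,  # favour accuracy over latency
+    output_locale="en-GB",                    # British English formatting
+    enable_diarization=True,                  # label who said what
+)
+```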
-### Turn Detection Parameters
-**end_of_utterance_mode** (EndOfUtteranceMode, default: FIXED) Controls how turn endings are detected:
+### Turn detection
+`end_of_utterance_mode` (EndOfUtteranceMode, default: FIXED)
+Controls how turn endings are detected:
-FIXED - Uses fixed silence threshold. Fast but may split slow speech.
-ADAPTIVE - Adjusts delay based on speech rate, pauses, and disfluencies. Best for natural conversation.
-SMART_TURN - Uses ML model to detect acoustic turn-taking cues. Requires [smart] extras.
-EXTERNAL - Manual control via client.finalize(). For custom turn logic.
-end_of_utterance_silence_trigger (float, default: 0.2) Silence duration in seconds to trigger turn end.
+- `FIXED`: Uses a fixed silence threshold.
+Fast, but may split slow speech.
+- `ADAPTIVE`: Adjusts the delay based on speech rate, pauses, and disfluencies.
+Best for natural conversation.
+- `SMART_TURN`: Uses an ML model to detect acoustic turn-taking cues.
+Requires the `[smart]` extras (`pip install speechmatics-voice[smart]`).
+- `EXTERNAL`: Manual control via `client.finalize()`.
+For custom turn logic.
-end_of_utterance_max_delay (float, default: 10.0) Maximum delay before forcing turn end.
+`end_of_utterance_silence_trigger` (float, default: 0.2)
+Silence duration in seconds to trigger turn end.
-max_delay (float, default: 0.7) Maximum transcription delay for word emission.
+`end_of_utterance_max_delay` (float, default: 10.0)
+Maximum delay before forcing turn end.
-### Speaker Configuration
-**speaker_sensitivity** (float, default: 0.5) Diarization sensitivity between 0.0 and 1.0. Higher values detect more speakers.
+`max_delay` (float, default: 0.7)
+Maximum transcription delay for word emission.
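+
+For example, a sketch of adaptive turn detection with a slightly longer silence trigger, using the parameters above (import path assumed):
+
+```python
+from speechmatics.voice import EndOfUtteranceMode, VoiceAgentConfig  # import path assumed
+
+config = VoiceAgentConfig(
+    end_of_utterance_mode=EndOfUtteranceMode.ADAPTIVE,  # adapt to speech rate and pauses
+    end_of_utterance_silence_trigger=0.4,               # end a turn after 0.4 s of silence
+    end_of_utterance_max_delay=10.0,                    # force a turn end after 10 s at most
+)
+```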
-**max_speakers** (int, default: None) Limit maximum number of speakers to detect.
+### Speaker configuration
+`speaker_sensitivity` (float, default: 0.5)
+Diarization sensitivity between 0.0 and 1.0.
+Higher values detect more speakers.
-**prefer_current_speaker** (bool, default: False) Give extra weight to current speaker for word grouping.
+`max_speakers` (int, default: None)
+Limit maximum number of speakers to detect.
-**speaker_config** (SpeakerFocusConfig, default: SpeakerFocusConfig()) Configure speaker focus/ignore rules.
+`prefer_current_speaker` (bool, default: False)
+Give extra weight to current speaker for word grouping.
+
+`speaker_config` (SpeakerFocusConfig, default: SpeakerFocusConfig())
+Configure speaker focus/ignore rules.
{
@@ -61,7 +84,8 @@ config = VoiceAgentConfig(
)`
}
-**known_speakers** (list[SpeakerIdentifier], default: []) Pre-enrolled speaker identifiers for speaker identification.
+`known_speakers` (list[SpeakerIdentifier], default: [])
+Pre-enrolled speaker identifiers for speaker identification.
{
@@ -76,8 +100,10 @@ config = VoiceAgentConfig(
)`
}
-### Language & Vocabulary
-**additional_vocab** (list[AdditionalVocabEntry], default: []) Custom vocabulary for domain-specific terms.
+### Language and vocabulary
+`additional_vocab` (list[AdditionalVocabEntry], default: [])
+
+Custom vocabulary for domain-specific terms.
{
@@ -95,25 +121,33 @@ config = VoiceAgentConfig(
)`
}
-**punctuation_overrides** (dict, default: None) Custom punctuation rules.
+`punctuation_overrides` (dict, default: None)
+Custom punctuation rules.
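+
+A sketch of a punctuation override (the `permitted_marks` and `sensitivity` keys mirror the speech-to-text punctuation schema; verify them against the API reference):
+
+```python
+from speechmatics.voice import VoiceAgentConfig  # import path assumed
+
+config = VoiceAgentConfig(
+    punctuation_overrides={
+        "permitted_marks": [".", ",", "?"],  # assumed key: restrict emitted marks
+        "sensitivity": 0.5,                  # assumed key: 0.0 = fewer marks, 1.0 = more
+    },
+)
+```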
-### Audio Parameters
-**sample_rate** (int, default: 16000) Audio sample rate in Hz.
+### Audio parameters
+`sample_rate` (int, default: 16000)
+Audio sample rate in Hz.
-**audio_encoding** (AudioEncoding, default: PCM_S16LE) Audio encoding format.
+`audio_encoding` (AudioEncoding, default: PCM_S16LE)
+Audio encoding format.
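+
+For example, making the documented defaults explicit (a sketch; `AudioEncoding` values other than `PCM_S16LE` should be checked against the SDK):
+
+```python
+from speechmatics.voice import AudioEncoding, VoiceAgentConfig  # import path assumed
+
+config = VoiceAgentConfig(
+    sample_rate=16000,                       # Hz; must match the audio you send
+    audio_encoding=AudioEncoding.PCM_S16LE,  # 16-bit little-endian PCM (the default)
+)
+```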
-### Advanced Parameters
-**transcription_update_preset** (TranscriptionUpdatePreset, default: COMPLETE) Controls when to emit updates: COMPLETE, COMPLETE_PLUS_TIMING, WORDS, WORDS_PLUS_TIMING, or TIMING.
+### Advanced parameters
+`transcription_update_preset` (TranscriptionUpdatePreset, default: COMPLETE)
+Controls when to emit updates: `COMPLETE`, `COMPLETE_PLUS_TIMING`, `WORDS`, `WORDS_PLUS_TIMING`, or `TIMING`.
-**speech_segment_config** (SpeechSegmentConfig, default: SpeechSegmentConfig()) Fine-tune segment generation and post-processing.
+`speech_segment_config` (SpeechSegmentConfig, default: SpeechSegmentConfig())
+Fine-tune segment generation and post-processing.
-**smart_turn_config** (SmartTurnConfig, default: None) Configure SMART_TURN behavior (buffer length, threshold).
+`smart_turn_config` (SmartTurnConfig, default: None)
+Configure SMART_TURN behavior (buffer length, threshold).
-**include_results** (bool, default: False) Include word-level timing data in segments.
+`include_results` (bool, default: False)
+Include word-level timing data in segments.
-**include_partials** (bool, default: True) Emit partial segments. Set to False for final-only output.
+`include_partials` (bool, default: True)
+Emit partial segments. Set to `False` for final-only output.
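+
+For instance, a final-only sketch for consumers that can't handle partial updates, using the parameters above:
+
+```python
+from speechmatics.voice import VoiceAgentConfig  # import path assumed
+
+config = VoiceAgentConfig(
+    include_partials=False,  # emit only finalized segments
+    include_results=True,    # attach word-level timing to each segment
+)
+```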
-### Configuration with Overlays
+### Configuration with overlays
Use presets as a starting point and customize with overlays:
@@ -138,7 +172,7 @@ config = VoiceAgentConfigPreset.SCRIBE(
}
-### Configuration Serialization
+### Configuration serialization
Export and import configurations as JSON:
@@ -157,4 +191,5 @@ config = VoiceAgentConfig.from_json('{"language": "en", "enable_diarization": tr
-For more information, see the [voice agent Python SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on github.
+For more information, see the [Voice SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on GitHub.
diff --git a/docs/voice-agents/overview.mdx b/docs/voice-agents/overview.mdx
new file mode 100644
index 00000000..4f7dced0
--- /dev/null
+++ b/docs/voice-agents/overview.mdx
@@ -0,0 +1,111 @@
+---
+description: Learn how to build voice-enabled applications with the Speechmatics Voice SDK
+---
+import Admonition from '@theme/Admonition';
+import CodeBlock from '@theme/CodeBlock';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+import pythonVoiceQuickstart from "./assets/basic-quickstart.py?raw"
+import pythonVoicePresets from "./assets/presets.py?raw"
+import pythonVoiceCustomConfig from "./assets/custom-config.py?raw"
+
+# Voice agents overview
+The Voice SDK builds on our Realtime API to provide features optimized for conversational AI:
+
+- **Intelligent segmentation**: groups words into meaningful speech segments per speaker.
+- **Turn detection**: automatically detects when speakers finish talking.
+- **Speaker management**: focus on or ignore specific speakers in multi-speaker scenarios.
+- **Preset configurations**: offers ready-to-use settings for conversations, note-taking, and captions.
+- **Simplified event handling**: delivers clean, structured segments instead of raw word-level events.
+
+### Voice SDK vs Realtime SDK
+
+Use the Voice SDK when:
+
+- You're building conversational AI or voice agents
+- You need automatic turn detection
+- You want speaker-focused transcription
+- You need ready-to-use presets for common scenarios
+
+Use the Realtime SDK when:
+
+- You need the raw stream of word-by-word transcription data
+- You're building custom segmentation logic
+- You want fine-grained control over every event
+- You're processing audio files or custom workflows
+
+## Getting started
+
+### 1. Create an API key
+
+[Create an API key in the portal](https://portal.speechmatics.com/settings/api-keys) to access the Voice SDK. Store your key securely as a managed secret.
+
+### 2. Install dependencies
+
+```bash
+# Standard installation
+pip install speechmatics-voice
+
+# With SMART_TURN (ML-based turn detection)
+pip install speechmatics-voice[smart]
+```
+
+### 3. Configure
+
+Set the `YOUR_API_KEY` environment variable to the API key you created in the portal:
+
+<Tabs>
+<TabItem value="quickstart" label="Quickstart">
+<CodeBlock language="python">
+  {pythonVoiceQuickstart}
+</CodeBlock>
+</TabItem>
+<TabItem value="presets" label="Presets">
+<CodeBlock language="python">
+  {pythonVoicePresets}
+</CodeBlock>
+</TabItem>
+<TabItem value="custom-config" label="Custom config">
+<CodeBlock language="python">
+  {pythonVoiceCustomConfig}
+</CodeBlock>
+</TabItem>
+</Tabs>
+
+## FAQ
+
+### Implementation and deployment
+
+
+Can I deploy this in my own environment?
+
+Yes! The Voice SDK can be consumed via our managed service or deployed in your own environment. To learn more about on-premises deployment options, [speak to sales](https://www.speechmatics.com/speak-to-sales).
+
+
+### Support
+
+
+Where can I provide feedback or get help?
+
+You can submit feedback, bug reports, or feature requests through the Speechmatics [GitHub discussions](https://github.com/orgs/speechmatics/discussions).
+
+
+## Next steps
+
+For more information, see the [Voice SDK](https://github.com/speechmatics/speechmatics-python-sdk/tree/main/sdk/voice) on GitHub.
+
+To learn more, check out [the Speechmatics Academy](https://github.com/speechmatics/speechmatics-academy).
+
+### Building something amazing
+
+We'd love to hear about your project and help you succeed.
+
+**Get in touch with us:**
+- Share your feedback and feature requests
+- Ask questions about implementation
+- Discuss enterprise pricing and custom voices
+- Report any issues or bugs you encounter
+
+[Contact our team](https://support.speechmatics.com) or [join our developer community](https://www.reddit.com/r/Speechmatics) to connect with other builders in voice AI.
+
diff --git a/docs/voice-agents/sidebar.ts b/docs/voice-agents/sidebar.ts
index 6eedfcaf..412e29bc 100644
--- a/docs/voice-agents/sidebar.ts
+++ b/docs/voice-agents/sidebar.ts
@@ -6,8 +6,8 @@ export default {
items: [
{
type: "doc",
- id: "voice-agents/quickstart",
- label: "Quickstart",
+ id: "voice-agents/overview",
+ label: "Overview",
},
{
type: "doc",
diff --git a/docusaurus.config.ts b/docusaurus.config.ts
index 54ea012b..d08de409 100644
--- a/docusaurus.config.ts
+++ b/docusaurus.config.ts
@@ -74,6 +74,7 @@ const config: Config = {
remarkPlugins: [math],
rehypePlugins: [katex],
sidebarItemsGenerator,
+ exclude: ["**/voice-agents-flow/**"],
},
theme: {
customCss: "./src/css/custom.css",
diff --git a/package-lock.json b/package-lock.json
index 64144524..4725e7ae 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -16,6 +16,7 @@
"@signalwire/docusaurus-plugin-llms-txt": "^1.2.1",
"asyncapi-schema-loader": "^0.0.1",
"clsx": "^2.0.0",
+ "devicon": "^2.17.0",
"docusaurus-plugin-openapi-docs": "^4.4.0",
"docusaurus-theme-openapi-docs": "^4.4.0",
"lucide-react": "^0.512.0",
@@ -13487,6 +13488,12 @@
"node": ">= 4.0.0"
}
},
+ "node_modules/devicon": {
+ "version": "2.17.0",
+ "resolved": "https://registry.npmjs.org/devicon/-/devicon-2.17.0.tgz",
+ "integrity": "sha512-2nKUdjobJlmRSaCHa50PGsVq0VDURnq9gVzQoJggsM/NKN0tLhC/Uq2zmy2pH36Q/1q3gvYwp/GjTgv/R0Ysbg==",
+ "license": "MIT"
+ },
"node_modules/devlop": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
diff --git a/package.json b/package.json
index dd339700..8fcfa5f6 100644
--- a/package.json
+++ b/package.json
@@ -29,6 +29,7 @@
"@signalwire/docusaurus-plugin-llms-txt": "^1.2.1",
"asyncapi-schema-loader": "^0.0.1",
"clsx": "^2.0.0",
+ "devicon": "^2.17.0",
"docusaurus-plugin-openapi-docs": "^4.4.0",
"docusaurus-theme-openapi-docs": "^4.4.0",
"lucide-react": "^0.512.0",
diff --git a/scripts/redirects/old-site-routes.json b/scripts/redirects/old-site-routes.json
index 742019b1..c5e5066b 100644
--- a/scripts/redirects/old-site-routes.json
+++ b/scripts/redirects/old-site-routes.json
@@ -26,14 +26,6 @@
"/features/realtime-latency",
"/features/word-tagging",
"/flow-api-ref",
- "/flow/application-inputs",
- "/flow/config",
- "/flow/function-calling",
- "/flow/introduction",
- "/flow/languages-supported",
- "/flow/livekit-webrtc",
- "/flow/nextjs-guide",
- "/flow/react-native-guide",
"/introduction",
"/introduction/additional-info",
"/introduction/authentication",
diff --git a/sidebars.ts b/sidebars.ts
index 9a81597c..88dcea82 100644
--- a/sidebars.ts
+++ b/sidebars.ts
@@ -3,7 +3,6 @@ import deploymentsSidebar from "./docs/deployments/sidebar";
import gettingStartedSidebar from "./docs/get-started/sidebar";
import speechToTextSidebar from "./docs/speech-to-text/sidebar";
import textToSpeechSidebar from "./docs/text-to-speech/sidebar";
-import voiceAgentsFlowSidebar from "./docs/voice-agents-flow/sidebar";
import integrationsAndSDKSidebar from "./docs/integrations-and-sdks/sidebar";
import voiceAgentsSidebar from "./docs/voice-agents/sidebar";
@@ -14,7 +13,6 @@ export default {
voiceAgentsSidebar,
textToSpeechSidebar,
integrationsAndSDKSidebar,
- voiceAgentsFlowSidebar,
deploymentsSidebar,
{
type: "category",
diff --git a/spec/flow-api.yaml b/spec/flow-api.yaml
index f8e6e671..49b48118 100644
--- a/spec/flow-api.yaml
+++ b/spec/flow-api.yaml
@@ -707,7 +707,7 @@ components:
type: string
# description: The id of the agent or persona to use during the conversation.
description: |
- Required in the the `StartConversation` message in the Flow API. Generated from the [Speechmatics Portal](https://portal.speechmatics.com/). This maps to the [language supported](/voice-agents-flow/supported-languages), agent's prompt, LLM, TTS voice, & custom dictionary. These can be customised by creating or modifying agents in the Portal.
+ Required in the `StartConversation` message in the Flow API. Generated from the [Speechmatics Portal](https://portal.speechmatics.com/). This maps to the supported language, agent's prompt, LLM, TTS voice, and custom dictionary. These can be customised by creating or modifying agents in the Portal.
template_variables:
type: object
additionalProperties:
diff --git a/src/components/devicon.tsx b/src/components/devicon.tsx
new file mode 100644
index 00000000..bf2d19bf
--- /dev/null
+++ b/src/components/devicon.tsx
@@ -0,0 +1,16 @@
+import React from 'react';
+
+interface DevIconProps {
+ name: string;
+ size?: number;
+}
+
+const DevIcon: React.FC<DevIconProps> = ({ name, size = 24 }) => {
+ const style = { fontSize: `${size}px` };
+  return <i className={`devicon-${name}-plain`} style={style} />; // assumed devicon font-class pattern ("devicon-<name>-plain")
+};
+
+export default DevIcon;
+
diff --git a/src/theme/LinkCard.tsx b/src/theme/LinkCard.tsx
index 464de198..26be8f0f 100644
--- a/src/theme/LinkCard.tsx
+++ b/src/theme/LinkCard.tsx
@@ -45,12 +45,10 @@ const LinkCardTitle = forwardRef(
},
ref: React.Ref,
) => {
- const isExternal = props.href?.startsWith("http");
return (
{children}
- {isExternal ? : null}
);
@@ -136,16 +134,22 @@ const LinkCardComposite = forwardRef(
ref: React.Ref,
) => {
const href = "href" in props ? props.href : undefined;
+ const isExternal = href?.startsWith("http");
return (
- {!icon ? null : {icon}}
-
-
- {title}
- {badgeText ? {badgeText} : null}
-
- {description}
-
+
+
+ {!icon ? : {icon}}
+ {isExternal ? : null}
+
+
+
+ {title}
+ {badgeText ? {badgeText} : null}
+
+ {description}
+
+
);
},
diff --git a/static/img/integration-logos/livekit.png b/static/img/integration-logos/livekit.png
new file mode 100644
index 00000000..0d28ba22
Binary files /dev/null and b/static/img/integration-logos/livekit.png differ
diff --git a/static/img/integration-logos/pipecat.png b/static/img/integration-logos/pipecat.png
new file mode 100644
index 00000000..656050ec
Binary files /dev/null and b/static/img/integration-logos/pipecat.png differ
diff --git a/static/img/integration-logos/vapi.png b/static/img/integration-logos/vapi.png
new file mode 100644
index 00000000..1c848bd6
Binary files /dev/null and b/static/img/integration-logos/vapi.png differ
diff --git a/vercel.json b/vercel.json
index 9786e042..bef3ea2a 100644
--- a/vercel.json
+++ b/vercel.json
@@ -161,46 +161,6 @@
"destination": "/api-ref/flow-voice-ai-websocket",
"permanent": true
},
- {
- "source": "/flow/application-inputs",
- "destination": "/voice-agents-flow/features/application-inputs",
- "permanent": true
- },
- {
- "source": "/flow/config",
- "destination": "/voice-agents-flow/setup",
- "permanent": true
- },
- {
- "source": "/flow/function-calling",
- "destination": "/voice-agents-flow/features/function-calling",
- "permanent": true
- },
- {
- "source": "/flow/introduction",
- "destination": "/voice-agents-flow",
- "permanent": true
- },
- {
- "source": "/flow/languages-supported",
- "destination": "/voice-agents-flow/supported-languages",
- "permanent": true
- },
- {
- "source": "/flow/livekit-webrtc",
- "destination": "/voice-agents-flow/features/webrtc-livekit",
- "permanent": true
- },
- {
- "source": "/flow/nextjs-guide",
- "destination": "/voice-agents-flow/guides/nextjs-guide",
- "permanent": true
- },
- {
- "source": "/flow/react-native-guide",
- "destination": "/voice-agents-flow/guides/react-native",
- "permanent": true
- },
{
"source": "/introduction",
"destination": "/",
@@ -861,16 +821,6 @@
"destination": "/deployments/virtual-appliance",
"permanent": true
},
- {
- "source": "/flow",
- "destination": "/voice-agents-flow",
- "permanent": true
- },
- {
- "source": "/flow/getting-started",
- "destination": "/voice-agents-flow",
- "permanent": true
- },
{
"source": "/flow/node-sdk",
"destination": "https://www.npmjs.com/package/@speechmatics/flow-client",