Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .changeset/lucky-grapes-care.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"@livekit/agents": patch
"@livekit/agents-plugin-cartesia": patch
"@livekit/agents-plugin-deepgram": patch
"@livekit/agents-plugin-google": patch
"@livekit/agents-plugin-openai": patch
"livekit-agents-examples": patch
---

Add granular per-model usage stats for agent sessions
31 changes: 0 additions & 31 deletions agents/src/inference/interruption/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,6 @@ export const SAMPLE_RATE = 16000;
export const FRAMES_PER_SECOND = 40;
export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame

/** Default production inference URL */
export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';

/** Staging inference URL */
export const STAGING_BASE_URL = 'https://agent-gateway-staging.livekit.cloud/v1';

/**
* Get the default inference URL based on the environment.
*
* Priority:
* 1. LIVEKIT_INFERENCE_URL if set
* 2. If LIVEKIT_URL contains '.staging.livekit.cloud', use staging gateway
* 3. Otherwise, use production gateway
*/
export function getDefaultInferenceUrl(): string {
// Priority 1: LIVEKIT_INFERENCE_URL
const inferenceUrl = process.env.LIVEKIT_INFERENCE_URL;
if (inferenceUrl) {
return inferenceUrl;
}

// Priority 2: Check LIVEKIT_URL for staging (exact match to Python)
const livekitUrl = process.env.LIVEKIT_URL || '';
if (livekitUrl.includes('.staging.livekit.cloud')) {
return STAGING_BASE_URL;
}

// Priority 3: Default to production
return DEFAULT_BASE_URL;
}

export const apiConnectDefaults: ApiConnectOptions = {
maxRetries: 3,
retryInterval: 2_000,
Expand Down
13 changes: 4 additions & 9 deletions agents/src/inference/interruption/interruption_detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,8 @@
import type { TypedEventEmitter } from '@livekit/typed-emitter';
import EventEmitter from 'events';
import { log } from '../../log.js';
import {
DEFAULT_BASE_URL,
FRAMES_PER_SECOND,
SAMPLE_RATE,
STAGING_BASE_URL,
getDefaultInferenceUrl,
interruptionOptionDefaults,
} from './defaults.js';
import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';
import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';
import type { InterruptionDetectionError } from './errors.js';
import { InterruptionStreamBase } from './interruption_stream.js';
import type { InterruptionEvent, InterruptionOptions } from './types.js';
Expand Down Expand Up @@ -56,7 +50,8 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
let useProxy: boolean;

// Use LiveKit credentials if using the inference service (production or staging)
const isInferenceUrl = lkBaseUrl === DEFAULT_BASE_URL || lkBaseUrl === STAGING_BASE_URL;
const isInferenceUrl =
lkBaseUrl === DEFAULT_INFERENCE_URL || lkBaseUrl === STAGING_INFERENCE_URL;
if (isInferenceUrl) {
lkApiKey =
apiKey ?? process.env.LIVEKIT_INFERENCE_API_KEY ?? process.env.LIVEKIT_API_KEY ?? '';
Expand Down
10 changes: 6 additions & 4 deletions agents/src/inference/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ import {
} from '../index.js';
import * as llm from '../llm/index.js';
import type { APIConnectOptions } from '../types.js';
import { type AnyString, createAccessToken } from './utils.js';

const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
import { type AnyString, createAccessToken, getDefaultInferenceUrl } from './utils.js';

export type OpenAIModels =
| 'openai/gpt-5.2'
Expand Down Expand Up @@ -127,7 +125,7 @@ export class LLM extends llm.LLM {
strictToolSchema = false,
} = opts;

const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
const lkBaseURL = baseURL || getDefaultInferenceUrl();
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
if (!lkApiKey) {
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
Expand Down Expand Up @@ -163,6 +161,10 @@ export class LLM extends llm.LLM {
return this.opts.model;
}

/** Provider name reported for this inference LLM; always 'livekit' (the inference gateway). */
get provider(): string {
return 'livekit';
}

static fromModelString(modelString: string): LLM {
return new LLM({ model: modelString });
}
Expand Down
13 changes: 10 additions & 3 deletions agents/src/inference/stt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
type SttTranscriptEvent,
sttServerEventSchema,
} from './api_protos.js';
import { type AnyString, connectWs, createAccessToken } from './utils.js';
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';

export type DeepgramModels =
| 'deepgram/flux-general'
Expand Down Expand Up @@ -97,7 +97,6 @@ export type STTEncoding = 'pcm_s16le';

const DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';
const DEFAULT_SAMPLE_RATE = 16000;
const DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';
const DEFAULT_CANCEL_TIMEOUT = 5000;

export interface InferenceSTTOptions<TModel extends STTModels> {
Expand Down Expand Up @@ -143,7 +142,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
modelOptions = {} as STTOptions<TModel>,
} = opts || {};

const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
const lkBaseURL = baseURL || getDefaultInferenceUrl();
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
if (!lkApiKey) {
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
Expand Down Expand Up @@ -171,6 +170,14 @@ export class STT<TModel extends STTModels> extends BaseSTT {
return 'inference.STT';
}

/** Model identifier in use; 'auto' when no explicit model was configured. */
get model(): string {
return this.opts.model ?? 'auto';
}

/** Provider name reported for this inference STT; always 'livekit' (the inference gateway). */
get provider(): string {
return 'livekit';
}

static fromModelString(modelString: string): STT<AnyString> {
if (modelString.includes(':')) {
const [model, language] = modelString.split(':') as [AnyString, STTLanguages];
Expand Down
13 changes: 10 additions & 3 deletions agents/src/inference/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import {
ttsClientEventSchema,
ttsServerEventSchema,
} from './api_protos.js';
import { type AnyString, connectWs, createAccessToken } from './utils.js';
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';

export type CartesiaModels =
| 'cartesia/sonic-3'
Expand Down Expand Up @@ -94,7 +94,6 @@ type TTSEncoding = 'pcm_s16le';

const DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';
const DEFAULT_SAMPLE_RATE = 16000;
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
const NUM_CHANNELS = 1;
const DEFAULT_LANGUAGE = 'en';

Expand Down Expand Up @@ -145,7 +144,7 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
modelOptions = {} as TTSOptions<TModel>,
} = opts || {};

const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
const lkBaseURL = baseURL || getDefaultInferenceUrl();
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
if (!lkApiKey) {
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
Expand Down Expand Up @@ -202,6 +201,14 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
return 'inference.TTS';
}

/** Model identifier in use; 'unknown' when no explicit model was configured. */
get model(): string {
return this.opts.model ?? 'unknown';
}

/** Provider name reported for this inference TTS; always 'livekit' (the inference gateway). */
get provider(): string {
return 'livekit';
}

static fromModelString(modelString: string): TTS<AnyString> {
if (modelString.includes(':')) {
const [model, voice] = modelString.split(':') as [TTSModels, string];
Expand Down
28 changes: 28 additions & 0 deletions agents/src/inference/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@ import { APIConnectionError, APIStatusError } from '../index.js';

export type AnyString = string & NonNullable<unknown>;

/** Production inference gateway base URL. */
export const DEFAULT_INFERENCE_URL = 'https://agent-gateway.livekit.cloud/v1';

/** Staging inference gateway base URL. */
// NOTE(review): hostname here is `agent-gateway.staging.livekit.cloud`, while the
// constant this consolidates used `agent-gateway-staging.livekit.cloud` — confirm
// the dotted form is the intended staging host.
export const STAGING_INFERENCE_URL = 'https://agent-gateway.staging.livekit.cloud/v1';

/**
 * Resolve the inference gateway URL for the current environment.
 *
 * Resolution order:
 * 1. `LIVEKIT_INFERENCE_URL`, when set (explicit override).
 * 2. The staging gateway, when `LIVEKIT_URL` points at `.staging.livekit.cloud`.
 * 3. The production gateway.
 *
 * @returns The base URL to use for inference requests.
 */
export function getDefaultInferenceUrl(): string {
  const explicitUrl = process.env.LIVEKIT_INFERENCE_URL;
  if (explicitUrl) return explicitUrl;

  const isStaging = (process.env.LIVEKIT_URL ?? '').includes('.staging.livekit.cloud');
  return isStaging ? STAGING_INFERENCE_URL : DEFAULT_INFERENCE_URL;
}

export async function createAccessToken(
apiKey: string,
apiSecret: string,
Expand Down
16 changes: 16 additions & 0 deletions agents/src/llm/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ export abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCal
return 'unknown';
}

/**
* Get the provider name for this LLM instance.
*
* @returns The provider name if available, "unknown" otherwise.
*
* @remarks
* Plugins should override this property to provide their provider information.
*/
get provider(): string {
return 'unknown';
}

/**
* Returns a {@link LLMStream} that can be used to push text and receive LLM responses.
*/
Expand Down Expand Up @@ -248,6 +260,10 @@ export abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
}
return (usage?.completionTokens || 0) / (durationMs / 1000);
})(),
metadata: {
modelProvider: this.#llm.provider,
modelName: this.#llm.model,
},
};

if (this.#llmRequestSpan) {
Expand Down
4 changes: 4 additions & 0 deletions agents/src/llm/realtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ export abstract class RealtimeModel {
/** The model name/identifier used by this realtime model */
abstract get model(): string;

/**
 * Provider name for this realtime model.
 *
 * @returns 'unknown' by default; plugin implementations are expected to
 * override this to report their actual provider.
 */
get provider(): string {
return 'unknown';
}

abstract session(): RealtimeSession;

abstract close(): Promise<void>;
Expand Down
28 changes: 28 additions & 0 deletions agents/src/metrics/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
//
// SPDX-License-Identifier: Apache-2.0

/**
 * Optional provider/model identification attached to emitted metrics,
 * enabling per-model usage attribution. All fields are optional since
 * some components cannot determine them.
 */
export type MetricsMetadata = {
/** The provider name (e.g., 'openai', 'anthropic'). */
modelProvider?: string;
/** The model name (e.g., 'gpt-4o', 'claude-3-5-sonnet'). */
modelName?: string;
};

export type AgentMetrics =
| STTMetrics
| LLMMetrics
Expand All @@ -26,6 +33,8 @@ export type LLMMetrics = {
totalTokens: number;
tokensPerSecond: number;
speechId?: string;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};

export type STTMetrics = {
Expand All @@ -41,10 +50,16 @@ export type STTMetrics = {
* The duration of the pushed audio in milliseconds.
*/
audioDurationMs: number;
/** Input audio tokens (for token-based billing). */
inputTokens?: number;
/** Output text tokens (for token-based billing). */
outputTokens?: number;
/**
* Whether the STT is streaming (e.g using websocket).
*/
streamed: boolean;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};

export type TTSMetrics = {
Expand All @@ -59,10 +74,17 @@ export type TTSMetrics = {
/** Generated audio duration in milliseconds. */
audioDurationMs: number;
cancelled: boolean;
/** Number of characters synthesized (for character-based billing). */
charactersCount: number;
/** Input text tokens (for token-based billing, e.g., OpenAI TTS). */
inputTokens?: number;
/** Output audio tokens (for token-based billing, e.g., OpenAI TTS). */
outputTokens?: number;
streamed: boolean;
segmentId?: string;
speechId?: string;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};

export type VADMetrics = {
Expand Down Expand Up @@ -133,6 +155,10 @@ export type RealtimeModelMetrics = {
* The duration of the response from created to done in milliseconds.
*/
durationMs: number;
/**
* The duration of the session connection in milliseconds (for session-based billing like xAI).
*/
sessionDurationMs?: number;
/**
* Time to first audio token in milliseconds. -1 if no audio token was sent.
*/
Expand Down Expand Up @@ -165,4 +191,6 @@ export type RealtimeModelMetrics = {
* Details about the output tokens used in the Response.
*/
outputTokenDetails: RealtimeModelMetricsOutputTokenDetails;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};
9 changes: 9 additions & 0 deletions agents/src/metrics/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@ export type {
AgentMetrics,
EOUMetrics,
LLMMetrics,
MetricsMetadata,
RealtimeModelMetrics,
STTMetrics,
TTSMetrics,
VADMetrics,
} from './base.js';
export {
filterZeroValues,
ModelUsageCollector,
type LLMModelUsage,
type ModelUsage,
type STTModelUsage,
type TTSModelUsage,
} from './model_usage.js';
export { UsageCollector, type UsageSummary } from './usage_collector.js';
export { logMetrics } from './utils.js';
Loading