Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .changeset/lucky-grapes-care.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"@livekit/agents": patch
"@livekit/agents-plugin-cartesia": patch
"@livekit/agents-plugin-deepgram": patch
"@livekit/agents-plugin-google": patch
"@livekit/agents-plugin-openai": patch
"livekit-agents-examples": patch
---

Add granular per-model usage stats for agent sessions
31 changes: 0 additions & 31 deletions agents/src/inference/interruption/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,6 @@ export const SAMPLE_RATE = 16000;
export const FRAMES_PER_SECOND = 40;
export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame

/** Default production inference URL */
export const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';

/** Staging inference URL */
export const STAGING_BASE_URL = 'https://agent-gateway-staging.livekit.cloud/v1';

/**
* Get the default inference URL based on the environment.
*
* Priority:
* 1. LIVEKIT_INFERENCE_URL if set
* 2. If LIVEKIT_URL contains '.staging.livekit.cloud', use staging gateway
* 3. Otherwise, use production gateway
*/
export function getDefaultInferenceUrl(): string {
// Priority 1: LIVEKIT_INFERENCE_URL
const inferenceUrl = process.env.LIVEKIT_INFERENCE_URL;
if (inferenceUrl) {
return inferenceUrl;
}

// Priority 2: Check LIVEKIT_URL for staging (exact match to Python)
const livekitUrl = process.env.LIVEKIT_URL || '';
if (livekitUrl.includes('.staging.livekit.cloud')) {
return STAGING_BASE_URL;
}

// Priority 3: Default to production
return DEFAULT_BASE_URL;
}

export const apiConnectDefaults: ApiConnectOptions = {
maxRetries: 3,
retryInterval: 2_000,
Expand Down
13 changes: 4 additions & 9 deletions agents/src/inference/interruption/interruption_detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,8 @@
import type { TypedEventEmitter } from '@livekit/typed-emitter';
import EventEmitter from 'events';
import { log } from '../../log.js';
import {
DEFAULT_BASE_URL,
FRAMES_PER_SECOND,
SAMPLE_RATE,
STAGING_BASE_URL,
getDefaultInferenceUrl,
interruptionOptionDefaults,
} from './defaults.js';
import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';
import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';
import type { InterruptionDetectionError } from './errors.js';
import { InterruptionStreamBase } from './interruption_stream.js';
import type { InterruptionEvent, InterruptionOptions } from './types.js';
Expand Down Expand Up @@ -56,7 +50,8 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
let useProxy: boolean;

// Use LiveKit credentials if using the inference service (production or staging)
const isInferenceUrl = lkBaseUrl === DEFAULT_BASE_URL || lkBaseUrl === STAGING_BASE_URL;
const isInferenceUrl =
lkBaseUrl === DEFAULT_INFERENCE_URL || lkBaseUrl === STAGING_INFERENCE_URL;
if (isInferenceUrl) {
lkApiKey =
apiKey ?? process.env.LIVEKIT_INFERENCE_API_KEY ?? process.env.LIVEKIT_API_KEY ?? '';
Expand Down
10 changes: 6 additions & 4 deletions agents/src/inference/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ import {
} from '../index.js';
import * as llm from '../llm/index.js';
import type { APIConnectOptions } from '../types.js';
import { type AnyString, createAccessToken } from './utils.js';

const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
import { type AnyString, createAccessToken, getDefaultInferenceUrl } from './utils.js';

export type OpenAIModels =
| 'openai/gpt-5.2'
Expand Down Expand Up @@ -127,7 +125,7 @@ export class LLM extends llm.LLM {
strictToolSchema = false,
} = opts;

const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
const lkBaseURL = baseURL || getDefaultInferenceUrl();
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
if (!lkApiKey) {
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
Expand Down Expand Up @@ -163,6 +161,10 @@ export class LLM extends llm.LLM {
return this.opts.model;
}

/** Provider name reported for this inference LLM; always 'livekit' (the inference gateway). */
get provider(): string {
return 'livekit';
}

static fromModelString(modelString: string): LLM {
return new LLM({ model: modelString });
}
Expand Down
13 changes: 10 additions & 3 deletions agents/src/inference/stt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
type SttTranscriptEvent,
sttServerEventSchema,
} from './api_protos.js';
import { type AnyString, connectWs, createAccessToken } from './utils.js';
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';

export type DeepgramModels =
| 'deepgram/flux-general'
Expand Down Expand Up @@ -97,7 +97,6 @@ export type STTEncoding = 'pcm_s16le';

const DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';
const DEFAULT_SAMPLE_RATE = 16000;
const DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';
const DEFAULT_CANCEL_TIMEOUT = 5000;

export interface InferenceSTTOptions<TModel extends STTModels> {
Expand Down Expand Up @@ -143,7 +142,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
modelOptions = {} as STTOptions<TModel>,
} = opts || {};

const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
const lkBaseURL = baseURL || getDefaultInferenceUrl();
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
if (!lkApiKey) {
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
Expand Down Expand Up @@ -171,6 +170,14 @@ export class STT<TModel extends STTModels> extends BaseSTT {
return 'inference.STT';
}

/** Model identifier in use; 'auto' when no explicit model was configured. */
get model(): string {
return this.opts.model ?? 'auto';
}

/** Provider name reported for this inference STT; always 'livekit' (the inference gateway). */
get provider(): string {
return 'livekit';
}

static fromModelString(modelString: string): STT<AnyString> {
if (modelString.includes(':')) {
const [model, language] = modelString.split(':') as [AnyString, STTLanguages];
Expand Down
13 changes: 10 additions & 3 deletions agents/src/inference/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import {
ttsClientEventSchema,
ttsServerEventSchema,
} from './api_protos.js';
import { type AnyString, connectWs, createAccessToken } from './utils.js';
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';

export type CartesiaModels =
| 'cartesia/sonic-3'
Expand Down Expand Up @@ -94,7 +94,6 @@ type TTSEncoding = 'pcm_s16le';

const DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';
const DEFAULT_SAMPLE_RATE = 16000;
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
const NUM_CHANNELS = 1;
const DEFAULT_LANGUAGE = 'en';

Expand Down Expand Up @@ -145,7 +144,7 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
modelOptions = {} as TTSOptions<TModel>,
} = opts || {};

const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
const lkBaseURL = baseURL || getDefaultInferenceUrl();
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
if (!lkApiKey) {
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
Expand Down Expand Up @@ -202,6 +201,14 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
return 'inference.TTS';
}

/** Model identifier in use; 'unknown' when no explicit model was configured. */
get model(): string {
return this.opts.model ?? 'unknown';
}

/** Provider name reported for this inference TTS; always 'livekit' (the inference gateway). */
get provider(): string {
return 'livekit';
}

static fromModelString(modelString: string): TTS<AnyString> {
if (modelString.includes(':')) {
const [model, voice] = modelString.split(':') as [TTSModels, string];
Expand Down
28 changes: 28 additions & 0 deletions agents/src/inference/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@ import { APIConnectionError, APIStatusError } from '../index.js';

export type AnyString = string & NonNullable<unknown>;

/** Production inference gateway base URL. */
export const DEFAULT_INFERENCE_URL = 'https://agent-gateway.livekit.cloud/v1';

/** Staging inference gateway base URL. */
// NOTE(review): hostname here is `agent-gateway.staging.livekit.cloud`, while the
// constant this consolidates used `agent-gateway-staging.livekit.cloud` — confirm
// the dotted form is the intended staging host.
export const STAGING_INFERENCE_URL = 'https://agent-gateway.staging.livekit.cloud/v1';

/**
 * Resolve the inference gateway URL for the current environment.
 *
 * Resolution order:
 * 1. `LIVEKIT_INFERENCE_URL`, when set (explicit override).
 * 2. The staging gateway, when `LIVEKIT_URL` points at `.staging.livekit.cloud`.
 * 3. The production gateway.
 *
 * @returns The base URL to use for inference requests.
 */
export function getDefaultInferenceUrl(): string {
  const explicitUrl = process.env.LIVEKIT_INFERENCE_URL;
  if (explicitUrl) return explicitUrl;

  const isStaging = (process.env.LIVEKIT_URL ?? '').includes('.staging.livekit.cloud');
  return isStaging ? STAGING_INFERENCE_URL : DEFAULT_INFERENCE_URL;
}

export async function createAccessToken(
apiKey: string,
apiSecret: string,
Expand Down
16 changes: 16 additions & 0 deletions agents/src/llm/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ export abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCal
return 'unknown';
}

/**
* Get the provider name for this LLM instance.
*
* @returns The provider name if available, "unknown" otherwise.
*
* @remarks
* Plugins should override this property to provide their provider information.
*/
get provider(): string {
return 'unknown';
}

/**
* Returns a {@link LLMStream} that can be used to push text and receive LLM responses.
*/
Expand Down Expand Up @@ -248,6 +260,10 @@ export abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
}
return (usage?.completionTokens || 0) / (durationMs / 1000);
})(),
metadata: {
modelProvider: this.#llm.provider,
modelName: this.#llm.model,
},
};

if (this.#llmRequestSpan) {
Expand Down
4 changes: 4 additions & 0 deletions agents/src/llm/realtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ export abstract class RealtimeModel {
/** The model name/identifier used by this realtime model */
abstract get model(): string;

/**
 * Provider name for this realtime model.
 *
 * @returns 'unknown' by default; plugin implementations are expected to
 * override this to report their actual provider.
 */
get provider(): string {
return 'unknown';
}

abstract session(): RealtimeSession;

abstract close(): Promise<void>;
Expand Down
28 changes: 28 additions & 0 deletions agents/src/metrics/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
//
// SPDX-License-Identifier: Apache-2.0

/**
 * Optional provider/model identification attached to emitted metrics,
 * enabling per-model usage attribution. All fields are optional since
 * some components cannot determine them.
 */
export type MetricsMetadata = {
/** The provider name (e.g., 'openai', 'anthropic'). */
modelProvider?: string;
/** The model name (e.g., 'gpt-4o', 'claude-3-5-sonnet'). */
modelName?: string;
};

export type AgentMetrics =
| STTMetrics
| LLMMetrics
Expand All @@ -26,6 +33,8 @@ export type LLMMetrics = {
totalTokens: number;
tokensPerSecond: number;
speechId?: string;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};

export type STTMetrics = {
Expand All @@ -41,10 +50,16 @@ export type STTMetrics = {
* The duration of the pushed audio in milliseconds.
*/
audioDurationMs: number;
/** Input audio tokens (for token-based billing). */
inputTokens?: number;
/** Output text tokens (for token-based billing). */
outputTokens?: number;
/**
* Whether the STT is streaming (e.g using websocket).
*/
streamed: boolean;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};

export type TTSMetrics = {
Expand All @@ -59,10 +74,17 @@ export type TTSMetrics = {
/** Generated audio duration in milliseconds. */
audioDurationMs: number;
cancelled: boolean;
/** Number of characters synthesized (for character-based billing). */
charactersCount: number;
/** Input text tokens (for token-based billing, e.g., OpenAI TTS). */
inputTokens?: number;
/** Output audio tokens (for token-based billing, e.g., OpenAI TTS). */
outputTokens?: number;
streamed: boolean;
segmentId?: string;
speechId?: string;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};

export type VADMetrics = {
Expand Down Expand Up @@ -133,6 +155,10 @@ export type RealtimeModelMetrics = {
* The duration of the response from created to done in milliseconds.
*/
durationMs: number;
/**
* The duration of the session connection in milliseconds (for session-based billing like xAI).
*/
sessionDurationMs?: number;
/**
* Time to first audio token in milliseconds. -1 if no audio token was sent.
*/
Expand Down Expand Up @@ -165,4 +191,6 @@ export type RealtimeModelMetrics = {
* Details about the output tokens used in the Response.
*/
outputTokenDetails: RealtimeModelMetricsOutputTokenDetails;
/** Metadata for model provider and name tracking. */
metadata?: MetricsMetadata;
};
9 changes: 9 additions & 0 deletions agents/src/metrics/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@ export type {
AgentMetrics,
EOUMetrics,
LLMMetrics,
MetricsMetadata,
RealtimeModelMetrics,
STTMetrics,
TTSMetrics,
VADMetrics,
} from './base.js';
export {
filterZeroValues,
ModelUsageCollector,
type LLMModelUsage,
type ModelUsage,
type STTModelUsage,
type TTSModelUsage,
} from './model_usage.js';
export { UsageCollector, type UsageSummary } from './usage_collector.js';
export { logMetrics } from './utils.js';
Loading