Skip to content

Commit 728bcbe

Browse files
feat(realtime): unify initial state with modelName, image, and prompt (#29)
* feat(realtime): replace AvatarOptions with ModelState initial state API

  Remove AvatarOptions and InitialPromptOptions in favor of unified ModelState with optional image (bytes, str, or Path) and Prompt fields. This aligns the Python SDK with the TS SDK's PR #84 API changes.

  Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
  Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* feat(realtime): wire model_name and initial state through WebRTC stack

  - Replace is_avatar_live with model_name throughout the connection pipeline
  - Unify initial image/prompt handling into Phase 2 pre-handshake
  - Add receive-only video transceiver for avatar-live with local track
  - Add audio transceiver for subscribe (receive-only) mode
  - Accept Path, raw base64, data URI, URL, and file paths in _image_to_base64

  Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
  Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* feat(realtime): update avatar-live example and tests for new API

  - Update avatar_live.py to use ModelState(image=...) instead of AvatarOptions
  - Update existing tests for new initial_state wiring
  - Add unit tests for _image_to_base64: raw base64, Path, bytes, data URI, file path

  Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
  Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* chore: use lucy 2

---------

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 24bc92d commit 728bcbe

File tree

11 files changed

+154
-99
lines changed

11 files changed

+154
-99
lines changed

decart/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@
3636
decode_subscribe_token,
3737
RealtimeConnectOptions,
3838
ConnectionState,
39-
AvatarOptions,
4039
)
4140

4241
REALTIME_AVAILABLE = True
@@ -50,7 +49,6 @@
5049
decode_subscribe_token = None # type: ignore
5150
RealtimeConnectOptions = None # type: ignore
5251
ConnectionState = None # type: ignore
53-
AvatarOptions = None # type: ignore
5452

5553
__version__ = "0.0.1"
5654

@@ -93,6 +91,5 @@
9391
"decode_subscribe_token",
9492
"RealtimeConnectOptions",
9593
"ConnectionState",
96-
"AvatarOptions",
9794
]
9895
)

decart/realtime/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
decode_subscribe_token,
77
)
88
from .messages import GenerationTickMessage
9-
from .types import RealtimeConnectOptions, ConnectionState, AvatarOptions
9+
from .types import RealtimeConnectOptions, ConnectionState
1010

1111
__all__ = [
1212
"RealtimeClient",
@@ -18,5 +18,4 @@
1818
"GenerationTickMessage",
1919
"RealtimeConnectOptions",
2020
"ConnectionState",
21-
"AvatarOptions",
2221
]

decart/realtime/client.py

Lines changed: 21 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
)
1919
from .types import ConnectionState, RealtimeConnectOptions
2020
from ..types import FileInput
21+
from ..models import RealTimeModels
2122
from ..errors import DecartSDKError, InvalidInputError, WebRTCError
2223
from ..process.request import file_input_to_bytes
2324

@@ -34,9 +35,13 @@ class SetInput(BaseModel):
3435

3536

3637
async def _image_to_base64(
37-
image: Union[bytes, str],
38+
image: Union[bytes, str, Path],
3839
http_session: aiohttp.ClientSession,
3940
) -> str:
41+
if isinstance(image, Path):
42+
image_bytes, _ = await file_input_to_bytes(image, http_session)
43+
return base64.b64encode(image_bytes).decode("utf-8")
44+
4045
if isinstance(image, bytes):
4146
return base64.b64encode(image).decode("utf-8")
4247

@@ -56,21 +61,20 @@ async def _image_to_base64(
5661
image_bytes, _ = await file_input_to_bytes(image, http_session)
5762
return base64.b64encode(image_bytes).decode("utf-8")
5863

59-
raise InvalidInputError(
60-
"Invalid image input: string is not a data URI, URL, or valid file path"
61-
)
64+
# Non-URL, non-file string — treat as raw base64 (matches TS SDK behavior)
65+
return image
6266

6367

6468
class RealtimeClient:
6569
def __init__(
6670
self,
6771
manager: WebRTCManager,
6872
http_session: Optional[aiohttp.ClientSession] = None,
69-
is_avatar_live: bool = False,
73+
model_name: Optional[str] = None,
7074
):
7175
self._manager = manager
7276
self._http_session = http_session
73-
self._is_avatar_live = is_avatar_live
77+
self._model_name = model_name
7478
self._connection_callbacks: list[Callable[[ConnectionState], None]] = []
7579
self._error_callbacks: list[Callable[[DecartSDKError], None]] = []
7680
self._generation_tick_callbacks: list[Callable[[GenerationTickMessage], None]] = []
@@ -105,7 +109,7 @@ async def connect(
105109
ws_url = f"{base_url}{options.model.url_path}"
106110
ws_url += f"?api_key={quote(api_key)}&model={quote(options.model.name)}"
107111

108-
is_avatar_live = options.model.name == "avatar-live"
112+
model_name: RealTimeModels = options.model.name # type: ignore[assignment]
109113

110114
config = WebRTCConfiguration(
111115
webrtc_url=ws_url,
@@ -119,7 +123,7 @@ async def connect(
119123
initial_state=options.initial_state,
120124
customize_offer=options.customize_offer,
121125
integration=integration,
122-
is_avatar_live=is_avatar_live,
126+
model_name=model_name,
123127
)
124128

125129
# Create HTTP session for file conversions
@@ -129,7 +133,7 @@ async def connect(
129133
client = cls(
130134
manager=manager,
131135
http_session=http_session,
132-
is_avatar_live=is_avatar_live,
136+
model_name=model_name,
133137
)
134138

135139
config.on_connection_state_change = client._emit_connection_change
@@ -138,35 +142,22 @@ async def connect(
138142
config.on_generation_tick = client._emit_generation_tick
139143

140144
try:
141-
# For avatar-live, convert and send avatar image before WebRTC connection
142-
avatar_image_base64: Optional[str] = None
143-
if is_avatar_live and options.avatar:
144-
image_bytes, _ = await file_input_to_bytes(
145-
options.avatar.avatar_image, http_session
146-
)
147-
avatar_image_base64 = base64.b64encode(image_bytes).decode("utf-8")
148-
149-
# Prepare initial prompt if provided
145+
initial_image: Optional[str] = None
146+
if options.initial_state and options.initial_state.image:
147+
initial_image = await _image_to_base64(options.initial_state.image, http_session)
148+
150149
initial_prompt: Optional[dict] = None
151-
if options.initial_prompt:
150+
if options.initial_state and options.initial_state.prompt:
152151
initial_prompt = {
153-
"text": options.initial_prompt.text,
154-
"enhance": options.initial_prompt.enhance,
152+
"text": options.initial_state.prompt.text,
153+
"enhance": options.initial_state.prompt.enhance,
155154
}
156155

157156
await manager.connect(
158157
local_track,
159-
avatar_image_base64=avatar_image_base64,
158+
initial_image=initial_image,
160159
initial_prompt=initial_prompt,
161160
)
162-
163-
# Handle initial_state.prompt for backward compatibility (after WebRTC connection)
164-
if options.initial_state:
165-
if options.initial_state.prompt:
166-
await client.set_prompt(
167-
options.initial_state.prompt.text,
168-
enhance=options.initial_state.prompt.enhance,
169-
)
170161
except Exception as e:
171162
await manager.cleanup()
172163
await http_session.close()

decart/realtime/types.py

Lines changed: 1 addition & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from typing import Literal, Callable, Optional
22
from dataclasses import dataclass
33
from ..models import ModelDefinition
4-
from ..types import ModelState, FileInput
4+
from ..types import ModelState
55

66
try:
77
from aiortc import MediaStreamTrack
@@ -12,31 +12,9 @@
1212
ConnectionState = Literal["connecting", "connected", "generating", "disconnected", "reconnecting"]
1313

1414

15-
@dataclass
16-
class AvatarOptions:
17-
"""Options for avatar-live model."""
18-
19-
avatar_image: FileInput
20-
"""The avatar image to use. Can be bytes, Path, URL string, or file-like object."""
21-
22-
23-
@dataclass
24-
class InitialPromptOptions:
25-
"""Options for initial prompt sent before WebRTC handshake."""
26-
27-
text: str
28-
"""The prompt text to send."""
29-
30-
enhance: bool = True
31-
"""Whether to enhance the prompt. Defaults to True."""
32-
33-
3415
@dataclass
3516
class RealtimeConnectOptions:
3617
model: ModelDefinition
3718
on_remote_stream: Callable[[MediaStreamTrack], None]
3819
initial_state: Optional[ModelState] = None
3920
customize_offer: Optional[Callable] = None
40-
avatar: Optional[AvatarOptions] = None
41-
initial_prompt: Optional[InitialPromptOptions] = None
42-
"""Initial prompt to send before WebRTC handshake (optional)."""

decart/realtime/webrtc_connection.py

Lines changed: 34 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -61,21 +61,21 @@ def __init__(
6161
self._pending_prompts: dict[str, tuple[asyncio.Event, dict]] = {}
6262
self._pending_image_set: Optional[tuple[asyncio.Event, dict]] = None
6363
self._local_track: Optional[MediaStreamTrack] = None
64-
self._is_avatar_live: bool = False
64+
self._model_name: Optional[str] = None
6565

6666
async def connect(
6767
self,
6868
url: str,
6969
local_track: Optional[MediaStreamTrack],
7070
timeout: float,
7171
integration: Optional[str] = None,
72-
is_avatar_live: bool = False,
73-
avatar_image_base64: Optional[str] = None,
72+
model_name: Optional[str] = None,
73+
initial_image: Optional[str] = None,
7474
initial_prompt: Optional[dict] = None,
7575
) -> None:
7676
try:
7777
self._local_track = local_track
78-
self._is_avatar_live = is_avatar_live
78+
self._model_name = model_name
7979

8080
await self._set_state("connecting")
8181

@@ -90,13 +90,16 @@ async def connect(
9090

9191
self._ws_task = asyncio.create_task(self._receive_messages())
9292

93-
if is_avatar_live and avatar_image_base64:
94-
await self._send_avatar_image_and_wait(avatar_image_base64)
95-
96-
if initial_prompt:
93+
if initial_image:
94+
await self._send_initial_image_and_wait(
95+
initial_image,
96+
prompt=initial_prompt.get("text") if initial_prompt else None,
97+
enhance=initial_prompt.get("enhance") if initial_prompt else None,
98+
)
99+
elif initial_prompt:
97100
await self._send_initial_prompt_and_wait(initial_prompt)
98101

99-
await self._setup_peer_connection(local_track, is_avatar_live=is_avatar_live)
102+
await self._setup_peer_connection(local_track, model_name=model_name)
100103

101104
await self._create_and_send_offer()
102105

@@ -115,23 +118,32 @@ async def connect(
115118
self._on_error(e)
116119
raise WebRTCError(str(e), cause=e)
117120

118-
async def _send_avatar_image_and_wait(self, image_base64: str, timeout: float = 30.0) -> None:
119-
"""Send avatar image and wait for acknowledgment."""
121+
async def _send_initial_image_and_wait(
122+
self,
123+
image_base64: str,
124+
prompt: Optional[str] = None,
125+
enhance: Optional[bool] = None,
126+
timeout: float = 30.0,
127+
) -> None:
120128
event, result = self.register_image_set_wait()
121129

122130
try:
123-
await self._send_message(
124-
SetAvatarImageMessage(type="set_image", image_data=image_base64)
125-
)
131+
message = SetAvatarImageMessage(type="set_image", image_data=image_base64)
132+
if prompt is not None:
133+
message.prompt = prompt
134+
if enhance is not None:
135+
message.enhance_prompt = enhance
136+
137+
await self._send_message(message)
126138

127139
try:
128140
await asyncio.wait_for(event.wait(), timeout=timeout)
129141
except asyncio.TimeoutError:
130-
raise WebRTCError("Avatar image acknowledgment timed out")
142+
raise WebRTCError("Initial image acknowledgment timed out")
131143

132144
if not result["success"]:
133145
raise WebRTCError(
134-
f"Failed to set avatar image: {result.get('error', 'unknown error')}"
146+
f"Failed to set initial image: {result.get('error', 'unknown error')}"
135147
)
136148
finally:
137149
self.unregister_image_set_wait()
@@ -163,7 +175,7 @@ async def _send_initial_prompt_and_wait(self, prompt: dict, timeout: float = 15.
163175
async def _setup_peer_connection(
164176
self,
165177
local_track: Optional[MediaStreamTrack],
166-
is_avatar_live: bool = False,
178+
model_name: Optional[str] = None,
167179
) -> None:
168180
config = RTCConfiguration(iceServers=[RTCIceServer(urls=["stun:stun.l.google.com:19302"])])
169181

@@ -205,8 +217,12 @@ async def on_ice_connection_state_change():
205217

206218
if local_track is None:
207219
self._pc.addTransceiver("video", direction="recvonly")
208-
logger.debug("Added video transceiver (recvonly) for receive-only mode")
220+
self._pc.addTransceiver("audio", direction="recvonly")
221+
logger.debug("Added video+audio transceivers (recvonly) for subscribe mode")
209222
else:
223+
if model_name == "avatar-live":
224+
self._pc.addTransceiver("video", direction="recvonly")
225+
logger.debug("Added video transceiver (recvonly) for avatar-live mode")
210226
self._pc.addTrack(local_track)
211227
logger.debug("Added local track to peer connection")
212228

decart/realtime/webrtc_manager.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ class WebRTCConfiguration:
4848
initial_state: Optional[ModelState] = None
4949
customize_offer: Optional[Callable] = None
5050
integration: Optional[str] = None
51-
is_avatar_live: bool = False
51+
model_name: Optional[str] = None
5252

5353

5454
def _is_permanent_error(exception: BaseException) -> bool:
@@ -155,7 +155,7 @@ async def _attempt():
155155
local_track=self._local_track,
156156
timeout=CONNECTION_TIMEOUT,
157157
integration=self._config.integration,
158-
is_avatar_live=self._config.is_avatar_live,
158+
model_name=self._config.model_name,
159159
)
160160

161161
if self._intentional_disconnect or reconnect_generation != self._reconnect_generation:
@@ -174,7 +174,7 @@ async def _attempt():
174174
async def connect(
175175
self,
176176
local_track: Optional[MediaStreamTrack],
177-
avatar_image_base64: Optional[str] = None,
177+
initial_image: Optional[str] = None,
178178
initial_prompt: Optional[dict] = None,
179179
) -> bool:
180180
self._local_track = local_track
@@ -192,8 +192,8 @@ async def connect(
192192
local_track=local_track,
193193
timeout=CONNECTION_TIMEOUT,
194194
integration=self._config.integration,
195-
is_avatar_live=self._config.is_avatar_live,
196-
avatar_image_base64=avatar_image_base64,
195+
model_name=self._config.model_name,
196+
initial_image=initial_image,
197197
initial_prompt=initial_prompt,
198198
)
199199
return True

decart/types.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ class Prompt(BaseModel):
2020

2121
class ModelState(BaseModel):
2222
prompt: Optional[Prompt] = None
23+
image: Optional[Union[bytes, str, Path]] = None
2324

2425

2526
class MotionTrajectoryInput(BaseModel):

examples/avatar_live.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,8 @@ async def main():
7676

7777
try:
7878
from decart.realtime.client import RealtimeClient
79-
from decart.realtime.types import RealtimeConnectOptions, AvatarOptions
79+
from decart.realtime.types import RealtimeConnectOptions
80+
from decart.types import ModelState
8081
except ImportError:
8182
print("Error: Realtime API not available")
8283
print("Install with: pip install decart[realtime]")
@@ -120,7 +121,7 @@ def on_error(error):
120121
options=RealtimeConnectOptions(
121122
model=model,
122123
on_remote_stream=on_remote_stream,
123-
avatar=AvatarOptions(avatar_image=Path(avatar_image)),
124+
initial_state=ModelState(image=avatar_image),
124125
),
125126
)
126127

examples/files/image.png

1.35 MB
Loading

examples/realtime_synthetic.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ async def main():
7373
print("Creating synthetic video track...")
7474
video_track = SyntheticVideoTrack()
7575

76-
model = models.realtime("mirage_v2")
76+
model = models.realtime("lucy_2_rt")
7777
print(f"Using model: {model.name}")
7878
print(f"Model config - FPS: {model.fps}, Size: {model.width}x{model.height}")
7979

@@ -111,7 +111,13 @@ def on_error(error):
111111
options=RealtimeConnectOptions(
112112
model=model,
113113
on_remote_stream=on_remote_stream,
114-
initial_state=ModelState(prompt=Prompt(text="Anime style", enhance=True)),
114+
initial_state=ModelState(
115+
prompt=Prompt(
116+
text="use the image as a reference",
117+
enhance=True,
118+
),
119+
image=Path("examples/files/image.png"),
120+
),
115121
),
116122
)
117123

0 commit comments

Comments
 (0)