Skip to content
17 changes: 15 additions & 2 deletions docker/repl_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,8 @@ def execute_code(
timeout: int = 30,
working_dir: str = "/mnt/data",
initial_state: str = None,
capture_state: bool = False
capture_state: bool = False,
args: list = None
) -> dict:
"""Execute code in isolated namespace and capture output.

Expand All @@ -301,6 +302,7 @@ def execute_code(
working_dir: Working directory for execution
initial_state: Base64-encoded cloudpickle state to restore before execution
capture_state: Whether to capture and return state after execution
args: Optional list of command line arguments

Returns:
Dict with exit_code, stdout, stderr, execution_time_ms, and optionally state/state_errors
Expand Down Expand Up @@ -330,6 +332,12 @@ def execute_code(

exit_code = 0

# Save and set sys.argv if args provided
original_argv = sys.argv
if args is not None:
# Set sys.argv to [script_name] + args (matches file-based execution)
sys.argv = ['/mnt/data/code.py'] + list(args)

# Set up timeout handler
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(timeout)
Expand Down Expand Up @@ -370,6 +378,9 @@ def execute_code(
signal.alarm(0)
signal.signal(signal.SIGALRM, old_handler)

# Restore sys.argv
sys.argv = original_argv

# Restore working directory
try:
os.chdir(original_dir)
Expand Down Expand Up @@ -503,14 +514,16 @@ def main():
working_dir = request.get("working_dir", "/mnt/data")
initial_state = request.get("initial_state")
capture_state = request.get("capture_state", False)
args = request.get("args") # List of command line arguments

# Execute code with optional state persistence
response = execute_code(
code,
timeout,
working_dir,
initial_state=initial_state,
capture_state=capture_state
capture_state=capture_state,
args=args
)

# Send response
Expand Down
19 changes: 15 additions & 4 deletions src/api/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

# Local application imports
from ..config import settings
from ..dependencies import FileServiceDep
from ..dependencies import FileServiceDep, SessionServiceDep
from ..models import SessionCreate
from ..services.execution.output import OutputProcessor
from ..utils.id_generator import generate_session_id

logger = structlog.get_logger(__name__)
router = APIRouter()
Expand Down Expand Up @@ -55,6 +55,7 @@ async def upload_file(
files: Optional[List[UploadFile]] = File(None),
entity_id: Optional[str] = Form(None),
file_service: FileServiceDep = None,
session_service: SessionServiceDep = None,
):
"""Upload files with multipart form handling - LibreChat compatible.

Expand Down Expand Up @@ -112,8 +113,17 @@ async def upload_file(

uploaded_files = []

# Create a session ID for this upload
session_id = generate_session_id()
# Create a real session for file uploads
# This enables session reuse when files are referenced in /exec
metadata = {}
if entity_id:
metadata["entity_id"] = entity_id
session = await session_service.create_session(SessionCreate(metadata=metadata))
session_id = session.session_id

# Determine if this is an agent file (uploaded with entity_id)
# Agent files are read-only and cannot be modified by user code
is_agent_file = entity_id is not None and len(entity_id) > 0

for file in upload_files:
# Read file content
Expand All @@ -125,6 +135,7 @@ async def upload_file(
filename=file.filename,
content=content,
content_type=file.content_type,
is_agent_file=is_agent_file,
)

# Sanitize filename to match what will be used in container
Expand Down
5 changes: 5 additions & 0 deletions src/models/exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class FileRef(BaseModel):
id: str
name: str
path: Optional[str] = None # Make path optional
session_id: Optional[str] = None # Session ID for cross-message file persistence


class RequestFile(BaseModel):
Expand All @@ -22,6 +23,10 @@ class RequestFile(BaseModel):
id: str
session_id: str
name: str
restore_state: bool = Field(
default=False,
description="If true, restore Python state from when this file was last used",
)


class ExecRequest(BaseModel):
Expand Down
3 changes: 3 additions & 0 deletions src/models/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class ExecuteCodeRequest(BaseModel):
timeout: Optional[int] = Field(
default=None, description="Execution timeout in seconds"
)
args: Optional[List[str]] = Field(
default=None, description="Command line arguments to pass to the executed code"
)


class ExecuteCodeResponse(BaseModel):
Expand Down
12 changes: 12 additions & 0 deletions src/models/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ class FileInfo(BaseModel):
content_type: str
created_at: datetime
path: str = Field(..., description="File path in the session")
# State restoration fields (for Python state-file linking)
execution_id: Optional[str] = Field(
default=None, description="ID of the execution that created/last used this file"
)
state_hash: Optional[str] = Field(
default=None,
description="SHA256 hash of the Python state when this file was last used",
)
last_used_at: Optional[datetime] = Field(
default=None,
description="Timestamp of when this file was last used in an execution",
)

class Config:
json_encoders = {datetime: lambda v: v.isoformat()}
Expand Down
8 changes: 7 additions & 1 deletion src/services/container/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ async def copy_to_container(
return False

async def copy_content_to_container(
self, container: Container, content: bytes, dest_path: str
self, container: Container, content: bytes, dest_path: str, language: str = "py"
) -> bool:
"""Copy content directly to container without tempfiles.

Expand All @@ -376,19 +376,25 @@ async def copy_content_to_container(
container: Target container
content: File content as bytes
dest_path: Destination path in container (e.g., /mnt/data/file.py)
language: Programming language (used to set correct file ownership)

Returns:
True if successful, False otherwise
"""
try:
loop = asyncio.get_event_loop()

# Get user ID for this language's container
user_id = self.get_user_id_for_language(language)

# Build in-memory tar archive
tar_buffer = io.BytesIO()
with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
tarinfo = tarfile.TarInfo(name=dest_path.split("/")[-1])
tarinfo.size = len(content)
tarinfo.mode = 0o644
tarinfo.uid = user_id
tarinfo.gid = user_id
tar.addfile(tarinfo, io.BytesIO(content))

tar_buffer.seek(0)
Expand Down
9 changes: 9 additions & 0 deletions src/services/container/repl_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ async def execute(
code: str,
timeout: int = None,
working_dir: str = "/mnt/data",
args: Optional[List[str]] = None,
) -> Tuple[int, str, str]:
"""Execute code in running REPL.

Expand All @@ -51,6 +52,7 @@ async def execute(
code: Python code to execute
timeout: Maximum execution time in seconds
working_dir: Working directory for code execution
args: Optional list of command line arguments

Returns:
Tuple of (exit_code, stdout, stderr)
Expand All @@ -62,6 +64,8 @@ async def execute(

# Build request
request = {"code": code, "timeout": timeout, "working_dir": working_dir}
if args:
request["args"] = args
request_json = json.dumps(request)
request_bytes = request_json.encode("utf-8") + DELIMITER

Expand Down Expand Up @@ -109,6 +113,7 @@ async def execute_with_state(
working_dir: str = "/mnt/data",
initial_state: Optional[str] = None,
capture_state: bool = False,
args: Optional[List[str]] = None,
) -> Tuple[int, str, str, Optional[str], List[str]]:
"""Execute code in running REPL with optional state persistence.

Expand All @@ -119,6 +124,7 @@ async def execute_with_state(
working_dir: Working directory for code execution
initial_state: Base64-encoded state to restore before execution
capture_state: Whether to capture state after execution
args: Optional list of command line arguments

Returns:
Tuple of (exit_code, stdout, stderr, new_state, state_errors)
Expand All @@ -138,6 +144,9 @@ async def execute_with_state(
if capture_state:
request["capture_state"] = True

if args:
request["args"] = args

request_json = json.dumps(request)
request_bytes = request_json.encode("utf-8") + DELIMITER

Expand Down
49 changes: 40 additions & 9 deletions src/services/execution/runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Code execution runner - core execution logic."""

import asyncio
import shlex
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
Expand Down Expand Up @@ -152,7 +153,7 @@ async def execute(

# Mount files if provided
if files:
await self._mount_files_to_container(container, files)
await self._mount_files_to_container(container, files, request.language)

# Execute the code
start_time = datetime.utcnow()
Expand Down Expand Up @@ -185,11 +186,16 @@ async def execute(
request.timeout or settings.max_execution_time,
initial_state=initial_state,
capture_state=capture_state,
args=request.args,
)
else:
# Standard execution (no state persistence)
exit_code, stdout, stderr = await self._execute_code_in_container(
container, request.code, request.language, request.timeout
container,
request.code,
request.language,
request.timeout,
args=request.args,
)
end_time = datetime.utcnow()

Expand Down Expand Up @@ -435,12 +441,20 @@ async def _execute_code_in_container(
code: str,
language: str,
timeout: Optional[int] = None,
args: Optional[List[str]] = None,
) -> Tuple[int, str, str]:
"""Execute code in the container.

For REPL-enabled containers (Python with REPL mode), uses the fast
REPL executor which communicates with the pre-warmed Python interpreter.
For other containers, uses the standard execution path.

Args:
container: Docker container to execute in
code: Code to execute
language: Programming language
timeout: Execution timeout in seconds
args: Optional list of command line arguments
"""
language = language.lower()
lang_config = get_language(language)
Expand All @@ -454,7 +468,9 @@ async def _execute_code_in_container(
logger.debug(
"Using REPL executor", container_id=container.id[:12], language=language
)
return await self._execute_via_repl(container, code, execution_timeout)
return await self._execute_via_repl(
container, code, execution_timeout, args=args
)

# Standard execution path for non-REPL containers
exec_command = lang_config.execution_command
Expand All @@ -480,13 +496,20 @@ async def _execute_code_in_container(
# Direct memory-to-container transfer (no tempfiles)
dest_path = f"/mnt/data/{code_filename}"
if not await self.container_manager.copy_content_to_container(
container, code.encode("utf-8"), dest_path
container, code.encode("utf-8"), dest_path, language=language
):
return 1, "", "Failed to write code file to container"

# Build execution command with args if provided
final_command = exec_command
if args:
# Safely quote each argument to prevent shell injection
quoted_args = " ".join(shlex.quote(arg) for arg in args)
final_command = f"{exec_command} {quoted_args}"

return await self.container_manager.execute_command(
container,
exec_command,
final_command,
timeout=execution_timeout,
language=language,
working_dir="/mnt/data",
Expand Down Expand Up @@ -521,21 +544,26 @@ def _is_repl_container(self, container: Container, language: str) -> bool:
return False

async def _execute_via_repl(
self, container: Container, code: str, timeout: int
self,
container: Container,
code: str,
timeout: int,
args: Optional[List[str]] = None,
) -> Tuple[int, str, str]:
"""Execute code via REPL server in container.

Args:
container: Docker container with REPL server running
code: Python code to execute
timeout: Maximum execution time in seconds
args: Optional list of command line arguments

Returns:
Tuple of (exit_code, stdout, stderr)
"""
repl_executor = REPLExecutor(self.container_manager.client)
return await repl_executor.execute(
container, code, timeout=timeout, working_dir="/mnt/data"
container, code, timeout=timeout, working_dir="/mnt/data", args=args
)

async def _execute_via_repl_with_state(
Expand All @@ -545,6 +573,7 @@ async def _execute_via_repl_with_state(
timeout: int,
initial_state: Optional[str] = None,
capture_state: bool = True,
args: Optional[List[str]] = None,
) -> Tuple[int, str, str, Optional[str], List[str]]:
"""Execute code via REPL server with state persistence.

Expand All @@ -554,6 +583,7 @@ async def _execute_via_repl_with_state(
timeout: Maximum execution time in seconds
initial_state: Base64-encoded state to restore before execution
capture_state: Whether to capture state after execution
args: Optional list of command line arguments

Returns:
Tuple of (exit_code, stdout, stderr, new_state, state_errors)
Expand All @@ -566,10 +596,11 @@ async def _execute_via_repl_with_state(
working_dir="/mnt/data",
initial_state=initial_state,
capture_state=capture_state,
args=args,
)

async def _mount_files_to_container(
self, container: Container, files: List[Dict[str, Any]]
self, container: Container, files: List[Dict[str, Any]], language: str = "py"
) -> None:
"""Mount files to container workspace."""
try:
Expand Down Expand Up @@ -599,7 +630,7 @@ async def _mount_files_to_container(
dest_path = f"/mnt/data/{normalized_filename}"

if await self.container_manager.copy_content_to_container(
container, file_content, dest_path
container, file_content, dest_path, language=language
):
logger.info(
"Mounted file",
Expand Down
Loading