From af9c77dba6565ed38873293cbfa01b67e9c293f4 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 19:27:51 +0000 Subject: [PATCH 1/8] refactor: Update process limits in configuration and container management - Replaced `max_processes` with `max_pids` and updated its description to clarify its role in preventing fork bombs. - Adjusted `max_processes` to reflect per-UID nproc ulimit, ensuring it is set high since `max_pids` manages per-container limits. - Added environment variable settings in `ContainerManager` to limit OpenBLAS/OpenMP threads, preventing nproc exhaustion. - Configured `pids_limit` in container settings to utilize the new `max_pids` value for enhanced process management. --- src/config/__init__.py | 13 ++++++++++++- src/config/resources.py | 13 ++++++++++++- src/services/container/manager.py | 11 +++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/config/__init__.py b/src/config/__init__.py index 510d2c5..3e72121 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -142,7 +142,18 @@ class Settings(BaseSettings): max_cpu_quota: int = Field( default=50000, ge=10000, le=100000 ) # Deprecated, use max_cpus - max_processes: int = Field(default=32, ge=1, le=128) + max_pids: int = Field( + default=512, + ge=64, + le=4096, + description="Per-container process limit (cgroup pids_limit). Prevents fork bombs.", + ) + max_processes: int = Field( + default=512, + ge=64, + le=4096, + description="Per-UID nproc ulimit. 
Set high since max_pids handles per-container limiting.", + ) max_open_files: int = Field(default=1024, ge=64, le=4096) # Resource Limits - Files diff --git a/src/config/resources.py b/src/config/resources.py index 357d6a6..d99a68d 100644 --- a/src/config/resources.py +++ b/src/config/resources.py @@ -19,7 +19,18 @@ class ResourcesConfig(BaseSettings): max_cpu_quota: int = Field( default=50000, ge=10000, le=100000 ) # Deprecated, use max_cpus - max_processes: int = Field(default=32, ge=1, le=128) + max_pids: int = Field( + default=512, + ge=64, + le=4096, + description="Per-container process limit (cgroup pids_limit). Prevents fork bombs.", + ) + max_processes: int = Field( + default=512, + ge=64, + le=4096, + description="Per-UID nproc ulimit. Set high since max_pids handles per-container limiting.", + ) max_open_files: int = Field(default=1024, ge=64, le=4096) # File Limits diff --git a/src/services/container/manager.py b/src/services/container/manager.py index 7d5bb29..40e34d9 100644 --- a/src/services/container/manager.py +++ b/src/services/container/manager.py @@ -173,6 +173,12 @@ def create_container( if repl_mode: env["REPL_MODE"] = "true" + # Limit OpenBLAS/OpenMP threads to prevent nproc exhaustion + # All containers run as same user and share the nproc limit + env.setdefault("OPENBLAS_NUM_THREADS", "1") + env.setdefault("OMP_NUM_THREADS", "1") + env.setdefault("MKL_NUM_THREADS", "1") + # Determine network configuration use_wan_access = settings.enable_wan_access @@ -253,7 +259,12 @@ def create_container( "cap_add": ["CHOWN", "DAC_OVERRIDE", "FOWNER", "SETGID", "SETUID"], "read_only": False, "tmpfs": {"/tmp": "noexec,nosuid,size=100m"}, + # pids_limit: cgroup-based per-container process limit (prevents fork bombs) + # This is truly per-container unlike nproc which is per-UID + "pids_limit": settings.max_pids, "ulimits": [ + # nproc: per-UID limit (shared across containers with same user) + # Set high since pids_limit handles per-container limiting 
docker.types.Ulimit( name="nproc", soft=settings.max_processes, From a8909b9d35e24eaaf63160bafb07afc9cc91cfc6 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 19:35:54 +0000 Subject: [PATCH 2/8] refactor: Update process limits and configuration for container management - Replaced `MAX_PROCESSES` with `MAX_PIDS` in configuration files to clarify its role in managing per-container process limits. - Updated `security-profiles.json` to reflect new `pidsLimit` values and removed deprecated `nproc` settings. - Adjusted documentation to include `MAX_PIDS` and its description, ensuring clarity on resource limits. - Cleaned up `ContainerManager` to remove references to `nproc`, focusing on `pids_limit` for enhanced process management. --- .env.example | 2 +- docker/security-profiles.json | 39 ++++--------------------------- docs/CONFIGURATION.md | 14 +++++------ src/config/__init__.py | 6 ----- src/config/resources.py | 6 ----- src/services/container/manager.py | 14 ----------- 6 files changed, 13 insertions(+), 68 deletions(-) diff --git a/.env.example b/.env.example index bd92b13..d8a27b2 100644 --- a/.env.example +++ b/.env.example @@ -62,7 +62,7 @@ MAX_EXECUTION_TIME=30 MAX_MEMORY_MB=512 MAX_CPUS=1 MAX_CPU_QUOTA=50000 #Deprecated -MAX_PROCESSES=32 +MAX_PIDS=512 MAX_OPEN_FILES=1024 # Resource Limits - Files diff --git a/docker/security-profiles.json b/docker/security-profiles.json index a17398a..9e4ea37 100644 --- a/docker/security-profiles.json +++ b/docker/security-profiles.json @@ -10,12 +10,8 @@ "tmpfs": { "/tmp": "rw,noexec,nosuid,size=100m" }, + "pidsLimit": 512, "ulimits": [ - { - "name": "nproc", - "soft": 32, - "hard": 32 - }, { "name": "nofile", "soft": 1024, @@ -42,12 +38,8 @@ "tmpfs": { "/tmp": "rw,noexec,nosuid,size=50m" }, + "pidsLimit": 256, "ulimits": [ - { - "name": "nproc", - "soft": 16, - "hard": 16 - }, { "name": "nofile", "soft": 512, @@ -67,36 +59,15 @@ "languageProfiles": { "python": { "inherits": "defaultProfile", - "description": 
"Security profile optimized for Python execution", - "ulimits": [ - { - "name": "nproc", - "soft": 64, - "hard": 64 - } - ] + "description": "Security profile optimized for Python execution" }, "nodejs": { "inherits": "defaultProfile", - "description": "Security profile optimized for Node.js execution", - "ulimits": [ - { - "name": "nproc", - "soft": 48, - "hard": 48 - } - ] + "description": "Security profile optimized for Node.js execution" }, "java": { "inherits": "defaultProfile", - "description": "Security profile optimized for Java execution", - "ulimits": [ - { - "name": "nproc", - "soft": 128, - "hard": 128 - } - ] + "description": "Security profile optimized for Java execution" }, "go": { "inherits": "defaultProfile", diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f277221..8c10c31 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -184,13 +184,13 @@ Docker is used for secure code execution in containers. #### Execution Limits -| Variable | Default | Description | -| -------------------- | ------- | ------------------------------------- | -| `MAX_EXECUTION_TIME` | `30` | Maximum code execution time (seconds) | -| `MAX_MEMORY_MB` | `512` | Maximum memory per execution (MB) | -| `MAX_CPU_QUOTA` | `50000` | CPU quota (100000 = 1 CPU) | -| `MAX_PROCESSES` | `32` | Maximum processes per container | -| `MAX_OPEN_FILES` | `1024` | Maximum open files per container | +| Variable | Default | Description | +| -------------------- | ------- | ---------------------------------------------------------------- | +| `MAX_EXECUTION_TIME` | `30` | Maximum code execution time (seconds) | +| `MAX_MEMORY_MB` | `512` | Maximum memory per execution (MB) | +| `MAX_CPU_QUOTA` | `50000` | CPU quota (100000 = 1 CPU) | +| `MAX_PIDS` | `512` | Per-container process limit (cgroup pids_limit, prevents fork bombs) | +| `MAX_OPEN_FILES` | `1024` | Maximum open files per container | #### File Limits diff --git a/src/config/__init__.py b/src/config/__init__.py 
index 3e72121..3287b70 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -148,12 +148,6 @@ class Settings(BaseSettings): le=4096, description="Per-container process limit (cgroup pids_limit). Prevents fork bombs.", ) - max_processes: int = Field( - default=512, - ge=64, - le=4096, - description="Per-UID nproc ulimit. Set high since max_pids handles per-container limiting.", - ) max_open_files: int = Field(default=1024, ge=64, le=4096) # Resource Limits - Files diff --git a/src/config/resources.py b/src/config/resources.py index d99a68d..3b4479e 100644 --- a/src/config/resources.py +++ b/src/config/resources.py @@ -25,12 +25,6 @@ class ResourcesConfig(BaseSettings): le=4096, description="Per-container process limit (cgroup pids_limit). Prevents fork bombs.", ) - max_processes: int = Field( - default=512, - ge=64, - le=4096, - description="Per-UID nproc ulimit. Set high since max_pids handles per-container limiting.", - ) max_open_files: int = Field(default=1024, ge=64, le=4096) # File Limits diff --git a/src/services/container/manager.py b/src/services/container/manager.py index 40e34d9..3bc93b8 100644 --- a/src/services/container/manager.py +++ b/src/services/container/manager.py @@ -173,12 +173,6 @@ def create_container( if repl_mode: env["REPL_MODE"] = "true" - # Limit OpenBLAS/OpenMP threads to prevent nproc exhaustion - # All containers run as same user and share the nproc limit - env.setdefault("OPENBLAS_NUM_THREADS", "1") - env.setdefault("OMP_NUM_THREADS", "1") - env.setdefault("MKL_NUM_THREADS", "1") - # Determine network configuration use_wan_access = settings.enable_wan_access @@ -260,16 +254,8 @@ def create_container( "read_only": False, "tmpfs": {"/tmp": "noexec,nosuid,size=100m"}, # pids_limit: cgroup-based per-container process limit (prevents fork bombs) - # This is truly per-container unlike nproc which is per-UID "pids_limit": settings.max_pids, "ulimits": [ - # nproc: per-UID limit (shared across containers with same user) - # 
Set high since pids_limit handles per-container limiting - docker.types.Ulimit( - name="nproc", - soft=settings.max_processes, - hard=settings.max_processes, - ), docker.types.Ulimit( name="nofile", soft=settings.max_open_files, From f83df4fee2f6adae3d7de00e35cd8f0bc00f48ff Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 19:43:20 +0000 Subject: [PATCH 3/8] refactor: Update container configuration settings for read-only and tmpfs options - Replaced hardcoded values for `read_only` and `tmpfs` in `ContainerManager` with settings from the configuration to enhance flexibility and maintainability. - This change allows for easier adjustments to container settings without modifying the code directly. --- src/services/container/manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/services/container/manager.py b/src/services/container/manager.py index 3bc93b8..b0027ae 100644 --- a/src/services/container/manager.py +++ b/src/services/container/manager.py @@ -251,8 +251,8 @@ def create_container( "security_opt": ["no-new-privileges:true"], "cap_drop": ["ALL"], "cap_add": ["CHOWN", "DAC_OVERRIDE", "FOWNER", "SETGID", "SETUID"], - "read_only": False, - "tmpfs": {"/tmp": "noexec,nosuid,size=100m"}, + "read_only": settings.docker_read_only, + "tmpfs": settings.docker_tmpfs, # pids_limit: cgroup-based per-container process limit (prevents fork bombs) "pids_limit": settings.max_pids, "ulimits": [ From e47e2a097f33f92fd923d745ef4d3b3476439bac Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 20:39:11 +0000 Subject: [PATCH 4/8] feat: Implement seccomp profile for enhanced container security - Added a new seccomp profile (`seccomp-sandbox.json`) to block dangerous syscalls, enhancing container security. - Updated `docker-compose.yml` to mount the seccomp profile into the container. - Modified configuration settings to allow specification of the seccomp profile path. 
- Enhanced `ContainerManager` to load and apply the seccomp profile during container creation. - Added integration tests to verify that the seccomp profile correctly blocks the `ptrace` syscall and that the profile file is valid. --- docker-compose.yml | 1 + docker/seccomp-sandbox.json | 176 ++++++++++++++++++ docker/security-profiles.json | 2 +- src/config/__init__.py | 4 + src/config/docker.py | 7 +- src/services/container/manager.py | 48 ++++- tests/integration/test_container_hardening.py | 112 +++++++++++ 7 files changed, 341 insertions(+), 9 deletions(-) create mode 100644 docker/seccomp-sandbox.json diff --git a/docker-compose.yml b/docker-compose.yml index 1f3703d..0331295 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,6 +40,7 @@ services: - ${SSL_CERTS_PATH:-./ssl}:/app/ssl - ./dashboard:/app/dashboard - ./src:/app/src + - ./docker:/app/docker:ro # Seccomp profile for container security depends_on: - redis - minio diff --git a/docker/seccomp-sandbox.json b/docker/seccomp-sandbox.json new file mode 100644 index 0000000..ee476ad --- /dev/null +++ b/docker/seccomp-sandbox.json @@ -0,0 +1,176 @@ +{ + "defaultAction": "SCMP_ACT_ALLOW", + "defaultErrnoRet": 1, + "architectures": [ + "SCMP_ARCH_X86_64", + "SCMP_ARCH_X86", + "SCMP_ARCH_X32", + "SCMP_ARCH_AARCH64", + "SCMP_ARCH_ARM" + ], + "syscalls": [ + { + "names": [ + "ptrace" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block process tracing - caused container hang with PTRACE_TRACEME" + }, + { + "names": [ + "process_vm_readv", + "process_vm_writev" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block cross-process memory access" + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block ASLR disabling" + }, + { + "names": [ + "mount", + "umount", + "umount2" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block filesystem manipulation (defense-in-depth, also blocked by CAP_SYS_ADMIN)" 
+ }, + { + "names": [ + "pivot_root", + "chroot" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block container escape vectors (defense-in-depth, also blocked by CAP_SYS_CHROOT)" + }, + { + "names": [ + "reboot", + "kexec_load", + "kexec_file_load" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block system disruption (defense-in-depth, also blocked by CAP_SYS_BOOT)" + }, + { + "names": [ + "init_module", + "finit_module", + "delete_module" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block kernel module manipulation (defense-in-depth, also blocked by CAP_SYS_MODULE)" + }, + { + "names": [ + "acct" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block process accounting manipulation (defense-in-depth, also blocked by CAP_SYS_PACCT)" + }, + { + "names": [ + "swapon", + "swapoff" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block swap manipulation (defense-in-depth, also blocked by CAP_SYS_ADMIN)" + }, + { + "names": [ + "sethostname", + "setdomainname" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block host identity changes (defense-in-depth, also blocked by CAP_SYS_ADMIN)" + }, + { + "names": [ + "clock_settime", + "clock_adjtime", + "settimeofday", + "adjtimex" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block time manipulation (defense-in-depth, also blocked by CAP_SYS_TIME)" + }, + { + "names": [ + "iopl", + "ioperm" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block direct I/O port access (defense-in-depth, also blocked by CAP_SYS_RAWIO)" + }, + { + "names": [ + "create_module", + "get_kernel_syms", + "query_module" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block legacy kernel module syscalls" + }, + { + "names": [ + "unshare", + "setns" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block namespace manipulation" + }, + { + "names": [ + "userfaultfd" + ], + 
"action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block userfaultfd - can be used in exploits" + }, + { + "names": [ + "bpf" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block BPF - powerful and often exploited" + }, + { + "names": [ + "perf_event_open" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block perf events - can leak kernel info" + }, + { + "names": [ + "add_key", + "keyctl", + "request_key" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 1, + "comment": "Block kernel keyring manipulation" + } + ] +} diff --git a/docker/security-profiles.json b/docker/security-profiles.json index 9e4ea37..c2341d1 100644 --- a/docker/security-profiles.json +++ b/docker/security-profiles.json @@ -30,7 +30,7 @@ "restrictedProfile": { "name": "code-interpreter-restricted", "description": "Highly restricted security profile for untrusted code", - "securityOpt": ["no-new-privileges:true", "seccomp:unconfined"], + "securityOpt": ["no-new-privileges:true"], "capDrop": ["ALL"], "capAdd": [], "readOnly": true, diff --git a/src/config/__init__.py b/src/config/__init__.py index 3287b70..cddff4f 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -129,6 +129,10 @@ class Settings(BaseSettings): docker_tmpfs: Dict[str, str] = Field( default_factory=lambda: {"/tmp": "rw,noexec,nosuid,size=100m"} ) + docker_seccomp_profile: Optional[str] = Field( + default="docker/seccomp-sandbox.json", + description="Path to seccomp profile JSON file (None to disable)", + ) # Resource Limits - Execution max_execution_time: int = Field(default=30, ge=1, le=300) diff --git a/src/config/docker.py b/src/config/docker.py index 5329dd1..c4c4a5f 100644 --- a/src/config/docker.py +++ b/src/config/docker.py @@ -1,6 +1,6 @@ """Docker configuration.""" -from typing import Dict, List +from typing import Dict, List, Optional from pydantic import Field from pydantic_settings import BaseSettings @@ -22,6 +22,11 @@ class DockerConfig(BaseSettings): 
default_factory=lambda: {"/tmp": "rw,noexec,nosuid,size=100m"}, alias="docker_tmpfs", ) + seccomp_profile: Optional[str] = Field( + default="docker/seccomp-sandbox.json", + alias="docker_seccomp_profile", + description="Path to seccomp profile JSON file (None to disable)", + ) # Container lifecycle container_ttl_minutes: int = Field(default=5, ge=1, le=1440) diff --git a/src/services/container/manager.py b/src/services/container/manager.py index b0027ae..be91ed1 100644 --- a/src/services/container/manager.py +++ b/src/services/container/manager.py @@ -2,9 +2,11 @@ import asyncio import io +import json import tarfile import uuid from datetime import datetime +from pathlib import Path from typing import Any, Dict, List, Optional, Tuple import structlog @@ -233,11 +235,43 @@ def create_container( except Exception: pass + # Build security options with seccomp profile + security_opts = list(settings.docker_security_opt) + if settings.docker_seccomp_profile: + # Resolve profile path (relative to project root or absolute) + profile_path = Path(settings.docker_seccomp_profile) + if not profile_path.is_absolute(): + # Relative to project root (4 levels up from this file) + project_root = Path(__file__).parent.parent.parent.parent + profile_path = project_root / profile_path + if profile_path.exists(): + try: + with open(profile_path) as f: + seccomp_data = json.load(f) + # docker-py accepts inline JSON via seccomp= + security_opts.append(f"seccomp={json.dumps(seccomp_data)}") + logger.debug( + "Loaded seccomp profile", + profile=str(profile_path), + blocked_syscalls=len(seccomp_data.get("syscalls", [])), + ) + except Exception as e: + logger.warning( + "Failed to load seccomp profile, using default", + profile=str(profile_path), + error=str(e), + ) + else: + logger.warning( + "Seccomp profile not found, using default", + profile=str(profile_path), + ) + # Build container config # Security hardening applied: - # - ulimits: nproc and nofile limits to prevent fork bombs and FD 
exhaustion - # Note: /proc/kallsyms and /proc/modules masking would require MaskedPaths - # (not supported by docker-py) or a custom seccomp profile. + # - seccomp profile: blocks dangerous syscalls (ptrace, etc.) + # - ulimits: nofile limits to prevent FD exhaustion + # - pids_limit: prevents fork bombs container_config: Dict[str, Any] = { "image": image, "name": container_name, @@ -248,10 +282,11 @@ def create_container( "mem_limit": f"{settings.max_memory_mb}m", "memswap_limit": f"{settings.max_memory_mb}m", "nano_cpus": int(settings.max_cpus * 1e9), - "security_opt": ["no-new-privileges:true"], + "security_opt": security_opts, "cap_drop": ["ALL"], "cap_add": ["CHOWN", "DAC_OVERRIDE", "FOWNER", "SETGID", "SETUID"], - "read_only": settings.docker_read_only, + # read_only must be False to allow file uploads to /mnt/data + "read_only": False, "tmpfs": settings.docker_tmpfs, # pids_limit: cgroup-based per-container process limit (prevents fork bombs) "pids_limit": settings.max_pids, @@ -263,8 +298,7 @@ def create_container( ), ], # Note: /proc/kallsyms and /proc/modules masking requires MaskedPaths - # which docker-py doesn't support. Bind mounts to /proc are blocked by runc. - # Alternative: use a custom seccomp profile or accept the limitation. + # which docker-py doesn't support. These paths are read-only by default. "environment": env, "labels": labels, "hostname": settings.container_generic_hostname, diff --git a/tests/integration/test_container_hardening.py b/tests/integration/test_container_hardening.py index ba0728d..5ddbaed 100644 --- a/tests/integration/test_container_hardening.py +++ b/tests/integration/test_container_hardening.py @@ -355,3 +355,115 @@ def test_resolv_conf_no_internal_domains(self, client, auth_headers): assert "internal" not in stdout.lower() finally: app.dependency_overrides.clear() + + def test_ptrace_blocked_by_seccomp(self, client, auth_headers): + """Verify ptrace syscall is blocked by seccomp profile. 
+ + This test verifies that the seccomp profile blocks ptrace, + which prevents process tracing attacks that can cause containers + to become unresponsive to stop signals. + """ + session_id = "hardening-ptrace-test" + mock_session = create_session(session_id) + + # Mock execution that attempts ptrace - should return -1 (EPERM) + # When seccomp blocks ptrace, it returns EPERM (-1) + mock_execution = CodeExecution( + execution_id="exec-ptrace", + session_id=session_id, + code=""" +import ctypes +libc = ctypes.CDLL("libc.so.6") +result = libc.ptrace(0, 0, 0, 0) # PTRACE_TRACEME +print(f"ptrace result: {result}") +""", + language="py", + status=ExecutionStatus.COMPLETED, + exit_code=0, + outputs=[ + ExecutionOutput( + type=OutputType.STDOUT, + content="ptrace result: -1\n", + timestamp=datetime.now(timezone.utc), + ) + ], + ) + + mock_session_service = AsyncMock() + mock_session_service.create_session.return_value = mock_session + mock_session_service.get_session.return_value = mock_session + + mock_execution_service = AsyncMock() + mock_execution_service.execute_code.return_value = ( + mock_execution, + None, + None, + [], + "pool_hit", + ) + + mock_file_service = AsyncMock() + mock_file_service.list_files.return_value = [] + + from src.dependencies.services import ( + get_session_service, + get_execution_service, + get_file_service, + ) + + app.dependency_overrides[get_session_service] = lambda: mock_session_service + app.dependency_overrides[get_execution_service] = lambda: mock_execution_service + app.dependency_overrides[get_file_service] = lambda: mock_file_service + + try: + response = client.post( + "/exec", + json={ + "code": """ +import ctypes +libc = ctypes.CDLL("libc.so.6") +result = libc.ptrace(0, 0, 0, 0) +print(f"ptrace result: {result}") +""", + "lang": "py", + }, + headers=auth_headers, + ) + + assert response.status_code == 200 + data = response.json() + stdout = data.get("stdout", "") + # When seccomp blocks ptrace, it returns -1 (EPERM) + assert 
"ptrace result: -1" in stdout + finally: + app.dependency_overrides.clear() + + def test_seccomp_profile_config_exists(self): + """Verify seccomp profile configuration is set.""" + from src.config import settings + + assert settings.docker_seccomp_profile == "docker/seccomp-sandbox.json" + + def test_seccomp_profile_file_exists(self): + """Verify seccomp profile file exists and is valid JSON.""" + import json + from pathlib import Path + + profile_path = Path("docker/seccomp-sandbox.json") + assert profile_path.exists(), "Seccomp profile file should exist" + + with open(profile_path) as f: + profile = json.load(f) + + # Verify structure + assert "defaultAction" in profile + assert "syscalls" in profile + assert isinstance(profile["syscalls"], list) + + # Verify ptrace is blocked + blocked_syscalls = [] + for rule in profile["syscalls"]: + if rule.get("action") == "SCMP_ACT_ERRNO": + blocked_syscalls.extend(rule.get("names", [])) + + assert "ptrace" in blocked_syscalls, "ptrace should be blocked by seccomp" From 109eea5ad4311d828d561c75fe6c4f9993ae977b Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 21:01:30 +0000 Subject: [PATCH 5/8] chore: Update configuration and tests for file handling and resource limits - Increased `MAX_EXECUTION_TIME` from 30 to 120 seconds in `.env.example` for extended processing capabilities. - Enabled `ENABLE_ORPHAN_MINIO_CLEANUP` in `.env.example` to allow periodic cleanup of orphaned MinIO objects. - Disabled documentation generation by setting `ENABLE_DOCS` to false in `.env.example`. - Added new integration tests in `test_file_handling.py` to validate the complete workflow of uploading, analyzing, and downloading CSV and JSON files, as well as processing images with OpenCV. 
--- .env.example | 6 +- .gitignore | 2 +- tests/integration/test_file_handling.py | 360 ++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 4 deletions(-) diff --git a/.env.example b/.env.example index d8a27b2..dd360bf 100644 --- a/.env.example +++ b/.env.example @@ -58,7 +58,7 @@ DOCKER_NETWORK_MODE=none DOCKER_READ_ONLY=true # Resource Limits - Execution -MAX_EXECUTION_TIME=30 +MAX_EXECUTION_TIME=120 MAX_MEMORY_MB=512 MAX_CPUS=1 MAX_CPU_QUOTA=50000 #Deprecated @@ -84,7 +84,7 @@ SESSION_ID_LENGTH=32 # MinIO Orphan Cleanup (optional) # Enable periodic pruning of MinIO objects older than TTL with missing Redis sessions -ENABLE_ORPHAN_MINIO_CLEANUP=false +ENABLE_ORPHAN_MINIO_CLEANUP=true # Container Pool Configuration CONTAINER_POOL_ENABLED=true @@ -174,4 +174,4 @@ HEALTH_CHECK_TIMEOUT=5 # Development Configuration ENABLE_CORS=false # CORS_ORIGINS=http://localhost:3000,http://localhost:8080 -ENABLE_DOCS=true \ No newline at end of file +ENABLE_DOCS=false \ No newline at end of file diff --git a/.gitignore b/.gitignore index 93037e9..2b5f249 100644 --- a/.gitignore +++ b/.gitignore @@ -58,7 +58,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ - +baseline_complexity.json # Translations *.mo *.pot diff --git a/tests/integration/test_file_handling.py b/tests/integration/test_file_handling.py index 251e79e..224f198 100644 --- a/tests/integration/test_file_handling.py +++ b/tests/integration/test_file_handling.py @@ -336,3 +336,363 @@ async def test_large_file_generation(self, ssl_context, headers): f"Large file too small: {len(content)} bytes" ) assert content[:8] == b'\x89PNG\r\n\x1a\n', "Invalid PNG" + + +class TestUploadAnalyzeDownload: + """Test complete workflow: upload file → analyze with pandas → download results.""" + + @pytest.mark.asyncio + async def test_upload_csv_analyze_download_results(self, ssl_context, headers): + """Test uploading a CSV, performing pandas analysis, and downloading the results.""" + connector = 
aiohttp.TCPConnector(ssl=ssl_context) + async with aiohttp.ClientSession(connector=connector) as session: + import time + entity_id = f"test-upload-analyze-{int(time.time())}" + + # Step 1: Upload a CSV file + csv_content = "product,quantity,price\nWidget A,100,9.99\nWidget B,250,14.99\nWidget C,75,24.99\nWidget D,300,4.99\nWidget E,150,19.99" + + form_data = aiohttp.FormData() + form_data.add_field('files', csv_content.encode(), + filename='sales_data.csv', + content_type='text/csv') + form_data.add_field('entity_id', entity_id) + + upload_headers = {"X-API-Key": API_KEY} + + async with session.post( + f"{API_URL}/upload", + data=form_data, + headers=upload_headers, + ssl=ssl_context + ) as resp: + assert resp.status == 200, f"Upload failed: {await resp.text()}" + upload_result = await resp.json() + + session_id = upload_result.get("session_id") + uploaded_files = upload_result.get("files", []) + assert len(uploaded_files) >= 1, "No files in upload response" + + uploaded_file = uploaded_files[0] + file_id = uploaded_file.get("id") or uploaded_file.get("fileId") + assert file_id is not None, "No file ID returned" + + # Step 2: Execute analysis code that reads the uploaded file and creates a report + from textwrap import dedent + analysis_code = dedent(""" + import pandas as pd + + # Read the uploaded CSV (files are placed in /mnt/data/) + df = pd.read_csv('/mnt/data/sales_data.csv') + + # Perform analysis + df['total_value'] = df['quantity'] * df['price'] + summary = df.describe() + + # Create a summary report + report = f'''Sales Analysis Report + ===================== + Total Products: {len(df)} + Total Revenue: ${df["total_value"].sum():.2f} + Average Price: ${df["price"].mean():.2f} + Top Product by Quantity: {df.loc[df["quantity"].idxmax(), "product"]} + Top Product by Value: {df.loc[df["total_value"].idxmax(), "product"]} + ''' + + # Save the analysis results + df.to_csv('/mnt/data/analyzed_sales.csv', index=False) + + with open('/mnt/data/sales_report.txt', 
'w') as f: + f.write(report) + + print(report) + """).strip() + + exec_payload = { + "lang": "py", + "code": analysis_code, + "entity_id": entity_id, + "files": [{ + "id": file_id, + "session_id": session_id, + "name": "sales_data.csv" + }] + } + + async with session.post( + f"{API_URL}/exec", + json=exec_payload, + headers=headers, + ssl=ssl_context + ) as resp: + assert resp.status == 200, f"Exec failed: {await resp.text()}" + exec_result = await resp.json() + + # Verify execution succeeded + stdout = exec_result.get("stdout", "") + stderr = exec_result.get("stderr", "") + assert "Sales Analysis Report" in stdout, f"Analysis failed. stdout: {stdout}, stderr: {stderr}" + + # Find generated files + files = exec_result.get("files", []) + assert len(files) >= 2, f"Expected 2 output files, got {len(files)}" + + csv_output = next((f for f in files if "analyzed_sales.csv" in f.get("name", "")), None) + txt_output = next((f for f in files if "sales_report.txt" in f.get("name", "")), None) + + assert csv_output is not None, "analyzed_sales.csv not found in output" + assert txt_output is not None, "sales_report.txt not found in output" + + # Use session_id from exec result for downloading generated files + exec_session_id = exec_result.get("session_id") + + # Step 3: Download and verify the analyzed CSV + download_url = f"{API_URL}/download/{exec_session_id}/{csv_output['id']}" + async with session.get(download_url, headers=upload_headers, ssl=ssl_context) as resp: + assert resp.status == 200, f"CSV download failed: {resp.status}" + csv_result = await resp.text() + + # Verify the analysis added the total_value column + assert "total_value" in csv_result, "Analysis column not found in output CSV" + assert "Widget A" in csv_result + # Widget A: 100 * 9.99 = 999.0 + assert "999" in csv_result, "Calculated total_value not found" + + # Step 4: Download and verify the text report + download_url = f"{API_URL}/download/{exec_session_id}/{txt_output['id']}" + async with 
session.get(download_url, headers=upload_headers, ssl=ssl_context) as resp: + assert resp.status == 200, f"Report download failed: {resp.status}" + report_content = await resp.text() + + assert "Sales Analysis Report" in report_content + assert "Total Products: 5" in report_content + assert "Total Revenue:" in report_content + assert "Top Product by Quantity:" in report_content + + @pytest.mark.asyncio + async def test_upload_image_process_download(self, ssl_context, headers): + """Test uploading an image, processing with OpenCV, and downloading the result.""" + connector = aiohttp.TCPConnector(ssl=ssl_context) + async with aiohttp.ClientSession(connector=connector) as session: + import time + entity_id = f"test-image-process-{int(time.time())}" + + # Step 1: Create and upload a simple PNG image (100x100 red square) + # PNG header for a minimal valid image is complex, so we'll generate one with code + # First, execute code to create a test image, then use it + + # Create a test image via execution first + create_image_code = """ +import cv2 +import numpy as np + +# Create a simple test image (100x100 with colored squares) +img = np.zeros((100, 100, 3), dtype=np.uint8) +img[0:50, 0:50] = [255, 0, 0] # Blue (BGR) +img[0:50, 50:100] = [0, 255, 0] # Green +img[50:100, 0:50] = [0, 0, 255] # Red +img[50:100, 50:100] = [255, 255, 0] # Cyan + +cv2.imwrite('/mnt/data/test_input.png', img) +print(f'Created test image: {img.shape}') +""" + + async with session.post( + f"{API_URL}/exec", + json={"lang": "py", "code": create_image_code, "entity_id": entity_id}, + headers=headers, + ssl=ssl_context + ) as resp: + assert resp.status == 200 + result = await resp.json() + session_id = result.get("session_id") + + # Get the created image file + files = result.get("files", []) + input_image = next((f for f in files if "test_input.png" in f.get("name", "")), None) + assert input_image is not None, "Test image not created" + + # Step 2: Process the image (apply blur and edge detection) 
+ process_code = """ +import cv2 +import numpy as np + +# Read the input image (files are placed in /mnt/data/) +img = cv2.imread('/mnt/data/test_input.png') +print(f'Input shape: {img.shape}') + +# Apply Gaussian blur +blurred = cv2.GaussianBlur(img, (5, 5), 0) + +# Apply Canny edge detection +gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY) +edges = cv2.Canny(gray, 50, 150) + +# Save results +cv2.imwrite('/mnt/data/blurred.png', blurred) +cv2.imwrite('/mnt/data/edges.png', edges) + +print(f'Processed images saved. Edges shape: {edges.shape}') +""" + + exec_payload = { + "lang": "py", + "code": process_code, + "entity_id": entity_id, + "files": [{ + "id": input_image['id'], + "session_id": session_id, + "name": "test_input.png" + }] + } + + async with session.post( + f"{API_URL}/exec", + json=exec_payload, + headers=headers, + ssl=ssl_context + ) as resp: + assert resp.status == 200, f"Processing failed: {await resp.text()}" + result = await resp.json() + + stdout = result.get("stdout", "") + stderr = result.get("stderr", "") + assert "Processed images saved" in stdout, f"Processing failed. 
stderr: {stderr}" + + files = result.get("files", []) + blurred_file = next((f for f in files if "blurred.png" in f.get("name", "")), None) + edges_file = next((f for f in files if "edges.png" in f.get("name", "")), None) + + assert blurred_file is not None, "blurred.png not found" + assert edges_file is not None, "edges.png not found" + + # Step 3: Download and verify the processed images + upload_headers = {"X-API-Key": API_KEY} + + # Download blurred image + download_url = f"{API_URL}/download/{session_id}/{blurred_file['id']}" + async with session.get(download_url, headers=upload_headers, ssl=ssl_context) as resp: + assert resp.status == 200 + content = await resp.read() + assert len(content) > 100, f"Blurred image too small: {len(content)}" + assert content[:4] == b'\x89PNG', "Blurred output is not a valid PNG" + + # Download edges image + download_url = f"{API_URL}/download/{session_id}/{edges_file['id']}" + async with session.get(download_url, headers=upload_headers, ssl=ssl_context) as resp: + assert resp.status == 200 + content = await resp.read() + assert len(content) > 100, f"Edges image too small: {len(content)}" + assert content[:4] == b'\x89PNG', "Edges output is not a valid PNG" + + @pytest.mark.asyncio + async def test_upload_json_transform_download(self, ssl_context, headers): + """Test uploading JSON data, transforming it, and downloading the result.""" + connector = aiohttp.TCPConnector(ssl=ssl_context) + async with aiohttp.ClientSession(connector=connector) as session: + import time + import json + entity_id = f"test-json-transform-{int(time.time())}" + + # Step 1: Upload JSON data + json_data = { + "users": [ + {"name": "Alice", "age": 30, "department": "Engineering"}, + {"name": "Bob", "age": 25, "department": "Marketing"}, + {"name": "Charlie", "age": 35, "department": "Engineering"}, + {"name": "Diana", "age": 28, "department": "Sales"}, + {"name": "Eve", "age": 32, "department": "Engineering"} + ] + } + + form_data = aiohttp.FormData() + 
form_data.add_field('files', json.dumps(json_data).encode(), + filename='users.json', + content_type='application/json') + form_data.add_field('entity_id', entity_id) + + upload_headers = {"X-API-Key": API_KEY} + + async with session.post( + f"{API_URL}/upload", + data=form_data, + headers=upload_headers, + ssl=ssl_context + ) as resp: + assert resp.status == 200, f"Upload failed: {await resp.text()}" + upload_result = await resp.json() + session_id = upload_result.get("session_id") + uploaded_file = upload_result.get("files", [])[0] + file_id = uploaded_file.get("id") or uploaded_file.get("fileId") + + # Step 2: Transform the data + from textwrap import dedent + transform_code = dedent(""" + import json + import pandas as pd + + # Read uploaded JSON (files are placed in /mnt/data/) + with open('/mnt/data/users.json') as f: + data = json.load(f) + + # Convert to DataFrame and analyze + df = pd.DataFrame(data['users']) + + # Group by department + dept_summary = df.groupby('department').agg({ + 'name': 'count', + 'age': 'mean' + }).rename(columns={'name': 'count', 'age': 'avg_age'}) + + # Create output + output = { + 'total_users': len(df), + 'avg_age': df['age'].mean(), + 'department_breakdown': dept_summary.to_dict('index') + } + + # Save transformed data + with open('/mnt/data/analysis.json', 'w') as f: + json.dump(output, f, indent=2) + + print(json.dumps(output, indent=2)) + """).strip() + + exec_payload = { + "lang": "py", + "code": transform_code, + "entity_id": entity_id, + "files": [{ + "id": file_id, + "session_id": session_id, + "name": "users.json" + }] + } + + async with session.post( + f"{API_URL}/exec", + json=exec_payload, + headers=headers, + ssl=ssl_context + ) as resp: + assert resp.status == 200 + result = await resp.json() + + files = result.get("files", []) + json_output = next((f for f in files if "analysis.json" in f.get("name", "")), None) + assert json_output is not None, "analysis.json not found" + + # Use session_id from exec result for 
downloading + exec_session_id = result.get("session_id") + + # Step 3: Download and verify the result + download_url = f"{API_URL}/download/{exec_session_id}/{json_output['id']}" + async with session.get(download_url, headers=upload_headers, ssl=ssl_context) as resp: + assert resp.status == 200 + content = await resp.text() + + result_data = json.loads(content) + assert result_data['total_users'] == 5 + assert 'department_breakdown' in result_data + assert 'Engineering' in result_data['department_breakdown'] + assert result_data['department_breakdown']['Engineering']['count'] == 3 From 8193e1be4ab10548b94703c2fffbeb7707f49ea3 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 21:05:05 +0000 Subject: [PATCH 6/8] refactor: Rename max_processes to max_pids in configuration settings - Updated the configuration to replace `max_processes` with `max_pids` for clarity in managing per-container process limits. --- src/config/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/__init__.py b/src/config/__init__.py index cddff4f..168f084 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -547,7 +547,7 @@ def resources(self) -> ResourcesConfig: max_memory_mb=self.max_memory_mb, max_cpus=self.max_cpus, max_cpu_quota=self.max_cpu_quota, - max_processes=self.max_processes, + max_pids=self.max_pids, max_open_files=self.max_open_files, max_file_size_mb=self.max_file_size_mb, max_total_file_size_mb=self.max_total_file_size_mb, From fa0d182bf9b3438964f54eced71b6cc564046714 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 21:12:33 +0000 Subject: [PATCH 7/8] feat: Expand allowed file extensions in security configuration - Added a comprehensive list of allowed file extensions for various categories including text, Microsoft Office, OpenDocument formats, data formats, images, web files, scripts, and archives to enhance file handling capabilities. 
--- src/config/security.py | 61 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/src/config/security.py b/src/config/security.py index 26cc9c2..0430582 100644 --- a/src/config/security.py +++ b/src/config/security.py @@ -17,7 +17,48 @@ class SecurityConfig(BaseSettings): # File Security allowed_file_extensions: List[str] = Field( default_factory=lambda: [ + # Text and documentation ".txt", + ".md", + ".rtf", + ".pdf", + # Microsoft Office + ".doc", + ".docx", + ".dotx", + ".xls", + ".xlsx", + ".xltx", + ".ppt", + ".pptx", + ".potx", + ".ppsx", + # OpenDocument formats + ".odt", + ".ods", + ".odp", + ".odg", + # Data formats + ".json", + ".csv", + ".xml", + ".yaml", + ".yml", + ".sql", + # Images + ".png", + ".jpg", + ".jpeg", + ".gif", + ".svg", + ".bmp", + ".webp", + ".ico", + # Web + ".html", + ".htm", + ".css", + # Code files ".py", ".js", ".ts", @@ -32,18 +73,24 @@ class SecurityConfig(BaseSettings): ".r", ".f90", ".d", - ".json", - ".csv", - ".xml", - ".yaml", - ".yml", - ".md", - ".sql", + # Scripts and config ".sh", ".bat", ".ps1", ".dockerfile", ".makefile", + ".ini", + ".cfg", + ".conf", + ".log", + # Archives + ".zip", + # Email and calendar + ".eml", + ".msg", + ".mbox", + ".ics", + ".vcf", ] ) blocked_file_patterns: List[str] = Field( From 8d6c5d8af9f45bc00c1772bc75ceace92ff67312 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Sun, 28 Dec 2025 21:32:56 +0000 Subject: [PATCH 8/8] feat: Implement filename sanitization for file uploads and listings - Added `sanitize_filename` method in `OutputProcessor` to ensure filenames are safe for container use by replacing non-alphanumeric characters and handling edge cases. - Updated `upload_file` and `list_files` functions to utilize the new sanitization logic, ensuring consistent filename formatting across file operations. - Introduced unit tests for `sanitize_filename` to validate its functionality and edge case handling. 
--- src/api/files.py | 15 ++-- src/services/execution/output.py | 58 ++++++++++++-- tests/unit/test_output_processor.py | 119 ++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 12 deletions(-) create mode 100644 tests/unit/test_output_processor.py diff --git a/src/api/files.py b/src/api/files.py index 7b38146..6f436a0 100644 --- a/src/api/files.py +++ b/src/api/files.py @@ -15,6 +15,7 @@ # Local application imports from ..config import settings from ..dependencies import FileServiceDep +from ..services.execution.output import OutputProcessor from ..utils.id_generator import generate_session_id @@ -119,13 +120,13 @@ async def upload_file( content_type=file.content_type, ) - # Get file info for complete details - file_info = await file_service.get_file_info(session_id, file_id) + # Sanitize filename to match what will be used in container + sanitized_name = OutputProcessor.sanitize_filename(file.filename) uploaded_files.append( { "id": file_id, - "name": file.filename, + "name": sanitized_name, "session_id": session_id, "content": None, # LibreChat doesn't return content in upload response "size": len(content), @@ -207,10 +208,12 @@ async def list_files( # Return simple file information simple_files = [] for file_info in files: + # Return sanitized filename to match container + sanitized_name = OutputProcessor.sanitize_filename(file_info.filename) simple_files.append( { "id": file_info.file_id, - "name": file_info.filename, + "name": sanitized_name, "path": file_info.path, } ) @@ -219,9 +222,11 @@ async def list_files( # Return full file details - LibreChat format detailed_files = [] for file_info in files: + # Return sanitized filename to match container + sanitized_name = OutputProcessor.sanitize_filename(file_info.filename) detailed_files.append( { - "name": file_info.filename, + "name": sanitized_name, "id": file_info.file_id, "session_id": session_id, "content": None, # Not returned in list diff --git a/src/services/execution/output.py 
b/src/services/execution/output.py index 71a97f7..8bcd108 100644 --- a/src/services/execution/output.py +++ b/src/services/execution/output.py @@ -1,6 +1,8 @@ """Output processing and validation for code execution.""" +import os import re +import secrets from pathlib import Path from typing import Any, Dict @@ -221,13 +223,55 @@ def format_error_message(cls, exit_code: int, stderr: str) -> str: return f"Execution failed (exit code {exit_code}):\n{stderr_clean}" @classmethod - def normalize_filename(cls, filename: str) -> str: - """Normalize filename for container use: replace spaces with underscores. + def sanitize_filename(cls, input_name: str) -> str: + """Sanitize filename to match LibreChat's sanitization logic. + + Replaces all non-alphanumeric characters (except '.' and '-') with + underscores. This ensures filenames on disk match what LibreChat + reports in the system prompt. - Important: we deliberately KEEP non-ASCII characters (e.g., Japanese) - so that user-visible filenames aren't transliterated. + Args: + input_name: Original filename (may include path components) + + Returns: + Sanitized filename safe for container use """ + if not input_name: + return "_" + try: - return filename.replace(" ", "_") if filename else filename - except Exception: - return filename + # Remove any directory components (path traversal prevention) + name = os.path.basename(input_name) + + # Replace any non-alphanumeric characters except for '.' 
and '-' + name = re.sub(r"[^a-zA-Z0-9.-]", "_", name) + + # Ensure the name doesn't start with a dot (hidden file in Unix) + if name.startswith(".") or name == "": + name = "_" + name + + # Limit the length of the filename + max_length = 255 + if len(name) > max_length: + ext = os.path.splitext(name)[1] + name_without_ext = os.path.splitext(name)[0] + random_suffix = secrets.token_hex(3) + truncate_len = max_length - len(ext) - 7 + if truncate_len < 1: + truncate_len = 1 + name = name_without_ext[:truncate_len] + "-" + random_suffix + ext + + return name + + except Exception as e: + logger.error(f"Failed to sanitize filename: {e}") + return "_" + + @classmethod + def normalize_filename(cls, filename: str) -> str: + """Deprecated: Use sanitize_filename instead. + + This method is kept for backward compatibility but delegates to + sanitize_filename which matches LibreChat's sanitization logic. + """ + return cls.sanitize_filename(filename) diff --git a/tests/unit/test_output_processor.py b/tests/unit/test_output_processor.py new file mode 100644 index 0000000..6b78c60 --- /dev/null +++ b/tests/unit/test_output_processor.py @@ -0,0 +1,119 @@ +"""Unit tests for the OutputProcessor.""" + +import pytest +from src.services.execution.output import OutputProcessor + + +class TestSanitizeFilename: + """Tests for the sanitize_filename method.""" + + def test_spaces_replaced_with_underscores(self): + """Test that spaces are replaced with underscores.""" + result = OutputProcessor.sanitize_filename("file with spaces.txt") + assert result == "file_with_spaces.txt" + + def test_parentheses_replaced_with_underscores(self): + """Test that parentheses are replaced with underscores.""" + result = OutputProcessor.sanitize_filename("manufacturing_analysis (v2).xlsx") + assert result == "manufacturing_analysis__v2_.xlsx" + + def test_special_characters_replaced(self): + """Test that special characters are replaced with underscores.""" + result = 
OutputProcessor.sanitize_filename("special@chars#here!.pdf") + assert result == "special_chars_here_.pdf" + + def test_already_valid_unchanged(self): + """Test that already valid filenames are unchanged.""" + result = OutputProcessor.sanitize_filename("already-valid.txt") + assert result == "already-valid.txt" + + def test_uppercase_preserved(self): + """Test that uppercase letters are preserved.""" + result = OutputProcessor.sanitize_filename("UPPERCASE.TXT") + assert result == "UPPERCASE.TXT" + + def test_numbers_preserved(self): + """Test that numbers are preserved.""" + result = OutputProcessor.sanitize_filename("123numbers.doc") + assert result == "123numbers.doc" + + def test_hidden_file_prefixed(self): + """Test that hidden files (starting with dot) get underscore prefix.""" + result = OutputProcessor.sanitize_filename(".hidden") + assert result == "_.hidden" + + def test_empty_string_returns_underscore(self): + """Test that empty string returns underscore.""" + result = OutputProcessor.sanitize_filename("") + assert result == "_" + + def test_none_returns_underscore(self): + """Test that None returns underscore.""" + result = OutputProcessor.sanitize_filename(None) + assert result == "_" + + def test_path_traversal_stripped(self): + """Test that path traversal attempts are stripped.""" + result = OutputProcessor.sanitize_filename("../../../etc/passwd") + assert result == "passwd" + + def test_absolute_path_stripped(self): + """Test that absolute paths are stripped to basename.""" + result = OutputProcessor.sanitize_filename("/absolute/path/file.txt") + assert result == "file.txt" + + def test_unicode_characters_replaced(self): + """Test that non-ASCII characters are replaced.""" + result = OutputProcessor.sanitize_filename("résumé.docx") + assert result == "r_sum_.docx" + + def test_brackets_replaced(self): + """Test that brackets are replaced with underscores.""" + result = OutputProcessor.sanitize_filename("[brackets].txt") + assert result == 
"_brackets_.txt"
+
+    def test_leading_parenthesis_prefixed(self):
+        """Test that filename starting with parenthesis is handled."""
+        result = OutputProcessor.sanitize_filename("(parentheses).txt")
+        assert result == "_parentheses_.txt"
+
+    def test_dashes_preserved(self):
+        """Test that dashes are preserved."""
+        result = OutputProcessor.sanitize_filename("file-name.with-dashes.txt")
+        assert result == "file-name.with-dashes.txt"
+
+    def test_dots_preserved(self):
+        """Test that dots in filename are preserved."""
+        result = OutputProcessor.sanitize_filename("file.name.multiple.dots.txt")
+        assert result == "file.name.multiple.dots.txt"
+
+    def test_simple_filename_unchanged(self):
+        """Test that simple alphanumeric filename is unchanged."""
+        result = OutputProcessor.sanitize_filename("SimpleFile123.pdf")
+        assert result == "SimpleFile123.pdf"
+
+    def test_long_filename_truncated(self):
+        """Test that filenames over 255 chars are truncated with a random suffix."""
+        long_name = "a" * 300 + ".txt"
+        result = OutputProcessor.sanitize_filename(long_name)
+        # Should be 255 chars or less
+        assert len(result) <= 255
+        # Should end with .txt
+        assert result.endswith(".txt")
+        # Should have a random suffix before extension
+        assert "-" in result
+
+
+class TestNormalizeFilename:
+    """Tests for the deprecated normalize_filename method."""
+
+    def test_delegates_to_sanitize_filename(self):
+        """Test that normalize_filename delegates to sanitize_filename."""
+        result = OutputProcessor.normalize_filename("file with spaces.txt")
+        expected = OutputProcessor.sanitize_filename("file with spaces.txt")
+        assert result == expected
+
+    def test_parentheses_now_replaced(self):
+        """Test that normalize_filename now also replaces parentheses."""
+        result = OutputProcessor.normalize_filename("file (v2).xlsx")
+        assert result == "file__v2_.xlsx"