Skip to content
3 changes: 3 additions & 0 deletions .env.dev.example
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ RESEND_API_KEY= # Optional if SMTP is configured (see below)
# JOB_TIMEOUT_SECONDS=320
# JOB_MAX_TRIES=3
# DEPLOYMENT_TIMEOUT_SECONDS=300
# RECONCILE_INTERVAL_SECONDS=60
# DEPLOYMENT_RESTART_POLICY=on-failure
# DEPLOYMENT_RESTART_MAX_RETRIES=5
# MAGIC_LINK_TTL_SECONDS=900
# AUTH_TOKEN_TTL_DAYS=30
# AUTH_TOKEN_REFRESH_THRESHOLD_DAYS=1
Expand Down
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ RESEND_API_KEY= # Optional if SMTP is configured (see below)
# JOB_TIMEOUT_SECONDS=320
# JOB_MAX_TRIES=3
# DEPLOYMENT_TIMEOUT_SECONDS=300
# RECONCILE_INTERVAL_SECONDS=60
# DEPLOYMENT_RESTART_POLICY=on-failure
# DEPLOYMENT_RESTART_MAX_RETRIES=5
# MAGIC_LINK_TTL_SECONDS=900
# AUTH_TOKEN_TTL_DAYS=30
# AUTH_TOKEN_REFRESH_THRESHOLD_DAYS=1
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,10 @@ See [ARCHITECTURE.md](ARCHITECTURE.md) for codebase structure.
| `MAX_MEMORY_MB` | Maximum allowed memory override per project. Used only when `DEFAULT_MEMORY_MB` is set; required to allow users to customize memory. |
| `JOB_TIMEOUT_SECONDS` | Job timeout (seconds). Default: `320`. |
| `JOB_MAX_TRIES` | Max retries per background job. Default: `3`. |
| `RECONCILE_INTERVAL_SECONDS` | Reconcile interval (seconds) for observed deployment state. Default: `60`. |
| `DEPLOYMENT_TIMEOUT_SECONDS` | Deployment timeout (seconds). Default: `300`. |
| `DEPLOYMENT_RESTART_POLICY` | Docker restart policy for deployment containers. Default: `on-failure`. |
| `DEPLOYMENT_RESTART_MAX_RETRIES` | Max retries when restart policy is `on-failure`. Default: `5`. |
| `CONTAINER_DELETE_GRACE_SECONDS` | Wait before deleting containers after stop/failure to let logs ship. Default: `3`. |
| `LOG_STREAM_GRACE_SECONDS` | Grace window for deployment log streaming (when to connect/close SSE around terminal states). Default: `5`. |
| `LOG_LEVEL` | Logging level. Default: `WARNING`. |
Expand Down
3 changes: 3 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class Settings(BaseSettings):
job_timeout_seconds: int = 320
job_completion_wait_seconds: int = 300
deployment_timeout_seconds: int = 300
reconcile_interval_seconds: int = 60
deployment_restart_policy: str = "on-failure"
deployment_restart_max_retries: int = 5
container_delete_grace_seconds: int = 15
log_stream_grace_seconds: int = 5
service_uid: int = 1000
Expand Down
75 changes: 75 additions & 0 deletions app/migrations/versions/6b0c7d2a9e1f_deployment_observed_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Deployment observed state

Revision ID: 6b0c7d2a9e1f
Revises: 4fe4c96ad3dd
Create Date: 2026-01-29 00:00:00.000000

"""
from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op


# revision identifiers, used by Alembic.
revision: str = "6b0c7d2a9e1f"
down_revision: Union[str, Sequence[str], None] = "4fe4c96ad3dd"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


# Container states the reconciler can record for a deployment.  Declared at
# module level so upgrade() can create the type before adding the column and
# downgrade() can drop it after removing the column.
observed_status_enum = sa.Enum(
    "running",
    "exited",
    "dead",
    "paused",
    "not_found",
    name="deployment_observed_status",
)


def upgrade() -> None:
    """Upgrade schema: add observed-state tracking columns to ``deployment``."""
    # The enum type must exist before any column references it; checkfirst
    # makes this a no-op on backends where it is already present.
    observed_status_enum.create(op.get_bind(), checkfirst=True)

    new_columns = (
        sa.Column("observed_status", observed_status_enum, nullable=True),
        sa.Column("observed_exit_code", sa.Integer(), nullable=True),
        sa.Column("observed_at", sa.DateTime(), nullable=True),
        sa.Column("observed_reason", sa.Text(), nullable=True),
        sa.Column("observed_last_seen_at", sa.DateTime(), nullable=True),
        sa.Column(
            "observed_missing_count",
            sa.Integer(),
            nullable=False,
            server_default="0",
        ),
    )
    for column in new_columns:
        op.add_column("deployment", column)


def downgrade() -> None:
    """Downgrade schema: drop observed-state columns, then the enum type."""
    # Drop in reverse order of creation; the enum type can only be removed
    # once no column references it.
    for column_name in (
        "observed_missing_count",
        "observed_last_seen_at",
        "observed_reason",
        "observed_at",
        "observed_exit_code",
        "observed_status",
    ):
        op.drop_column("deployment", column_name)

    observed_status_enum.drop(op.get_bind(), checkfirst=True)
44 changes: 44 additions & 0 deletions app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
BigInteger,
Boolean,
Enum as SQLAEnum,
Integer,
JSON,
String,
Text,
Expand Down Expand Up @@ -756,6 +757,24 @@ class Deployment(Base):
SQLAEnum("running", "stopped", "removed", name="deployment_container_status"),
nullable=True,
)
observed_status: Mapped[str | None] = mapped_column(
SQLAEnum(
"running",
"exited",
"dead",
"paused",
"not_found",
name="deployment_observed_status",
),
nullable=True,
)
observed_exit_code: Mapped[int | None] = mapped_column(Integer, nullable=True)
observed_at: Mapped[datetime | None] = mapped_column(nullable=True)
observed_reason: Mapped[str | None] = mapped_column(Text, nullable=True)
observed_last_seen_at: Mapped[datetime | None] = mapped_column(nullable=True)
observed_missing_count: Mapped[int] = mapped_column(
Integer, nullable=False, default=0
)
status: Mapped[str] = mapped_column(
SQLAEnum(
"prepare",
Expand Down Expand Up @@ -805,6 +824,31 @@ def __init__(self, *args, project: "Project", environment_id: str, **kwargs):
environment = project.get_environment_by_id(environment_id)
self.env_vars = project.get_env_vars(environment["slug"]) if environment else []

@property
def computed_status(self) -> str:
observed = self.observed_status
expected = self.container_status
if expected in {"stopped", "removed"}:
if observed == "running":
return "orphaned"
return expected

if expected == "running":
if observed == "not_found":
return "missing"
if observed == "exited":
return "stopped" if self.observed_exit_code == 0 else "crashed"
if observed in {"paused", "dead"}:
return observed
return "running"

if observed == "exited":
return "stopped" if self.observed_exit_code == 0 else "crashed"
if observed in {"running", "paused", "dead", "not_found"}:
return observed

return observed or expected

@property
def environment(self) -> dict | None:
"""Get environment configuration"""
Expand Down
42 changes: 42 additions & 0 deletions app/routers/project.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from fastapi import APIRouter, Depends, Request, Query, HTTPException
import httpx
import aiodocker
from fastapi.responses import Response, RedirectResponse
from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession
Expand Down Expand Up @@ -66,6 +67,7 @@
from services.github import GitHubService
from services.github_installation import GitHubInstallationService
from services.deployment import DeploymentService
from services.reconcile import reconcile_deployments
from services.domain import DomainService
from services.preset_detector import PresetDetector
from services.registry import RegistryService
Expand Down Expand Up @@ -2231,6 +2233,46 @@ async def project_deployment(
)


@router.post(
    "/{team_slug}/projects/{project_name}/deployments/{deployment_id}/reconcile",
    name="project_deployment_reconcile",
)
async def project_deployment_reconcile(
    project: Project = Depends(get_project_by_name),
    current_user: User = Depends(get_current_user),
    role: str = Depends(get_role),
    team_and_membership: tuple[Team, TeamMember] = Depends(get_team_by_slug),
    db: AsyncSession = Depends(get_db),
    deployment: Deployment = Depends(get_deployment_by_id),
    settings: Settings = Depends(get_settings),
):
    """Run an on-demand reconcile for one deployment and return its state.

    Auth/membership is enforced by the dependency chain; the handler itself
    only guards project ownership of the deployment.
    """
    team, membership = team_and_membership

    # The deployment id is resolved independently of the project; reject ids
    # that belong to a different project.
    if deployment.project_id != project.id:
        raise HTTPException(status_code=404, detail="Deployment not found")

    async with aiodocker.Docker(url=settings.docker_host) as docker_client:
        await reconcile_deployments(db, docker_client, deployment_ids=[deployment.id])
    # Re-read the row so the response reflects what reconcile just wrote.
    await db.refresh(deployment)

    def _iso(ts: datetime | None) -> str | None:
        # Stored datetimes are naive; presumably UTC — TODO confirm upstream.
        return ts.replace(tzinfo=timezone.utc).isoformat() if ts else None

    return {
        "deployment_id": deployment.id,
        "status": deployment.status,
        "container_status": deployment.container_status,
        "observed_status": deployment.observed_status,
        "observed_exit_code": deployment.observed_exit_code,
        "observed_at": _iso(deployment.observed_at),
        "observed_last_seen_at": _iso(deployment.observed_last_seen_at),
        "observed_missing_count": deployment.observed_missing_count,
        "computed_status": deployment.computed_status,
    }


@router.get(
"/{team_slug}/projects/{project_name}/logs",
name="project_logs",
Expand Down
Loading