Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions gateway/checkpoint_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def _extract_repo_from_remote(repo_path: str) -> str | None:
return None

remote_url = result.stdout.strip()
match = re.search(r"github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?$", remote_url)
match = re.search(r"github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?/?$", remote_url)
if match:
return f"{match.group(1)}/{match.group(2)}"
except Exception:
Expand Down Expand Up @@ -234,7 +234,7 @@ def _resolve_github_token(repo_path: str) -> str | None:
return None

remote_url = result.stdout.strip()
match = re.search(r"github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?$", remote_url)
match = re.search(r"github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?/?$", remote_url)
if not match:
logger.debug(
"Could not extract owner/repo from remote URL",
Expand Down Expand Up @@ -1302,7 +1302,7 @@ def _get_checkpoint_repo_for_path(repo_path: str) -> str | None:
from config.repo_config import get_checkpoint_repo

# Extract owner/repo from URL
match = re.search(r"github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?$", remote_url)
match = re.search(r"github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?/?$", remote_url)
if match:
repo = f"{match.group(1)}/{match.group(2)}"
return get_checkpoint_repo(repo)
Expand Down
155 changes: 138 additions & 17 deletions gateway/gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -1593,9 +1593,12 @@ def checkpoint_list() -> tuple[Response, int] | Response:

handler = get_checkpoint_handler()
checkpoint_repo = _resolve_checkpoint_repo(repo_path)
github_token = _resolve_checkpoint_token(repo_path)

try:
index = handler.fetch_and_read_index(repo_path, checkpoint_repo=checkpoint_repo)
index = handler.fetch_and_read_index(
repo_path, checkpoint_repo=checkpoint_repo, github_token=github_token
)
except Exception as e:
logger.error("Checkpoint index fetch failed", error=str(e))
return make_error("Failed to fetch checkpoints", status_code=500)
Expand Down Expand Up @@ -1659,13 +1662,16 @@ def checkpoint_cost() -> tuple[Response, int] | Response:

handler = get_checkpoint_handler()
checkpoint_repo = _resolve_checkpoint_repo(repo_path)
github_token = _resolve_checkpoint_token(repo_path)

# fetch_and_read_index does ls-remote + fetch + read index in one pass.
# We then call ensure_ref to get a ref for read_checkpoint calls below.
# After the fetch in fetch_and_read_index, ensure_ref's fetch is a no-op
# (branch already up-to-date), so only the ls-remote is repeated.
try:
index = handler.fetch_and_read_index(repo_path, checkpoint_repo=checkpoint_repo)
index = handler.fetch_and_read_index(
repo_path, checkpoint_repo=checkpoint_repo, github_token=github_token
)
except Exception as e:
logger.error("Checkpoint index fetch failed", error=str(e))
return make_error("Failed to fetch checkpoint data", status_code=500)
Expand All @@ -1683,7 +1689,9 @@ def checkpoint_cost() -> tuple[Response, int] | Response:
)

try:
ref = handler.ensure_ref(repo_path, checkpoint_repo=checkpoint_repo)
ref = handler.ensure_ref(
repo_path, checkpoint_repo=checkpoint_repo, github_token=github_token
)
except Exception as e:
logger.error("Checkpoint ref resolution failed", error=str(e))
return make_error("Failed to fetch checkpoint data", status_code=500)
Expand Down Expand Up @@ -1813,9 +1821,12 @@ def checkpoint_show(identifier: str) -> tuple[Response, int] | Response:

handler = get_checkpoint_handler()
checkpoint_repo = _resolve_checkpoint_repo(repo_path)
github_token = _resolve_checkpoint_token(repo_path)

try:
ref = handler.ensure_ref(repo_path, checkpoint_repo=checkpoint_repo)
ref = handler.ensure_ref(
repo_path, checkpoint_repo=checkpoint_repo, github_token=github_token
)
except Exception as e:
logger.error("Checkpoint ref fetch failed", error=str(e))
return make_error("Failed to fetch checkpoint data", status_code=500)
Expand All @@ -1826,7 +1837,9 @@ def checkpoint_show(identifier: str) -> tuple[Response, int] | Response:
checkpoint_id: str | None = identifier
if not identifier.startswith("ckpt-"):
# Look up by commit SHA
index = handler.fetch_and_read_index(repo_path, checkpoint_repo=checkpoint_repo)
index = handler.fetch_and_read_index(
repo_path, checkpoint_repo=checkpoint_repo, github_token=github_token
)
if index:
checkpoint_id = index.get_by_commit(identifier)
if not checkpoint_id:
Expand All @@ -1843,48 +1856,156 @@ def checkpoint_show(identifier: str) -> tuple[Response, int] | Response:
def _resolve_checkpoint_repo(repo_path: str) -> str | None:
    """Resolve checkpoint_repo from query params or auto-detection.

    Resolution order:
        1. Explicit ``checkpoint_repo`` query parameter (owner/repo format).
           A malformed value is logged and ignored.
        2. Auto-detection from ``repo_path`` via
           ``_get_checkpoint_repo_for_path``.
        3. ``source_repo`` query parameter looked up in config. This is the
           fallback for sandbox containers where ``repositories.yaml`` is not
           available and the sandbox repo path may not exist on the gateway.

    Args:
        repo_path: Repository path handed to the path-based auto-detection.

    Returns:
        The checkpoint repository in ``owner/repo`` form, or ``None`` when
        none of the sources yields one.
    """
    # Matched with ``re.fullmatch`` instead of ``re.match`` + ``^...$``:
    # ``$`` also matches just before a trailing newline, which would let
    # "owner/repo\n" through validation.
    slug_pattern = r"[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]*"

    explicit = request.args.get("checkpoint_repo")
    if explicit:
        # Basic validation: must look like "owner/repo"
        if re.fullmatch(slug_pattern, explicit):
            return explicit
        logger.warning(
            "Invalid checkpoint_repo format, falling back to auto-detection",
            checkpoint_repo=explicit,
        )

    # Try path-based auto-detection (works when repo_path is a local git repo)
    detected = _get_checkpoint_repo_for_path(repo_path)
    if detected:
        return detected

    # Fallback: use source_repo query param for config lookup.
    # The sandbox CLI sends this when it can determine the source repo
    # from git remote but cannot resolve checkpoint_repo locally
    # (repositories.yaml is only mounted on the gateway).
    source_repo = request.args.get("source_repo")
    if source_repo and re.fullmatch(slug_pattern, source_repo):
        try:
            from config.repo_config import get_checkpoint_repo

            cp_repo = get_checkpoint_repo(source_repo)
        except Exception as e:
            # Best-effort lookup: a config failure means "not resolved".
            logger.debug(
                "Config lookup for source_repo failed",
                source_repo=source_repo,
                error=str(e),
            )
        else:
            if cp_repo:
                logger.debug(
                    "Resolved checkpoint_repo from source_repo param",
                    source_repo=source_repo,
                    checkpoint_repo=cp_repo,
                )
                return cp_repo

    return None


def _resolve_checkpoint_token(repo_path: str) -> str | None:
    """Resolve a GitHub token for checkpoint fetch operations.

    Tries ``_resolve_github_token`` (which reads the git remote from
    ``repo_path``). When that fails — typically because ``repo_path``
    is the scratch repo with no remotes — falls back to resolving a
    token via the ``source_repo`` query parameter.

    Args:
        repo_path: Repository path whose git remote is tried first.

    Returns:
        A token string, or ``None`` when neither source provides one.
    """
    from checkpoint_handler import _resolve_github_token

    token: str | None = _resolve_github_token(repo_path)
    if token:
        return token

    source_repo = request.args.get("source_repo")
    # ``re.fullmatch`` rather than ``re.match`` + ``^...$``: ``$`` also
    # matches just before a trailing newline, so "owner/repo\n" would
    # otherwise pass validation and reach the token lookup.
    if source_repo and re.fullmatch(
        r"[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]*", source_repo
    ):
        # Only the token is needed here; auth mode and error are ignored.
        token_str, _auth_mode, _error = get_token_for_repo(source_repo)
        if token_str:
            return token_str

    return None


# Location of the bare git repository that _ensure_checkpoint_scratch_repo
# creates and returns as a working directory for checkpoint fetches.
_CHECKPOINT_SCRATCH_DIR = "/home/egg/.egg-worktrees/.checkpoint-scratch"

# Serialises creation of the scratch repo in _ensure_checkpoint_scratch_repo.
# ``threading`` is loaded through ``__import__`` here instead of an
# ``import`` statement.
_checkpoint_scratch_lock = __import__("threading").Lock()


def _ensure_checkpoint_scratch_repo() -> str | None:
    """Return the checkpoint scratch repo path, creating the repo if needed.

    When the sandbox's repo path doesn't exist on the gateway filesystem,
    ``git fetch`` and ``git ls-remote`` still need a valid git directory
    as their cwd. A minimal bare repository at ``_CHECKPOINT_SCRATCH_DIR``
    fills that role.

    Returns:
        Path to the scratch repo, or None on failure.
    """
    scratch_dir = _CHECKPOINT_SCRATCH_DIR
    objects_dir = os.path.join(scratch_dir, "objects")

    def initialised() -> bool:
        # An "objects" directory is taken as the sign of a completed init.
        return os.path.isdir(objects_dir)

    # Fast path: no lock needed once the repo is in place.
    if initialised():
        return scratch_dir

    with _checkpoint_scratch_lock:
        # Another thread may have finished the init while we were waiting.
        if initialised():
            return scratch_dir

        try:
            os.makedirs(scratch_dir, exist_ok=True)
            init_cmd = ["git", "init", "--bare", scratch_dir]
            subprocess.run(
                init_cmd,
                capture_output=True,
                text=True,
                timeout=10,
                check=True,
            )
            logger.debug("Created checkpoint scratch repo", path=scratch_dir)
        except Exception as e:
            logger.warning("Failed to create checkpoint scratch repo", error=str(e))
            return None

        return scratch_dir


def _resolve_repo_path_for_checkpoints() -> str | None:
    """Resolve repository path for checkpoint read operations.

    Tries query param, then session's last_repo_path, then EGG_REPO_PATH,
    then a checkpoint scratch repo. The scratch repo fallback handles
    sandbox → gateway requests where the sandbox's filesystem is not
    mounted on the gateway container.

    Returns:
        A usable repository path, or ``None`` when the ``repo_path`` query
        parameter fails validation or the scratch repo cannot be created.
    """
    # Explicit query param — if provided AND exists locally, use it.
    repo_path = request.args.get("repo_path")
    if repo_path:
        path_valid, _err = validate_repo_path(repo_path)
        if not path_valid:
            # A path that fails validation is rejected outright; it must
            # not silently resolve to some other repository.
            return None
        if os.path.isdir(repo_path):
            return repo_path
        # Path is valid format but doesn't exist on this container.
        # This is expected when the CLI runs in a sandbox whose filesystem
        # is not mounted on the gateway. Fall through to other sources
        # instead of returning None.

    # Session's last known repo path (set during push operations)
    session = getattr(g, "session", None)
    last_repo_path = getattr(session, "last_repo_path", None) if session else None
    if last_repo_path and os.path.isdir(last_repo_path):
        return str(last_repo_path)

    # Environment variable
    env_path = os.environ.get("EGG_REPO_PATH")
    if env_path and os.path.isdir(env_path):
        return env_path

    # Last resort: create a bare scratch repo for checkpoint fetching.
    # This allows the gateway to serve checkpoint queries even without
    # a local copy of the source repo.
    return _ensure_checkpoint_scratch_repo()


@app.route("/api/v1/gh/pr/create", methods=["POST"])
Expand Down
Loading
Loading