Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions library/src/iqb/ghremote/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import logging
import re
from collections.abc import Callable, Iterator
from dataclasses import dataclass
Expand All @@ -10,6 +11,8 @@

from .cache import FileEntry, Manifest, compute_sha256

log = logging.getLogger("iqb.ghremote.diff")


class DiffState(str, Enum):
"""State of a diff entry comparing manifest vs local cache."""
Expand Down Expand Up @@ -108,6 +111,9 @@ def diff(
# Phase 2: iterate accepted manifest keys in sorted order
seen: set[str] = set()
for key in sorted(manifest.files):
if not _validate_cache_path(key):
log.warning("skipping invalid manifest key: %s", key)
continue
if acceptp is not None and not acceptp(key):
continue
seen.add(key)
Expand Down
23 changes: 23 additions & 0 deletions library/tests/iqb/cli/cache_pull_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,29 @@ def test_nothing_to_download(self, tmp_path: Path):
assert "Nothing to download" in result.output


class TestCachePullInvalidManifestKeys:
"""Invalid manifest keys are ignored for safety."""

@patch("iqb.cli.cache_pull.requests.Session")
def test_traversal_key_is_ignored(self, mock_session_cls: MagicMock, tmp_path: Path):
_write_manifest(
tmp_path,
{
"../../etc/passwd": {
"sha256": _sha256(b"malicious"),
"url": "https://example.com/a",
}
},
)

runner = CliRunner()
result = runner.invoke(cli, ["cache", "pull", "-d", str(tmp_path)])

assert result.exit_code == 0
assert "Nothing to download" in result.output
mock_session_cls.assert_not_called()


class TestCachePullOnlyRemote:
"""ONLY_REMOTE entries are downloaded."""

Expand Down
56 changes: 56 additions & 0 deletions library/tests/iqb/ghremote/diff_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,62 @@ def test_returns_iterator(self, tmp_path: Path):
assert not isinstance(result, (list, tuple))


class TestDiffInvalidManifestKeys:
"""Invalid manifest keys are ignored for safety."""

def test_traversal_manifest_key_is_ignored(self, tmp_path: Path):
manifest = Manifest(
v=0,
files={
"../../etc/passwd": FileEntry(
sha256="0" * 64,
url="https://example.com/passwd",
)
},
)

results = list(diff(manifest, tmp_path))

assert results == []

def test_dotdot_inside_cache_path_is_ignored(self, tmp_path: Path):
manifest = Manifest(
v=0,
files={
f"cache/v1/{_TS1}/{_TS2}/{_NAME}/../data.parquet": FileEntry(
sha256="0" * 64,
url="https://example.com/a",
)
},
)

results = list(diff(manifest, tmp_path))

assert results == []

def test_mixed_valid_and_invalid_manifest_keys(self, tmp_path: Path):
content = b"remote content"
manifest = Manifest(
v=0,
files={
"../../etc/passwd": FileEntry(
sha256="0" * 64,
url="https://example.com/passwd",
),
_FILE_A: FileEntry(
sha256=_sha256(content),
url="https://example.com/a.parquet",
),
},
)

results = list(diff(manifest, tmp_path))

assert len(results) == 1
assert results[0].file == _FILE_A
assert results[0].state == DiffState.ONLY_REMOTE


class TestValidateCachePath:
"""Tests for _validate_cache_path covering all branches."""

Expand Down