Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9b0157d
auto-claude: subtask-1-1 - Add CLI detection tests for Claude/Node/Py…
AndyMik90 Jan 19, 2026
39d2100
auto-claude: subtask-1-2 - Add path handling edge case tests
AndyMik90 Jan 19, 2026
bb0f68c
auto-claude: subtask-1-3 - Add token decryption tests for all platfor…
AndyMik90 Jan 19, 2026
0165dde
auto-claude: subtask-1-4 - Add frontend platform tests for npm/npx co…
AndyMik90 Jan 19, 2026
1404c0e
auto-claude: subtask-2-1 - Create test_agent_flow.py with planner to …
AndyMik90 Jan 19, 2026
b921de4
auto-claude: subtask-2-2 - Add subtask completion detection tests to …
AndyMik90 Jan 19, 2026
9e31a80
auto-claude: subtask-2-3 - Add QA loop tests for fixer interaction an…
AndyMik90 Jan 19, 2026
6301059
auto-claude: subtask-2-4 - Add worktree isolation tests to verify con…
AndyMik90 Jan 19, 2026
09cb52e
auto-claude: subtask-3-1 - Expand test_recovery.py with session check…
AndyMik90 Jan 19, 2026
b8db5f2
auto-claude: subtask-3-2 - Expand test_implementation_plan.py with JS…
AndyMik90 Jan 19, 2026
fd8f870
auto-claude: subtask-3-3 - Add tests for edge cases in plan state tra…
AndyMik90 Jan 19, 2026
f5e6285
auto-claude: subtask-4-1 - Create test_review_verdict.py with verdict…
AndyMik90 Jan 19, 2026
6f18751
auto-claude: subtask-4-2 - Expand test_finding_validation.py with evi…
AndyMik90 Jan 19, 2026
a28462f
auto-claude: subtask-4-3 - Add deduplication and severity mapping tes…
AndyMik90 Jan 19, 2026
3da1cc1
auto-claude: subtask-5-1 - Create E2E smoke test file with project cr…
AndyMik90 Jan 19, 2026
186f510
auto-claude: subtask-5-2 - Add task creation and execution E2E test
AndyMik90 Jan 19, 2026
ae831f2
auto-claude: subtask-5-3 - Add settings management E2E test
AndyMik90 Jan 19, 2026
44c17f5
auto-claude: subtask-6-1 - Run full backend test suite and verify all…
AndyMik90 Jan 19, 2026
f8a79da
auto-claude: subtask-6-2 - Fix flaky test by clearing CLI path env vars
AndyMik90 Jan 19, 2026
162f150
Merge branch 'develop' into auto-claude/141-simplify-testing-strategy…
AndyMik90 Jan 21, 2026
dfe72e7
fix(tests): resolve PR review findings for testing strategy
AndyMik90 Jan 21, 2026
198f0e6
fix(tests): resolve CI test failures
AndyMik90 Jan 21, 2026
deb587d
fix(tests): normalize paths in cross-platform tests
AndyMik90 Jan 21, 2026
5ebfd93
Merge branch 'develop' into auto-claude/141-simplify-testing-strategy…
AndyMik90 Jan 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/backend/core/platform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def validate_cli_path(cli_path: str) -> bool:
Returns:
True if path is secure, False otherwise
"""
if not cli_path:
if not cli_path or not cli_path.strip():
return False

# Security validation: reject paths with shell metacharacters or other dangerous patterns
Expand All @@ -380,7 +380,7 @@ def validate_cli_path(cli_path: str) -> bool:
r"%[^%]+%", # Windows environment variable expansion
r"\.\./", # Unix directory traversal
r"\.\.\\", # Windows directory traversal
r"[\r\n]", # Newlines (command injection)
r"[\r\n\x00]", # Newlines (command injection), null bytes (path truncation)
]

for pattern in dangerous_patterns:
Expand Down
126 changes: 126 additions & 0 deletions apps/backend/runners/github/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,132 @@ class MergeVerdict(str, Enum):
)


# =============================================================================
# Verdict Helper Functions (testable logic extracted from orchestrator)
# =============================================================================


def verdict_from_severity_counts(
    critical_count: int = 0,
    high_count: int = 0,
    medium_count: int = 0,
    low_count: int = 0,
) -> MergeVerdict:
    """
    Map per-severity finding counts to a merge verdict.

    The most severe non-empty bucket decides the outcome:
    any critical finding blocks, any high or medium finding requires
    revision, and everything else is considered mergeable.

    Args:
        critical_count: Number of critical severity findings
        high_count: Number of high severity findings
        medium_count: Number of medium severity findings
        low_count: Number of low severity findings (accepted for a uniform
            signature; it does not influence the result)

    Returns:
        BLOCKED, NEEDS_REVISION or READY_TO_MERGE, in that order of precedence
    """
    # Critical findings win over everything else.
    if critical_count > 0:
        return MergeVerdict.BLOCKED

    # High and medium findings are treated alike: the PR must be revised.
    requires_revision = high_count > 0 or medium_count > 0
    return (
        MergeVerdict.NEEDS_REVISION
        if requires_revision
        else MergeVerdict.READY_TO_MERGE
    )


def apply_merge_conflict_override(
    verdict: MergeVerdict,
    has_merge_conflicts: bool,
) -> MergeVerdict:
    """
    Force the verdict to BLOCKED when the PR has merge conflicts.

    A conflicting PR is blocked no matter what the incoming verdict was;
    without conflicts the incoming verdict passes through untouched.

    Args:
        verdict: The current verdict
        has_merge_conflicts: Whether PR has merge conflicts

    Returns:
        BLOCKED if conflicts exist, otherwise the verdict that was passed in
    """
    return MergeVerdict.BLOCKED if has_merge_conflicts else verdict


def apply_branch_behind_downgrade(
    verdict: MergeVerdict,
    merge_state_status: str,
) -> MergeVerdict:
    """
    Downgrade an otherwise-mergeable verdict when the branch is behind.

    Only the exact status string "BEHIND" triggers a downgrade, and only
    READY_TO_MERGE and MERGE_WITH_CHANGES are affected (both become
    NEEDS_REVISION). Every other verdict, including BLOCKED, is returned
    unchanged.

    Args:
        verdict: The current verdict
        merge_state_status: The merge state status (e.g., "BEHIND", "CLEAN")

    Returns:
        NEEDS_REVISION if the branch is behind and the verdict was mergeable,
        otherwise the verdict that was passed in
    """
    # Any status other than "BEHIND" leaves the verdict alone.
    if merge_state_status != "BEHIND":
        return verdict

    mergeable_verdicts = (
        MergeVerdict.READY_TO_MERGE,
        MergeVerdict.MERGE_WITH_CHANGES,
    )
    return MergeVerdict.NEEDS_REVISION if verdict in mergeable_verdicts else verdict


def apply_ci_status_override(
    verdict: MergeVerdict,
    failing_count: int = 0,
    pending_count: int = 0,
) -> MergeVerdict:
    """
    Apply CI status override to verdict.

    Only READY_TO_MERGE and MERGE_WITH_CHANGES are ever overridden:
        Failing CI -> BLOCKED
        Pending CI -> NEEDS_REVISION (checked only when nothing is failing)
    NEEDS_REVISION and BLOCKED verdicts are returned as-is, even when CI
    checks are failing.

    NOTE(review): a failing CI run does not escalate NEEDS_REVISION to
    BLOCKED. Confirm with the orchestrator's callers that this is intended.

    Args:
        verdict: The current verdict
        failing_count: Number of failing CI checks
        pending_count: Number of pending CI checks

    Returns:
        Updated verdict based on CI status
    """
    # Verdicts that would otherwise let the PR through are the only ones
    # CI status is allowed to change.
    can_override = verdict in (
        MergeVerdict.READY_TO_MERGE,
        MergeVerdict.MERGE_WITH_CHANGES,
    )

    # Failures take precedence over pending checks.
    if failing_count > 0:
        return MergeVerdict.BLOCKED if can_override else verdict
    if pending_count > 0:
        return MergeVerdict.NEEDS_REVISION if can_override else verdict
    return verdict


def verdict_to_github_status(verdict: MergeVerdict) -> str:
    """
    Translate a merge verdict into the GitHub review overall status.

    BLOCKED and NEEDS_REVISION both request changes, MERGE_WITH_CHANGES
    leaves a comment, and any other verdict approves.

    Args:
        verdict: The merge verdict

    Returns:
        GitHub review status: "approve", "comment", or "request_changes"
    """
    # Equality-based membership (not a dict lookup) keeps the same comparison
    # semantics as a chain of `==` checks.
    if verdict in (MergeVerdict.BLOCKED, MergeVerdict.NEEDS_REVISION):
        return "request_changes"
    if verdict == MergeVerdict.MERGE_WITH_CHANGES:
        return "comment"
    return "approve"


class AICommentVerdict(str, Enum):
"""Verdict on AI tool comments (CodeRabbit, Cursor, Greptile, etc.)."""

Expand Down
Loading
Loading