diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index e591beb198..e114d6b245 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -512,8 +512,10 @@ func parseDurationString(s string) time.Duration { // steps such as lockdown validation, binary installation, or repository checkout failures. // // Step log files are stored in workflow-logs/{job}/{step_num}_{step_name}.txt after -// downloading via downloadWorkflowRunLogs. The function finds the last step that ran -// (highest step number) as that is most likely the step that caused the failure. +// downloading via downloadWorkflowRunLogs. The function first scans all step logs for +// ##[error] annotations (GitHub Actions error annotations), which are the most precise +// failure indicators. If none are found, it falls back to the content of the last step +// (highest step number) as a general failure indicator. func extractPreAgentStepErrors(logsPath string) []ErrorInfo { // If agent-stdio.log exists, the agent ran - don't scan step logs agentStdioPath := filepath.Join(logsPath, "agent-stdio.log") @@ -529,7 +531,8 @@ func extractPreAgentStepErrors(logsPath string) []ErrorInfo { return nil } - // Find the last step log file by scanning job subdirectories. + // Scan all job step log files in a single pass, collecting both ##[error] annotations + // and tracking the last step for fallback use. // GitHub Actions log zip structure: {job_name}/{step_num}_{step_name}.txt type stepLog struct { path string @@ -537,7 +540,10 @@ func extractPreAgentStepErrors(logsPath string) []ErrorInfo { stepKey string // job/step_name for display } + const maxMessageLen = 1500 + var lastStep *stepLog + var errorAnnotations []ErrorInfo jobDirs, err := os.ReadDir(workflowLogsDir) if err != nil { @@ -558,16 +564,59 @@ func extractPreAgentStepErrors(logsPath string) []ErrorInfo { continue } num, stepName := parseStepFilename(stepFile.Name()) - if num > 0 && (lastStep == nil || num > lastStep.num) { + if num <= 0 { + continue + } + stepFilePath := filepath.Join(jobDir, stepFile.Name()) + stepKey := jobEntry.Name() + "/" + stepName + + // Track the last step (highest step number) for fallback + if lastStep == nil || num > lastStep.num { lastStep = &stepLog{ - path: filepath.Join(jobDir, stepFile.Name()), + path: stepFilePath, num: num, - stepKey: jobEntry.Name() + "/" + stepName, + stepKey: stepKey, + } + } + + // Scan this step for ##[error] annotations + content, err := os.ReadFile(stepFilePath) + if err != nil { + auditReportLog.Printf("Failed to read step log %s: %v", stepFilePath, err) + continue + } + + var errorLines []string + for line := range strings.SplitSeq(string(content), "\n") { + if strings.Contains(line, "##[error]") { + stripped := stripGHALogTimestamps(line) + if stripped != "" { + errorLines = append(errorLines, stripped) + } } } + + if len(errorLines) > 0 { + message := strings.Join(errorLines, "\n") + if len(message) > maxMessageLen { + message = message[:maxMessageLen] + "..." + } + auditReportLog.Printf("Extracted ##[error] annotations from %s (step %d)", stepKey, num) + errorAnnotations = append(errorAnnotations, ErrorInfo{ + Type: "step_failure", + File: stepKey, + Message: message, + }) + } } } + // Prefer ##[error] annotations over generic last-step content + if len(errorAnnotations) > 0 { + return errorAnnotations + } + + // Fallback: return the content of the last step that ran if lastStep == nil { auditReportLog.Printf("No step log files found in %s", workflowLogsDir) return nil @@ -584,13 +633,11 @@ func extractPreAgentStepErrors(logsPath string) []ErrorInfo { return nil } - // Truncate to a reasonable size for the error summary - const maxMessageLen = 1500 if len(message) > maxMessageLen { message = message[:maxMessageLen] + "..." } - auditReportLog.Printf("Extracted pre-agent step error from %s (step %d)", lastStep.stepKey, lastStep.num) + auditReportLog.Printf("Extracted pre-agent step error from %s (step %d) as fallback", lastStep.stepKey, lastStep.num) return []ErrorInfo{{ Type: "step_failure", File: lastStep.stepKey, diff --git a/pkg/cli/audit_report_test.go b/pkg/cli/audit_report_test.go index 868b4f3e68..3397bd318a 100644 --- a/pkg/cli/audit_report_test.go +++ b/pkg/cli/audit_report_test.go @@ -1501,6 +1501,64 @@ func TestExtractPreAgentStepErrors(t *testing.T) { assert.True(t, strings.HasSuffix(errors[0].Message, "..."), "Truncated message should end with ellipsis") }) + t.Run("prioritizes ##[error] annotations over last step fallback", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + // Step 3 has a ##[error] annotation (the real failure) + lockdownLog := "2026-02-23T23:46:10.9523559Z ##[error]Lockdown mode is enabled (lockdown: true) but no custom GitHub token is configured.\n2026-02-23T23:46:10.9523560Z Please configure GH_AW_GITHUB_TOKEN" + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "3_Validate lockdown mode.txt"), []byte(lockdownLog), 0600)) + // Step 15 is the "Complete job" step with unrelated cleanup content (higher step number) + completeJobLog := "2026-02-23T23:46:13.5790741Z Evaluate and set job outputs\n2026-02-23T23:46:13.5790742Z Set output 'checkout_pr_success'\n2026-02-23T23:46:13.5790743Z Set output 'has_patch'" + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "15_Complete job.txt"), []byte(completeJobLog), 0600)) + + errors := extractPreAgentStepErrors(dir) + require.NotNil(t, errors, "Should return errors from ##[error] annotations") + require.Len(t, errors, 1, "Should return one error for the step with ##[error]") + assert.Equal(t, "agent/Validate lockdown mode", errors[0].File, "Should reference the step with ##[error], not Complete job") + assert.Contains(t, errors[0].Message, "Lockdown mode is enabled", "Message should contain the actual ##[error] annotation content") + assert.NotContains(t, errors[0].Message, "Evaluate and set job outputs", "Message should not contain Complete job cleanup content") + assert.NotContains(t, errors[0].Message, "2026-02-23T", "Should strip GHA timestamps from ##[error] lines") + }) + + t.Run("returns ##[error] annotations from multiple steps", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + // Two steps each with ##[error] annotations + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "3_Step A.txt"), + []byte("2024-01-01T00:00:01Z ##[error]First error"), 0600)) + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "5_Step B.txt"), + []byte("2024-01-01T00:00:02Z ##[error]Second error"), 0600)) + // Higher-numbered step with no ##[error] + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "10_Complete job.txt"), + []byte("Cleanup content"), 0600)) + + errors := extractPreAgentStepErrors(dir) + require.NotNil(t, errors, "Should return errors") + assert.Len(t, errors, 2, "Should return one ErrorInfo per step with ##[error] annotations") + // All returned errors should be from steps with ##[error], not the cleanup step + for _, e := range errors { + assert.NotEqual(t, "agent/Complete job", e.File, "Should not include cleanup step in errors") + } + }) + + t.Run("falls back to last step when no ##[error] annotations exist", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + // Step 3 has non-annotated error content + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "3_Some step.txt"), []byte("Some content"), 0600)) + // Step 7 is the last step and has the actual failure (no ##[error] prefix) + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "7_Failing step.txt"), []byte("Error: installation failed"), 0600)) + + errors := extractPreAgentStepErrors(dir) + require.NotNil(t, errors, "Should fall back to last step content") + require.Len(t, errors, 1, "Fallback should return one error") + assert.Equal(t, "agent/Failing step", errors[0].File, "Fallback should use the last step") + assert.Contains(t, errors[0].Message, "Error: installation failed", "Fallback should use last step content") + }) + t.Run("builds error summary from step errors in buildAuditData", func(t *testing.T) { dir := testutil.TempDir(t, "audit-step-*") // No agent-stdio.log