From abf14b321542c5e8bf4770dadb458dacd1bd771f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 22 Feb 2026 00:03:26 +0000 Subject: [PATCH 1/2] Initial plan From 66166ab9f896112b5dc21b239aaa5e806e3f7860 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 22 Feb 2026 00:24:55 +0000 Subject: [PATCH 2/2] fix(audit): surface pre-agent step errors in failure analysis when agent never ran When a workflow run fails in a pre-agent step (e.g., lockdown validation, missing secrets), the audit tool now extracts the actual error message from the step log files in workflow-logs/{job}/{step_num}_{step_name}.txt instead of reporting "No specific errors identified". Also improves the error message for invalid run IDs to provide a human-readable message instead of the cryptic "exit status 1". Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/cli/audit.go | 4 + pkg/cli/audit_report.go | 149 ++++++++++++++++++++++++++ pkg/cli/audit_report_test.go | 201 +++++++++++++++++++++++++++++++++++ 3 files changed, 354 insertions(+) diff --git a/pkg/cli/audit.go b/pkg/cli/audit.go index 7b5c9e19e8..66276147c4 100644 --- a/pkg/cli/audit.go +++ b/pkg/cli/audit.go @@ -632,6 +632,10 @@ func fetchWorkflowRunMetadata(runID int64, owner, repo, hostname string, verbose if verbose { fmt.Fprintln(os.Stderr, console.FormatVerboseMessage(string(output))) } + // Provide a human-readable error when the run ID doesn't exist + if strings.Contains(string(output), "Not Found") || strings.Contains(string(output), "404") { + return WorkflowRun{}, fmt.Errorf("workflow run %d not found. Please verify the run ID is correct and that you have access to the repository", runID) + } return WorkflowRun{}, fmt.Errorf("failed to fetch run metadata: %w", err) } diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index adac2785f4..343c3885ac 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -5,6 +5,7 @@ import ( "encoding/json" "os" "path/filepath" + "strconv" "strings" "time" @@ -261,6 +262,14 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage var errors []ErrorInfo var warnings []ErrorInfo + // For failed workflows where the agent never ran (no agent-stdio.log), + // extract errors from step log files to surface the actual failure reason. + if run.Conclusion == "failure" && run.LogsPath != "" { + if stepErrors := extractPreAgentStepErrors(run.LogsPath); len(stepErrors) > 0 { + errors = stepErrors + } + } + // Build tool usage var toolUsage []ToolUsageInfo toolStats := make(map[string]*ToolUsageInfo) @@ -498,4 +507,144 @@ func parseDurationString(s string) time.Duration { return d } +// extractPreAgentStepErrors scans workflow step log files for failure content when the +// agent never executed (no agent-stdio.log present). This surfaces errors from pre-agent +// steps such as lockdown validation, binary installation, or repository checkout failures. +// +// Step log files are stored in workflow-logs/{job}/{step_num}_{step_name}.txt after +// downloading via downloadWorkflowRunLogs. The function finds the last step that ran +// (highest step number) as that is most likely the step that caused the failure. +func extractPreAgentStepErrors(logsPath string) []ErrorInfo { + // If agent-stdio.log exists, the agent ran - don't scan step logs + agentStdioPath := filepath.Join(logsPath, "agent-stdio.log") + if _, err := os.Stat(agentStdioPath); err == nil { + auditReportLog.Printf("agent-stdio.log found, skipping pre-agent step error extraction") + return nil + } + + // Look for step log files in workflow-logs subdirectory + workflowLogsDir := filepath.Join(logsPath, "workflow-logs") + if _, err := os.Stat(workflowLogsDir); err != nil { + auditReportLog.Printf("workflow-logs directory not found, skipping step log extraction") + return nil + } + + // Find the last step log file by scanning job subdirectories. + // GitHub Actions log zip structure: {job_name}/{step_num}_{step_name}.txt + type stepLog struct { + path string + num int + stepKey string // job/step_name for display + } + + var lastStep *stepLog + + jobDirs, err := os.ReadDir(workflowLogsDir) + if err != nil { + return nil + } + + for _, jobEntry := range jobDirs { + if !jobEntry.IsDir() { + continue + } + jobDir := filepath.Join(workflowLogsDir, jobEntry.Name()) + stepFiles, err := os.ReadDir(jobDir) + if err != nil { + continue + } + for _, stepFile := range stepFiles { + if stepFile.IsDir() || !strings.HasSuffix(stepFile.Name(), ".txt") { + continue + } + num, stepName := parseStepFilename(stepFile.Name()) + if num > 0 && (lastStep == nil || num > lastStep.num) { + lastStep = &stepLog{ + path: filepath.Join(jobDir, stepFile.Name()), + num: num, + stepKey: jobEntry.Name() + "/" + stepName, + } + } + } + } + + if lastStep == nil { + auditReportLog.Printf("No step log files found in %s", workflowLogsDir) + return nil + } + + content, err := os.ReadFile(lastStep.path) + if err != nil { + auditReportLog.Printf("Failed to read step log %s: %v", lastStep.path, err) + return nil + } + + message := stripGHALogTimestamps(strings.TrimSpace(string(content))) + if message == "" { + return nil + } + + // Truncate to a reasonable size for the error summary + const maxMessageLen = 1500 + if len(message) > maxMessageLen { + message = message[:maxMessageLen] + "..." + } + + auditReportLog.Printf("Extracted pre-agent step error from %s (step %d)", lastStep.stepKey, lastStep.num) + return []ErrorInfo{{ + Type: "step_failure", + File: lastStep.stepKey, + Message: message, + }} +} + +// parseStepFilename extracts the step number and name from a GitHub Actions step log +// filename in the format "{step_num}_{step_name}.txt" (e.g. "12_Validate lockdown mode.txt"). +// Returns (0, filename) if the filename does not match the expected format. +func parseStepFilename(filename string) (int, string) { + base := strings.TrimSuffix(filename, ".txt") + idx := strings.IndexByte(base, '_') + if idx <= 0 { + return 0, base + } + num, err := strconv.Atoi(base[:idx]) + if err != nil { + return 0, base + } + return num, base[idx+1:] +} + +// stripGHALogTimestamps removes GitHub Actions timestamp prefixes from each line of a log. +// GitHub Actions step log files prefix each line with an RFC3339 timestamp followed by a space, +// e.g. "2024-01-01T10:00:00.1234567Z message here". This function strips those prefixes so the +// returned string contains only the actual log content. +func stripGHALogTimestamps(content string) string { + lines := strings.Split(content, "\n") + stripped := make([]string, 0, len(lines)) + for _, line := range lines { + // GHA timestamp format: YYYY-MM-DDTHH:MM:SS[.sss...]Z + // The 'T' separator is always at position 10. Search for the terminating 'Z' after 'T' + // in a generous window (positions 11-35) to handle any fractional seconds length. + if len(line) > 19 && line[4] == '-' && line[7] == '-' && line[10] == 'T' { + // Find the Z that ends the timestamp within a reasonable range + searchBound := 35 + if searchBound > len(line) { + searchBound = len(line) + } + if zIdx := strings.IndexByte(line[11:searchBound], 'Z'); zIdx >= 0 { + zPos := 11 + zIdx + if zPos+1 <= len(line) { + line = line[zPos+1:] + // Skip leading space after the timestamp + if len(line) > 0 && line[0] == ' ' { + line = line[1:] + } + } + } + } + stripped = append(stripped, line) + } + return strings.Join(stripped, "\n") +} + // renderJSON outputs the audit data as JSON diff --git a/pkg/cli/audit_report_test.go b/pkg/cli/audit_report_test.go index 02267ae0fc..868b4f3e68 100644 --- a/pkg/cli/audit_report_test.go +++ b/pkg/cli/audit_report_test.go @@ -1326,3 +1326,204 @@ not-valid-json assert.Nil(t, items, "should return nil for empty manifest") }) } + +func TestParseStepFilename(t *testing.T) { + tests := []struct { + name string + filename string + expectedNum int + expectedName string + }{ + { + name: "typical step filename", + filename: "12_Validate lockdown mode requirements.txt", + expectedNum: 12, + expectedName: "Validate lockdown mode requirements", + }, + { + name: "single digit step", + filename: "1_Set up job.txt", + expectedNum: 1, + expectedName: "Set up job", + }, + { + name: "step with underscores in name", + filename: "5_Run_shell_script.txt", + expectedNum: 5, + expectedName: "Run_shell_script", + }, + { + name: "no underscore separator", + filename: "nomatch.txt", + expectedNum: 0, + expectedName: "nomatch", + }, + { + name: "non-numeric prefix", + filename: "abc_Step name.txt", + expectedNum: 0, + expectedName: "abc_Step name", + }, + { + name: "leading underscore only", + filename: "_Step name.txt", + expectedNum: 0, + expectedName: "_Step name", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + num, name := parseStepFilename(tt.filename) + assert.Equal(t, tt.expectedNum, num, "Step number should match") + assert.Equal(t, tt.expectedName, name, "Step name should match") + }) + } +} + +func TestStripGHALogTimestamps(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "typical GHA log line with timestamp", + input: "2024-01-15T10:30:45.1234567Z This is the log message", + expected: "This is the log message", + }, + { + name: "short fractional seconds", + input: "2024-01-15T10:30:45.123Z Short fractional message", + expected: "Short fractional message", + }, + { + name: "no fractional seconds", + input: "2024-01-15T10:30:45Z No fractional message", + expected: "No fractional message", + }, + { + name: "multiple lines with timestamps", + input: "2024-01-15T10:30:45.1234567Z Line one\n2024-01-15T10:30:46.9876543Z Line two", + expected: "Line one\nLine two", + }, + { + name: "line without timestamp unchanged", + input: "This line has no timestamp", + expected: "This line has no timestamp", + }, + { + name: "mixed lines", + input: "2024-01-15T10:30:45.1234567Z Timestamped\nNot timestamped", + expected: "Timestamped\nNot timestamped", + }, + { + name: "empty string", + input: "", + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := stripGHALogTimestamps(tt.input) + assert.Equal(t, tt.expected, result, "Stripped content should match expected") + }) + } +} + +func TestExtractPreAgentStepErrors(t *testing.T) { + t.Run("returns nil when agent-stdio.log exists", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + // Create agent-stdio.log to indicate agent ran + require.NoError(t, os.WriteFile(filepath.Join(dir, "agent-stdio.log"), []byte("agent output"), 0600)) + // Create workflow-logs with a step log that has content + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "12_Validate lockdown mode requirements.txt"), []byte("Error: lockdown failed"), 0600)) + + errors := extractPreAgentStepErrors(dir) + assert.Nil(t, errors, "Should return nil when agent-stdio.log exists") + }) + + t.Run("returns nil when workflow-logs directory missing", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + // No agent-stdio.log and no workflow-logs directory + errors := extractPreAgentStepErrors(dir) + assert.Nil(t, errors, "Should return nil when no workflow-logs directory") + }) + + t.Run("extracts error from last step log file", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + // No agent-stdio.log + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + // Create multiple step logs - last one should be selected + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "1_Set up job.txt"), []byte("Setup complete"), 0600)) + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "5_Install binary.txt"), []byte("Install complete"), 0600)) + lockdownErr := "Lockdown mode is enabled (lockdown: true) but no custom GitHub token is configured.\n\nPlease configure one of the following as a repository secret:\n - GH_AW_GITHUB_TOKEN (recommended)" + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "12_Validate lockdown mode requirements.txt"), []byte(lockdownErr), 0600)) + + errors := extractPreAgentStepErrors(dir) + require.NotNil(t, errors, "Should return errors from step logs") + require.Len(t, errors, 1, "Should return exactly one error info") + assert.Equal(t, "step_failure", errors[0].Type, "Error type should be step_failure") + assert.Equal(t, "agent/Validate lockdown mode requirements", errors[0].File, "File should include job and step name") + assert.Contains(t, errors[0].Message, "Lockdown mode is enabled", "Message should contain lockdown error text") + assert.Contains(t, errors[0].Message, "GH_AW_GITHUB_TOKEN", "Message should contain token suggestion") + }) + + t.Run("strips GHA timestamps from step log content", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + // Step log with GHA-style timestamps + logContent := "2024-01-15T10:30:45.1234567Z Error: something went wrong\n2024-01-15T10:30:46.9876543Z Please check your configuration" + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "3_Run validation.txt"), []byte(logContent), 0600)) + + errors := extractPreAgentStepErrors(dir) + require.NotNil(t, errors, "Should extract errors from step log") + assert.Contains(t, errors[0].Message, "Error: something went wrong", "Should strip timestamps") + assert.NotContains(t, errors[0].Message, "2024-01-15T", "Should not contain timestamp prefix") + }) + + t.Run("truncates long step log content", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + // Create a very long log message + longContent := strings.Repeat("This is a very long error message. ", 100) + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "7_Long step.txt"), []byte(longContent), 0600)) + + errors := extractPreAgentStepErrors(dir) + require.NotNil(t, errors, "Should extract errors from long step log") + assert.LessOrEqual(t, len(errors[0].Message), 1503, "Message should be truncated to maxMessageLen (1500) + len(\"...\") (3)") + assert.True(t, strings.HasSuffix(errors[0].Message, "..."), "Truncated message should end with ellipsis") + }) + + t.Run("builds error summary from step errors in buildAuditData", func(t *testing.T) { + dir := testutil.TempDir(t, "audit-step-*") + // No agent-stdio.log + workflowLogsDir := filepath.Join(dir, "workflow-logs", "agent") + require.NoError(t, os.MkdirAll(workflowLogsDir, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(workflowLogsDir, "12_Validate lockdown mode.txt"), + []byte("Lockdown mode is enabled but no token configured"), 0600)) + + run := WorkflowRun{ + DatabaseID: 123, + Conclusion: "failure", + LogsPath: dir, + WorkflowName: "Test Workflow", + } + processedRun := ProcessedRun{Run: run} + metrics := LogMetrics{} + + data := buildAuditData(processedRun, metrics, nil) + + require.NotNil(t, data.FailureAnalysis, "Should have failure analysis for failed run") + assert.NotEqual(t, "No specific errors identified", data.FailureAnalysis.ErrorSummary, + "Error summary should be extracted from step logs, not the default message") + assert.Contains(t, data.FailureAnalysis.ErrorSummary, "Lockdown mode is enabled", + "Error summary should contain the actual error from the step log") + }) +}