diff --git a/pkg/cli/add_command.go b/pkg/cli/add_command.go index 1f2ae6dd8f..75dd15bd7c 100644 --- a/pkg/cli/add_command.go +++ b/pkg/cli/add_command.go @@ -457,6 +457,14 @@ func addWorkflowWithTracking(resolved *ResolvedWorkflow, tracker *FileTracker, o fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to fetch include dependencies: %v", err))) } } + // Also fetch and save frontmatter 'imports:' dependencies so they are available + // locally during compilation. Keeping these as relative paths (not workflowspecs) + // ensures the compiler resolves them from disk rather than downloading from GitHub. + if err := fetchAndSaveRemoteFrontmatterImports(string(sourceContent), workflowSpec, githubWorkflowsDir, opts.Verbose, opts.Force, tracker); err != nil { + if opts.Verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to fetch frontmatter import dependencies: %v", err))) + } + } } else if sourceInfo != nil && sourceInfo.IsLocal { // For local workflows, collect and copy include dependencies from local paths // The source directory is derived from the workflow's path @@ -501,15 +509,9 @@ func addWorkflowWithTracking(resolved *ResolvedWorkflow, tracker *FileTracker, o content = updatedContent } - // Process imports field and replace with workflowspec - processedImportsContent, err := processImportsWithWorkflowSpec(content, workflowSpec, commitSHA, opts.Verbose) - if err != nil { - if opts.Verbose { - fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to process imports: %v", err))) - } - } else { - content = processedImportsContent - } + // Note: frontmatter 'imports:' are intentionally kept as relative paths here. + // fetchAndSaveRemoteFrontmatterImports already downloaded those files locally, so + // the compiler can resolve them from disk without any GitHub API calls. // Process @include directives and replace with workflowspec // For local workflows, use the workflow's directory as the base path diff --git a/pkg/cli/remote_workflow.go b/pkg/cli/remote_workflow.go index 061b594891..1ac587a881 100644 --- a/pkg/cli/remote_workflow.go +++ b/pkg/cli/remote_workflow.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "path" "path/filepath" "regexp" "strings" @@ -243,6 +244,231 @@ func FetchIncludeFromSource(includePath string, baseSpec *WorkflowSpec, verbose return nil, section, fmt.Errorf("cannot resolve include path: %s (no base spec provided)", includePath) } +// fetchAndSaveRemoteFrontmatterImports fetches and saves files referenced in the frontmatter +// 'imports:' field of a remote workflow. These relative-path imports are resolved against +// the workflow's location in the source repository and saved locally so compilation can find them. +// This is analogous to fetchAndSaveRemoteIncludes, which handles @include directives in the +// markdown body; this function handles the YAML frontmatter 'imports:' field. +// Import failures are non-fatal (best-effort); the compiler will report any still-missing files. +func fetchAndSaveRemoteFrontmatterImports(content string, spec *WorkflowSpec, targetDir string, verbose bool, force bool, tracker *FileTracker) error { + if spec.RepoSlug == "" { + return nil + } + + parts := strings.SplitN(spec.RepoSlug, "/", 2) + if len(parts) != 2 { + return nil + } + owner, repo := parts[0], parts[1] + ref := spec.Version + if ref == "" { + // Resolve the actual default branch of the source repo rather than assuming "main" + defaultBranch, err := getRepoDefaultBranch(spec.RepoSlug) + if err != nil { + remoteWorkflowLog.Printf("Failed to resolve default branch for %s, falling back to 'main': %v", spec.RepoSlug, err) + ref = "main" + } else { + ref = defaultBranch + } + // Persist the resolved default ref so other callers do not need to re-resolve it + spec.Version = ref + } + + // workflowBaseDir is the directory of the top-level workflow in the source repo + // (e.g. ".github/workflows"). It serves as both the starting point for resolving + // relative imports and as the prefix to strip when computing local target paths. + workflowBaseDir := getParentDir(spec.WorkflowPath) + + // seen is keyed by fully-resolved remote file path. It is shared across all recursion + // levels so that every import (at any depth) is downloaded at most once and import + // cycles (A imports B, B imports A) are broken without infinite recursion. + seen := make(map[string]bool) + fetchFrontmatterImportsRecursive(content, owner, repo, ref, workflowBaseDir, workflowBaseDir, targetDir, verbose, force, tracker, seen) + return nil +} + +// fetchFrontmatterImportsRecursive is the internal worker for fetchAndSaveRemoteFrontmatterImports. +// +// Parameters that change per recursion level: +// - content: the text of the file whose imports are being processed +// - currentBaseDir: directory of that file inside the source repo (used to resolve relative paths) +// +// Parameters that remain constant across all recursion levels: +// - owner, repo, ref: source repository coordinates +// - originalBaseDir: directory of the top-level workflow (used to map remote paths → local paths) +// - targetDir: the `.github/workflows` directory in the user's repo +// - seen: shared visited set (keyed by fully-resolved remote path) — prevents cycles & duplicates +func fetchFrontmatterImportsRecursive(content, owner, repo, ref, currentBaseDir, originalBaseDir, targetDir string, verbose, force bool, tracker *FileTracker, seen map[string]bool) { + result, err := parser.ExtractFrontmatterFromContent(content) + if err != nil || result.Frontmatter == nil { + return + } + + importsField, exists := result.Frontmatter["imports"] + if !exists { + return + } + + var importPaths []string + switch v := importsField.(type) { + case []any: + for _, item := range v { + if s, ok := item.(string); ok { + importPaths = append(importPaths, s) + } + } + case []string: + importPaths = v + } + + if len(importPaths) == 0 { + return + } + + // Pre-compute the absolute target directory once for path-traversal boundary checks. + absTargetDir, err := filepath.Abs(targetDir) + if err != nil { + return + } + + for _, importPath := range importPaths { + // Skip workflowspec-format imports (already pinned to a remote ref) + if isWorkflowSpecFormat(importPath) { + continue + } + + // Strip any section reference (file.md#Section → file.md) + filePath := importPath + if before, _, hasSec := strings.Cut(importPath, "#"); hasSec { + filePath = before + } + if filePath == "" { + continue + } + + // Resolve the remote file path relative to the current file's directory. + // Use path (not filepath) because this is always a forward-slash URL/API path. + var remoteFilePath string + if rest, ok := strings.CutPrefix(filePath, "/"); ok { + // Absolute path from repo root (e.g. "/scripts/helper.md") + remoteFilePath = rest + } else if currentBaseDir != "" { + remoteFilePath = path.Join(currentBaseDir, filePath) + } else { + remoteFilePath = filePath + } + remoteFilePath = path.Clean(remoteFilePath) + + // Reject paths that try to escape the repository root (e.g. "../../etc/passwd") + if remoteFilePath == ".." || strings.HasPrefix(remoteFilePath, "../") { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Skipping import with unsafe path: %q", importPath))) + } + continue + } + + // Cycle/duplicate prevention: use the fully-resolved remote path as the key. + if seen[remoteFilePath] { + continue + } + seen[remoteFilePath] = true + + // Derive the local path relative to targetDir by stripping the original base-dir + // prefix from the remote path. This ensures that imports in nested files resolve + // to the correct location regardless of how many levels deep the recursion goes. + // + // Example: originalBaseDir=".github/workflows" + // remoteFilePath=".github/workflows/shared/analysis.md" → localRelPath="shared/analysis.md" + // (nested) remoteFilePath=".github/workflows/other.md" → localRelPath="other.md" + var localRelPath string + if originalBaseDir != "" && strings.HasPrefix(remoteFilePath, originalBaseDir+"/") { + localRelPath = remoteFilePath[len(originalBaseDir)+1:] + } else { + // Workflow at repo root, or import outside the original base dir: + // use the full remote path relative to targetDir. + localRelPath = remoteFilePath + } + localRelPath = filepath.Clean(filepath.FromSlash(localRelPath)) + // Strip any leading separator produced by Clean on root-relative paths. + localRelPath = strings.TrimLeft(localRelPath, string(filepath.Separator)) + // Reject empty or "." paths (would point to targetDir itself) as a safety guard. + // ".." cannot appear here because remoteFilePath was already rejected above if it + // started with "..", and path.Clean cannot introduce new ".." components. + if localRelPath == "" || localRelPath == "." { + continue + } + targetPath := filepath.Join(targetDir, localRelPath) + + // Belt-and-suspenders: verify the resolved path is inside targetDir + absTargetPath, absErr := filepath.Abs(targetPath) + if absErr != nil { + continue + } + if rel, relErr := filepath.Rel(absTargetDir, absTargetPath); relErr != nil || strings.HasPrefix(rel, "..") { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Refusing to write import outside target directory: %q", importPath))) + } + continue + } + + // Check existence before downloading: if the file already exists and force=false, + // skip the download entirely (no unnecessary network round-trip). + fileExists := false + if _, statErr := os.Stat(targetPath); statErr == nil { + fileExists = true + if !force { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage("Import file already exists, skipping: "+targetPath)) + } + continue + } + } + + // Download from the source repository + importContent, err := parser.DownloadFileFromGitHub(owner, repo, remoteFilePath, ref) + if err != nil { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to fetch import %s: %v", remoteFilePath, err))) + } + continue + } + + // Create the parent directory if needed + if err := os.MkdirAll(filepath.Dir(targetPath), 0755); err != nil { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to create directory for import %s: %v", remoteFilePath, err))) + } + continue + } + + // Write the file + if err := os.WriteFile(targetPath, importContent, 0600); err != nil { + if verbose { + fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to write import %s: %v", remoteFilePath, err))) + } + continue + } + + if verbose { + fmt.Fprintln(os.Stderr, console.FormatSuccessMessage("Fetched import: "+targetPath)) + } + + // Track the file for git staging and potential rollback + if tracker != nil { + if fileExists { + tracker.TrackModified(targetPath) + } else { + tracker.TrackCreated(targetPath) + } + } + + // Recurse into the imported file's imports. Use the imported file's directory as + // currentBaseDir so that relative paths inside it resolve correctly. + importedBaseDir := path.Dir(remoteFilePath) + fetchFrontmatterImportsRecursive(string(importContent), owner, repo, ref, importedBaseDir, originalBaseDir, targetDir, verbose, force, tracker, seen) + } +} + // fetchAndSaveRemoteIncludes parses the workflow content for @include directives and fetches them from the remote source func fetchAndSaveRemoteIncludes(content string, spec *WorkflowSpec, targetDir string, verbose bool, force bool, tracker *FileTracker) error { remoteWorkflowLog.Printf("Fetching remote includes for workflow: %s", spec.String()) diff --git a/pkg/cli/remote_workflow_test.go b/pkg/cli/remote_workflow_test.go index 61eea95a3a..f03dd36efb 100644 --- a/pkg/cli/remote_workflow_test.go +++ b/pkg/cli/remote_workflow_test.go @@ -3,6 +3,7 @@ package cli import ( + "fmt" "os" "path/filepath" "testing" @@ -288,3 +289,269 @@ func TestGetParentDir(t *testing.T) { }) } } + +// TestFetchAndSaveRemoteFrontmatterImports_NoImports verifies that the function +// is a no-op when the workflow has no imports field. +func TestFetchAndSaveRemoteFrontmatterImports_NoImports(t *testing.T) { + content := `--- +engine: copilot +--- + +# Workflow with no imports +` + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "github/gh-aw", + Version: "main", + }, + WorkflowPath: ".github/workflows/ci-coach.md", + } + + tmpDir := t.TempDir() + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tmpDir, false, false, nil) + require.NoError(t, err, "should not error when no imports are present") + + // No files should have been created + entries, readErr := os.ReadDir(tmpDir) + require.NoError(t, readErr) + assert.Empty(t, entries, "no files should be created when no imports are present") +} + +// TestFetchAndSaveRemoteFrontmatterImports_EmptyRepoSlug verifies that the function +// is a no-op when the spec has no remote repo (local workflow). +func TestFetchAndSaveRemoteFrontmatterImports_EmptyRepoSlug(t *testing.T) { + content := `--- +engine: copilot +imports: + - shared/ci-data-analysis.md +--- + +# Workflow +` + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "", // local workflow – no remote repo + }, + WorkflowPath: ".github/workflows/ci-coach.md", + } + + tmpDir := t.TempDir() + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tmpDir, false, false, nil) + require.NoError(t, err, "should not error for local workflow with empty RepoSlug") + + entries, readErr := os.ReadDir(tmpDir) + require.NoError(t, readErr) + assert.Empty(t, entries, "no files should be created for local workflows") +} + +// TestFetchAndSaveRemoteFrontmatterImports_WorkflowSpecSkipped verifies that imports +// that are already in workflowspec format (owner/repo/path@ref) are skipped. +func TestFetchAndSaveRemoteFrontmatterImports_WorkflowSpecSkipped(t *testing.T) { + content := `--- +engine: copilot +imports: + - github/gh-aw/.github/workflows/shared/ci-data-analysis.md@abc123 +--- + +# Workflow +` + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "github/gh-aw", + Version: "main", + }, + WorkflowPath: ".github/workflows/ci-coach.md", + } + + tmpDir := t.TempDir() + // This should not attempt any network calls; already-pinned imports are skipped. + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tmpDir, false, false, nil) + require.NoError(t, err, "should not error for workflowspec imports") + + entries, readErr := os.ReadDir(tmpDir) + require.NoError(t, readErr) + assert.Empty(t, entries, "already-pinned workflowspec imports should not be downloaded") +} + +// TestFetchAndSaveRemoteFrontmatterImports_NoImportsNoOpTracker verifies that a workflow with +// no imports field leaves the FileTracker completely untouched. +func TestFetchAndSaveRemoteFrontmatterImports_NoImportsNoOpTracker(t *testing.T) { + // Build a minimal FileTracker without calling NewFileTracker (which requires a real + // git repository). We only need the tracking lists populated. + tracker := &FileTracker{ + OriginalContent: make(map[string][]byte), + gitRoot: t.TempDir(), + } + + content := `--- +engine: copilot +--- + +# No imports +` + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "github/gh-aw", + Version: "v1.0.0", + }, + WorkflowPath: ".github/workflows/test.md", + } + + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tracker.gitRoot, false, false, tracker) + require.NoError(t, err) + assert.Empty(t, tracker.CreatedFiles, "no files should be created when there are no imports") + assert.Empty(t, tracker.ModifiedFiles, "no files should be modified when there are no imports") +} + +// TestFetchAndSaveRemoteFrontmatterImports_SectionStrippedDedup verifies that two imports +// pointing to the same file via different #section fragments are treated as one file +// (deduplication via the shared seen set). +func TestFetchAndSaveRemoteFrontmatterImports_SectionStrippedDedup(t *testing.T) { + // Both imports resolve to the same base file after stripping the #section fragment. + // The first triggers a (failing) download attempt; the second is deduplicated and never + // even reaches the download step. Both use relative paths so the workflowspec-format + // skip path is not taken. + content := `--- +engine: copilot +imports: + - shared/reporting.md#SectionA + - shared/reporting.md#SectionB +--- + +# Workflow +` + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "github/gh-aw", + Version: "v1.0.0", + }, + WorkflowPath: ".github/workflows/ci-coach.md", + } + + tmpDir := t.TempDir() + // No network in unit tests: the download attempt for the first import will fail silently + // (verbose=false). The second import must be deduplicated without a second download. + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tmpDir, false, false, nil) + require.NoError(t, err, "section-fragment deduplication should not error") + + entries, readErr := os.ReadDir(tmpDir) + require.NoError(t, readErr) + assert.Empty(t, entries, "no files should be written (download fails in unit tests)") +} + +// TestFetchAndSaveRemoteFrontmatterImports_SkipExistingWithoutForce verifies that a relative +// import whose target file already exists on disk is skipped (not re-downloaded) when force=false. +// Because the existence check happens before the download, this test requires no network access. +func TestFetchAndSaveRemoteFrontmatterImports_SkipExistingWithoutForce(t *testing.T) { + tmpDir := t.TempDir() + sharedDir := filepath.Join(tmpDir, "shared") + require.NoError(t, os.MkdirAll(sharedDir, 0755)) + existingContent := []byte("existing content") + existingFile := filepath.Join(sharedDir, "ci-data-analysis.md") + require.NoError(t, os.WriteFile(existingFile, existingContent, 0600)) + + tracker := &FileTracker{ + OriginalContent: make(map[string][]byte), + gitRoot: tmpDir, + } + + // Relative import: resolves to tmpDir/shared/ci-data-analysis.md which already exists. + // With force=false, the function detects the file via os.Stat *before* attempting a + // download, so no network call is made and the file is preserved unchanged. + content := `--- +engine: copilot +imports: + - shared/ci-data-analysis.md +--- +# Workflow +` + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "github/gh-aw", + Version: "v1.0.0", + }, + WorkflowPath: ".github/workflows/ci-coach.md", + } + + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tmpDir, false, false, tracker) + require.NoError(t, err) + + // The existing file must be untouched and not added to the tracker. + gotContent, readErr := os.ReadFile(existingFile) + require.NoError(t, readErr) + assert.Equal(t, existingContent, gotContent, "pre-existing file must not be modified when force=false") + assert.Empty(t, tracker.CreatedFiles, "pre-existing file must not appear in CreatedFiles") + assert.Empty(t, tracker.ModifiedFiles, "pre-existing file must not appear in ModifiedFiles") +} + +// TestFetchAndSaveRemoteFrontmatterImports_PathTraversal verifies that import paths that +// attempt to escape the repository root via ".." sequences are rejected by the +// remoteFilePath safety check (not just because of a download failure). +// The workflow is placed at the repo root (WorkflowPath="ci-coach.md") so that +// workflowBaseDir="" and path.Join("", "../etc/passwd") = "../etc/passwd", which +// triggers the explicit ".." rejection before any network call. +func TestFetchAndSaveRemoteFrontmatterImports_PathTraversal(t *testing.T) { + tests := []struct { + name string + importPath string + }{ + {name: "parent directory traversal", importPath: "../etc/passwd"}, + {name: "deep traversal", importPath: "../../tmp/evil.md"}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + content := fmt.Sprintf(`--- +engine: copilot +imports: + - %s +--- +# Workflow +`, tc.importPath) + // WorkflowPath at repo root → workflowBaseDir="" → path.Join("","../etc/passwd")="../etc/passwd" + // which triggers the explicit ".." rejection before any network call. + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "github/gh-aw", + Version: "v1.0.0", + }, + WorkflowPath: "ci-coach.md", + } + + tmpDir := t.TempDir() + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tmpDir, false, false, nil) + require.NoError(t, err, "path traversal should be silently rejected, not return an error") + + // No file must have been written anywhere + entries, readErr := os.ReadDir(tmpDir) + require.NoError(t, readErr) + assert.Empty(t, entries, "traversal import %q must not write any file", tc.importPath) + }) + } +} + +// TestFetchAndSaveRemoteFrontmatterImports_InvalidRepoSlug verifies that an invalid +// RepoSlug (not in owner/repo format) causes the function to return early without error. +func TestFetchAndSaveRemoteFrontmatterImports_InvalidRepoSlug(t *testing.T) { + content := `--- +engine: copilot +imports: + - shared/ci-data-analysis.md +--- +# Workflow +` + spec := &WorkflowSpec{ + RepoSpec: RepoSpec{ + RepoSlug: "not-a-valid-slug", // missing slash → only one part + }, + WorkflowPath: ".github/workflows/ci-coach.md", + } + + tmpDir := t.TempDir() + err := fetchAndSaveRemoteFrontmatterImports(content, spec, tmpDir, false, false, nil) + require.NoError(t, err, "invalid RepoSlug should return nil without error") + + entries, readErr := os.ReadDir(tmpDir) + require.NoError(t, readErr) + assert.Empty(t, entries, "no files should be created for an invalid RepoSlug") +}