// extractCodeBlockMarker parses a (whitespace-trimmed) fenced code block
// delimiter line.
//
// It returns the fence itself (a run of three or more '`' or '~' characters,
// e.g. "```" or "~~~~") and the trimmed text that follows it (the language /
// info string). Both results are empty when the line is not a fence.
//
// A backtick fence whose trailing text contains another backtick (for example
// "```inline```") is rejected: that is an inline code span, not a fence, and
// treating it as one would leave a code block "open" for the rest of the
// document.
func extractCodeBlockMarker(trimmedLine string) (string, string) {
	if len(trimmedLine) < 3 {
		return "", ""
	}

	fence := trimmedLine[0]
	if fence != '`' && fence != '~' {
		return "", ""
	}

	// Measure the leading run of fence characters.
	n := 0
	for n < len(trimmedLine) && trimmedLine[n] == fence {
		n++
	}
	if n < 3 {
		return "", ""
	}

	info := strings.TrimSpace(trimmedLine[n:])
	if fence == '`' && strings.Contains(info, "`") {
		return "", ""
	}

	return trimmedLine[:n], info
}

// isValidCodeBlockMarker reports whether a trimmed line is a code fence
// (three or more '`' or '~' characters, optionally followed by an info string).
func isValidCodeBlockMarker(trimmedLine string) bool {
	marker, _ := extractCodeBlockMarker(trimmedLine)
	return len(marker) >= 3
}

// isMatchingCodeBlockMarker reports whether trimmedLine closes a code block
// that was opened with openMarker.
//
// A closing fence must use the same character as the opening fence, be at
// least as long, and carry no info string. The last rule keeps a line such as
// "```go" inside an open "```" block from being mistaken for the closer.
func isMatchingCodeBlockMarker(trimmedLine string, openMarker string) bool {
	marker, info := extractCodeBlockMarker(trimmedLine)
	if marker == "" || openMarker == "" {
		return false
	}

	// Closing fences may only be followed by whitespace.
	if info != "" {
		return false
	}

	// Markers must be the same type (both backticks or both tildes).
	if marker[0] != openMarker[0] {
		return false
	}

	// Closing marker must have at least as many characters as the opening one.
	return len(marker) >= len(openMarker)
}

// removeXMLComments removes XML comments (<!-- ... -->) from markdown content
// while preserving comments that appear inside fenced code blocks.
//
// The content is processed line by line:
//   - lines inside a fenced code block are copied verbatim;
//   - a comment that starts and ends on one line is cut out of that line;
//   - a line that opens a multi-line comment keeps its text before the opener
//     (if any non-blank text remains) and is followed by one empty line;
//   - lines wholly inside a multi-line comment are dropped;
//   - the line that closes a multi-line comment keeps its text after the
//     closer (if any non-blank text remains).
//
// Fence detection is suspended while inside a multi-line comment, so a fenced
// snippet that is itself commented out is removed together with the comment.
func removeXMLComments(content string) string {
	lines := strings.Split(content, "\n")
	result := make([]string, 0, len(lines))

	inCodeBlock := false
	openMarker := ""
	inXMLComment := false

	for _, line := range lines {
		// Fences only count outside of comments; otherwise commented-out
		// fences would leak the comment body into the output.
		if !inXMLComment {
			trimmedLine := strings.TrimSpace(line)

			switch {
			case !inCodeBlock && isValidCodeBlockMarker(trimmedLine):
				// Opening a code block.
				openMarker, _ = extractCodeBlockMarker(trimmedLine)
				inCodeBlock = true
				result = append(result, line)
				continue
			case inCodeBlock && isMatchingCodeBlockMarker(trimmedLine, openMarker):
				// Closing the code block with a matching marker.
				inCodeBlock = false
				openMarker = ""
				result = append(result, line)
				continue
			case inCodeBlock:
				// Code block content is preserved as-is.
				result = append(result, line)
				continue
			}
		}

		processedLine, wasInComment, isInComment := removeXMLCommentsFromLine(line, inXMLComment)
		inXMLComment = isInComment
		hasText := strings.TrimSpace(processedLine) != ""

		switch {
		case !wasInComment && !isInComment:
			// No multi-line comment involvement: keep the (possibly trimmed) line.
			result = append(result, processedLine)
		case !wasInComment && isInComment:
			// Line opened a multi-line comment: keep the leading text and
			// leave one empty line in place of the comment.
			if hasText {
				result = append(result, processedLine)
			}
			result = append(result, "")
		case wasInComment && !isInComment:
			// Line closed a multi-line comment: keep the trailing text.
			if hasText {
				result = append(result, processedLine)
			}
		}
		// wasInComment && isInComment: the line is entirely inside a comment.
	}

	return strings.Join(result, "\n")
}

// removeXMLCommentsFromLine removes XML comments from a single line.
//
// inXMLComment says whether the line begins inside a comment opened on an
// earlier line. It returns the line with comment text removed, whether the
// line started inside a comment, and whether a comment is still open at the
// end of the line.
func removeXMLCommentsFromLine(line string, inXMLComment bool) (string, bool, bool) {
	const (
		openTag  = "<!--"
		closeTag = "-->"
	)

	result := line
	wasInComment := inXMLComment

	for {
		if inXMLComment {
			closeIndex := strings.Index(result, closeTag)
			if closeIndex == -1 {
				// No closing tag: the entire line is part of the comment.
				return "", wasInComment, inXMLComment
			}
			// Drop everything up to and including the closing tag, then keep
			// scanning in case more comments follow on this line.
			result = result[closeIndex+len(closeTag):]
			inXMLComment = false
			continue
		}

		openIndex := strings.Index(result, openTag)
		if openIndex == -1 {
			// No opening tag left: done with this line.
			break
		}

		closeIndex := strings.Index(result[openIndex:], closeTag)
		if closeIndex == -1 {
			// Start of a multi-line comment: keep only the text before it.
			result = result[:openIndex]
			inXMLComment = true
			break
		}

		// Complete comment on the same line: splice it out and keep scanning.
		actualCloseIndex := openIndex + closeIndex + len(closeTag)
		result = result[:openIndex] + result[actualCloseIndex:]
	}

	return result, wasInComment, inXMLComment
}
for _, line := range strings.Split(data.MarkdownContent, "\n") { + // Add markdown content with proper indentation (removing XML comments) + cleanedMarkdownContent := removeXMLComments(data.MarkdownContent) + for _, line := range strings.Split(cleanedMarkdownContent, "\n") { yaml.WriteString(" " + line + "\n") } diff --git a/pkg/workflow/xml_comments_test.go b/pkg/workflow/xml_comments_test.go new file mode 100644 index 0000000000..24a6085f04 --- /dev/null +++ b/pkg/workflow/xml_comments_test.go @@ -0,0 +1,256 @@ +package workflow + +import ( + "strings" + "testing" +) + +func TestRemoveXMLComments(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "No XML comments", + input: "This is regular markdown content", + expected: "This is regular markdown content", + }, + { + name: "Single line XML comment", + input: "Before after", + expected: "Before after", + }, + { + name: "XML comment at start of line", + input: " content", + expected: " content", + }, + { + name: "XML comment at end of line", + input: "content ", + expected: "content ", + }, + { + name: "Entire line is XML comment", + input: "", + expected: "", + }, + { + name: "Multiple XML comments on same line", + input: " middle end", + expected: " middle end", + }, + { + name: "Multiline XML comment", + input: `Before comment + +After comment`, + expected: `Before comment + +After comment`, + }, + { + name: "Multiple separate XML comments", + input: `First line + +Middle line + +Last line`, + expected: `First line + +Middle line + +Last line`, + }, + { + name: "XML comment with special characters", + input: "Text more text", + expected: "Text more text", + }, + { + name: "Nested-like XML comment (not actually nested)", + input: " -->", + expected: " -->", + }, + { + name: "XML comment in code block should be preserved", + input: `Regular text +` + "```" + ` + +` + "```" + ` + +More text`, + expected: `Regular text +` + "```" + ` + +` + "```" + ` + +More text`, 
+ }, + { + name: "XML comment in code block with 4 backticks should be preserved", + input: `Regular text +` + "````" + `python + +` + "````" + ` + +More text`, + expected: `Regular text +` + "````" + `python + +` + "````" + ` + +More text`, + }, + { + name: "XML comment in code block with tildes should be preserved", + input: `Regular text +~~~bash + +~~~ + +More text`, + expected: `Regular text +~~~bash + +~~~ + +More text`, + }, + { + name: "XML comment in code block with 5 tildes should be preserved", + input: `Regular text +~~~~~ + +~~~~~ + +More text`, + expected: `Regular text +~~~~~ + +~~~~~ + +More text`, + }, + { + name: "Empty XML comment", + input: "Before after", + expected: "Before after", + }, + { + name: "XML comment with only whitespace", + input: "Before after", + expected: "Before after", + }, + { + name: "Mixed code block markers should not interfere", + input: `Regular text +` + "````python" + ` +some code +` + "~~~" + ` +this is still in the same python block, not a new tilde block +` + "````" + ` + +More text`, + expected: `Regular text +` + "````python" + ` +some code +` + "~~~" + ` +this is still in the same python block, not a new tilde block +` + "````" + ` + +More text`, + }, + { + name: "Different marker types should not close each other", + input: `Text before +` + "~~~bash" + ` +code in tilde block +` + "```" + ` +this is still in the tilde block, backticks don't close it +` + "~~~" + ` + +Final text`, + expected: `Text before +` + "~~~bash" + ` +code in tilde block +` + "```" + ` +this is still in the tilde block, backticks don't close it +` + "~~~" + ` + +Final text`, + }, + { + name: "Nested same-type markers with proper count matching", + input: `Content +` + "```" + ` +code block +` + "```" + ` + +End`, + expected: `Content +` + "```" + ` +code block +` + "```" + ` + +End`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := removeXMLComments(tt.input) + if result != tt.expected { + 
t.Errorf("removeXMLComments() = %q, want %q", result, tt.expected) + } + }) + } +} + +func TestGeneratePromptRemovesXMLComments(t *testing.T) { + compiler := NewCompiler(false, "", "test") + + data := &WorkflowData{ + MarkdownContent: `# Workflow Title + +This is some content. + +More content here. + + + +Final content.`, + } + + var yaml strings.Builder + compiler.generatePrompt(&yaml, data) + + output := yaml.String() + + // Check that XML comments are not present in the generated output + if strings.Contains(output, "") { + t.Error("Expected single-line XML comment to be removed from prompt generation") + } + + if strings.Contains(output, "