From a573306db492c33113a9b7630271293ca7e6b39c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 14:53:25 +0000 Subject: [PATCH 1/5] Initial plan From de9fd8f4ba7ab261c7e5ae08717e081a506b0bc4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 15:09:47 +0000 Subject: [PATCH 2/5] Implement XML comment removal from generated prompts (fixes #822) Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/dev.lock.yml | 3 +- pkg/workflow/compiler.go | 97 +++++++++++++++++- pkg/workflow/xml_comments_test.go | 158 ++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+), 4 deletions(-) create mode 100644 pkg/workflow/xml_comments_test.go diff --git a/.github/workflows/dev.lock.yml b/.github/workflows/dev.lock.yml index 5c6346ba8a..3666d500d0 100644 --- a/.github/workflows/dev.lock.yml +++ b/.github/workflows/dev.lock.yml @@ -319,8 +319,7 @@ jobs: Before returning the poem: - store generated poem in memory - + --- diff --git a/pkg/workflow/compiler.go b/pkg/workflow/compiler.go index 7b990f37e7..a537478cae 100644 --- a/pkg/workflow/compiler.go +++ b/pkg/workflow/compiler.go @@ -3267,6 +3267,98 @@ func (c *Compiler) generateUploadAccessLogs(yaml *strings.Builder, tools map[str yaml.WriteString(" if-no-files-found: warn\n") } +// removeXMLComments removes XML comments () from markdown content +// while preserving comments that appear within code blocks +func removeXMLComments(content string) string { + // Track if we're inside a code block to avoid removing comments in code + lines := strings.Split(content, "\n") + var result []string + inCodeBlock := false + inXMLComment := false + + for _, line := range lines { + // Check for code block markers (``` or ~~~) + trimmedLine := strings.TrimSpace(line) + if strings.HasPrefix(trimmedLine, "```") || strings.HasPrefix(trimmedLine, "~~~") { + inCodeBlock = !inCodeBlock + result = append(result, line) + continue + } + + // If we're in a code block, preserve the line as-is + if inCodeBlock { + result = append(result, line) + continue + } + + // Process the line for XML comments + processedLine, wasInComment, isInComment := removeXMLCommentsFromLine(line, inXMLComment) + inXMLComment = isInComment + + if !wasInComment && !isInComment { + // Line had no comment involvement, keep as-is + result = append(result, processedLine) + } else if !wasInComment && isInComment { + // Line started a multiline comment, keep the processed part and add empty line + if strings.TrimSpace(processedLine) != "" { + result = append(result, processedLine) + } + result = append(result, "") + } else if wasInComment && !isInComment { + // Line ended a multiline comment, keep the processed part + if strings.TrimSpace(processedLine) != "" { + result = append(result, processedLine) + } + } + // If wasInComment && isInComment, we're in the middle of a comment, skip the line + } + + return strings.Join(result, "\n") +} + +// removeXMLCommentsFromLine removes XML comments from a single line +// Returns: processed line, was initially in comment, is now in comment +func removeXMLCommentsFromLine(line string, inXMLComment bool) (string, bool, bool) { + result := line + wasInComment := inXMLComment + + for { + if inXMLComment { + // We're in a multiline comment, look for closing tag + if closeIndex := strings.Index(result, "-->"); closeIndex != -1 { + // Found closing tag, remove everything up to and including it + result = result[closeIndex+3:] + inXMLComment = false + // Continue processing in case there are more comments on this line + } else { + // No closing tag found, entire line is part of the comment + return "", wasInComment, inXMLComment + } + } else { + // Not in a comment, look for opening tag + if openIndex := strings.Index(result, ""); closeIndex != -1 { + // Complete comment on same line + actualCloseIndex := openIndex + closeIndex + 3 + result = result[:openIndex] + result[actualCloseIndex:] + // Continue processing in case there are more comments on this line + } else { + // Start of multiline comment + result = result[:openIndex] + inXMLComment = true + break + } + } else { + // No opening tag found, done processing this line + break + } + } + } + + return result, wasInComment, inXMLComment +} + func (c *Compiler) generatePrompt(yaml *strings.Builder, data *WorkflowData) { yaml.WriteString(" - name: Create prompt\n") @@ -3283,8 +3375,9 @@ func (c *Compiler) generatePrompt(yaml *strings.Builder, data *WorkflowData) { yaml.WriteString(" mkdir -p /tmp/aw-prompts\n") yaml.WriteString(" cat > $GITHUB_AW_PROMPT << 'EOF'\n") - // Add markdown content with proper indentation - for _, line := range strings.Split(data.MarkdownContent, "\n") { + // Add markdown content with proper indentation (removing XML comments) + cleanedMarkdownContent := removeXMLComments(data.MarkdownContent) + for _, line := range strings.Split(cleanedMarkdownContent, "\n") { yaml.WriteString(" " + line + "\n") } diff --git a/pkg/workflow/xml_comments_test.go b/pkg/workflow/xml_comments_test.go new file mode 100644 index 0000000000..c0e5eb8f42 --- /dev/null +++ b/pkg/workflow/xml_comments_test.go @@ -0,0 +1,158 @@ +package workflow + +import ( + "strings" + "testing" +) + +func TestRemoveXMLComments(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "No XML comments", + input: "This is regular markdown content", + expected: "This is regular markdown content", + }, + { + name: "Single line XML comment", + input: "Before after", + expected: "Before after", + }, + { + name: "XML comment at start of line", + input: " content", + expected: " content", + }, + { + name: "XML comment at end of line", + input: "content ", + expected: "content ", + }, + { + name: "Entire line is XML comment", + input: "", + expected: "", + }, + { + name: "Multiple XML comments on same line", + input: " middle end", + expected: " middle end", + }, + { + name: "Multiline XML comment", + input: `Before comment + +After comment`, + expected: `Before comment + +After comment`, + }, + { + name: "Multiple separate XML comments", + input: `First line + +Middle line + +Last line`, + expected: `First line + +Middle line + +Last line`, + }, + { + name: "XML comment with special characters", + input: "Text more text", + expected: "Text more text", + }, + { + name: "Nested-like XML comment (not actually nested)", + input: " -->", + expected: " -->", + }, + { + name: "XML comment in code block should be preserved", + input: `Regular text +` + "```" + ` + +` + "```" + ` + +More text`, + expected: `Regular text +` + "```" + ` + +` + "```" + ` + +More text`, + }, + { + name: "Empty XML comment", + input: "Before after", + expected: "Before after", + }, + { + name: "XML comment with only whitespace", + input: "Before after", + expected: "Before after", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := removeXMLComments(tt.input) + if result != tt.expected { + t.Errorf("removeXMLComments() = %q, want %q", result, tt.expected) + } + }) + } +} + +func TestGeneratePromptRemovesXMLComments(t *testing.T) { + compiler := NewCompiler(false, "", "test") + + data := &WorkflowData{ + MarkdownContent: `# Workflow Title + +This is some content. + +More content here. + + + +Final content.`, + } + + var yaml strings.Builder + compiler.generatePrompt(&yaml, data) + + output := yaml.String() + + // Check that XML comments are not present in the generated output + if strings.Contains(output, "") { + t.Error("Expected single-line XML comment to be removed from prompt generation") + } + + if strings.Contains(output, ") from markdown content // while preserving comments that appear within code blocks func removeXMLComments(content string) string { @@ -3277,9 +3308,9 @@ func removeXMLComments(content string) string { inXMLComment := false for _, line := range lines { - // Check for code block markers (``` or ~~~) + // Check for code block markers (3 or more ` or ~) trimmedLine := strings.TrimSpace(line) - if strings.HasPrefix(trimmedLine, "```") || strings.HasPrefix(trimmedLine, "~~~") { + if isCodeBlockMarker(trimmedLine) { inCodeBlock = !inCodeBlock result = append(result, line) continue diff --git a/pkg/workflow/xml_comments_test.go b/pkg/workflow/xml_comments_test.go index c0e5eb8f42..860f192d24 100644 --- a/pkg/workflow/xml_comments_test.go +++ b/pkg/workflow/xml_comments_test.go @@ -88,6 +88,51 @@ More text`, ` + "```" + ` +More text`, + }, + { + name: "XML comment in code block with 4 backticks should be preserved", + input: `Regular text +` + "````" + `python + +` + "````" + ` + +More text`, + expected: `Regular text +` + "````" + `python + +` + "````" + ` + +More text`, + }, + { + name: "XML comment in code block with tildes should be preserved", + input: `Regular text +~~~bash + +~~~ + +More text`, + expected: `Regular text +~~~bash + +~~~ + +More text`, + }, + { + name: "XML comment in code block with 5 tildes should be preserved", + input: `Regular text +~~~~~ + +~~~~~ + +More text`, + expected: `Regular text +~~~~~ + +~~~~~ + More text`, }, { From bfab6fb45f8c2d2260348d004ab38a9ae99a90f7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 15:53:10 +0000 Subject: [PATCH 4/5] Fix code block marker matching to prevent incorrect closing with different marker types Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler.go | 78 ++++++++++++++++++++++--------- pkg/workflow/xml_comments_test.go | 53 +++++++++++++++++++++ 2 files changed, 110 insertions(+), 21 deletions(-) diff --git a/pkg/workflow/compiler.go b/pkg/workflow/compiler.go index 1cc163c915..f3ce4f59f5 100644 --- a/pkg/workflow/compiler.go +++ b/pkg/workflow/compiler.go @@ -3267,35 +3267,59 @@ func (c *Compiler) generateUploadAccessLogs(yaml *strings.Builder, tools map[str yaml.WriteString(" if-no-files-found: warn\n") } -// isCodeBlockMarker checks if a trimmed line is a code block marker (3 or more ` or ~) -func isCodeBlockMarker(trimmedLine string) bool { - // Check for backtick markers (3 or more) - if len(trimmedLine) >= 3 && strings.HasPrefix(trimmedLine, "```") { - // Ensure all characters are backticks (until we hit a language specifier) +// extractCodeBlockMarker extracts the marker type and count from a code block line +// Returns marker character ('`' or '~'), count, and language specifier +func extractCodeBlockMarker(trimmedLine string) (rune, int, string) { + if len(trimmedLine) < 3 { + return 0, 0, "" + } + + var marker rune + var count int + + // Check for backticks + if strings.HasPrefix(trimmedLine, "```") { + marker = '`' for i, r := range trimmedLine { - if r != '`' { - // If we've seen at least 3 backticks, this is valid - return i >= 3 + if r == '`' { + count++ + } else { + // Found language specifier or other content + return marker, count, strings.TrimSpace(trimmedLine[i:]) } } - // All characters are backticks and we have at least 3 - return true + // All characters are backticks + return marker, count, "" } - // Check for tilde markers (3 or more) - if len(trimmedLine) >= 3 && strings.HasPrefix(trimmedLine, "~~~") { - // Ensure all characters are tildes (until we hit a language specifier) + // Check for tildes + if strings.HasPrefix(trimmedLine, "~~~") { + marker = '~' for i, r := range trimmedLine { - if r != '~' { - // If we've seen at least 3 tildes, this is valid - return i >= 3 + if r == '~' { + count++ + } else { + // Found language specifier or other content + return marker, count, strings.TrimSpace(trimmedLine[i:]) } } - // All characters are tildes and we have at least 3 - return true + // All characters are tildes + return marker, count, "" } - return false + return 0, 0, "" +} + +// isValidCodeBlockMarker checks if a trimmed line is a valid code block marker (3 or more ` or ~) +func isValidCodeBlockMarker(trimmedLine string) bool { + marker, count, _ := extractCodeBlockMarker(trimmedLine) + return marker != 0 && count >= 3 +} + +// isMatchingCodeBlockMarker checks if the trimmed line matches the opening marker +func isMatchingCodeBlockMarker(trimmedLine string, openMarker rune, openCount int) bool { + marker, count, _ := extractCodeBlockMarker(trimmedLine) + return marker == openMarker && count >= openCount } // removeXMLComments removes XML comments () from markdown content @@ -3305,13 +3329,25 @@ func removeXMLComments(content string) string { lines := strings.Split(content, "\n") var result []string inCodeBlock := false + var openMarker rune + var openCount int inXMLComment := false for _, line := range lines { // Check for code block markers (3 or more ` or ~) trimmedLine := strings.TrimSpace(line) - if isCodeBlockMarker(trimmedLine) { - inCodeBlock = !inCodeBlock + + if !inCodeBlock && isValidCodeBlockMarker(trimmedLine) { + // Opening a code block + openMarker, openCount, _ = extractCodeBlockMarker(trimmedLine) + inCodeBlock = true + result = append(result, line) + continue + } else if inCodeBlock && isMatchingCodeBlockMarker(trimmedLine, openMarker, openCount) { + // Closing the code block with matching marker + inCodeBlock = false + openMarker = 0 + openCount = 0 result = append(result, line) continue } diff --git a/pkg/workflow/xml_comments_test.go b/pkg/workflow/xml_comments_test.go index 860f192d24..24a6085f04 100644 --- a/pkg/workflow/xml_comments_test.go +++ b/pkg/workflow/xml_comments_test.go @@ -145,6 +145,59 @@ More text`, input: "Before after", expected: "Before after", }, + { + name: "Mixed code block markers should not interfere", + input: `Regular text +` + "````python" + ` +some code +` + "~~~" + ` +this is still in the same python block, not a new tilde block +` + "````" + ` + +More text`, + expected: `Regular text +` + "````python" + ` +some code +` + "~~~" + ` +this is still in the same python block, not a new tilde block +` + "````" + ` + +More text`, + }, + { + name: "Different marker types should not close each other", + input: `Text before +` + "~~~bash" + ` +code in tilde block +` + "```" + ` +this is still in the tilde block, backticks don't close it +` + "~~~" + ` + +Final text`, + expected: `Text before +` + "~~~bash" + ` +code in tilde block +` + "```" + ` +this is still in the tilde block, backticks don't close it +` + "~~~" + ` + +Final text`, + }, + { + name: "Nested same-type markers with proper count matching", + input: `Content +` + "```" + ` +code block +` + "```" + ` + +End`, + expected: `Content +` + "```" + ` +code block +` + "```" + ` + +End`, + }, } for _, tt := range tests { From 53c2139c791fb266fd83f7c9138438bf696625df Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 16:05:06 +0000 Subject: [PATCH 5/5] Merge marker and count into single string return value in extractCodeBlockMarker Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/compiler.go | 51 ++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/pkg/workflow/compiler.go b/pkg/workflow/compiler.go index f3ce4f59f5..d34e5f92a1 100644 --- a/pkg/workflow/compiler.go +++ b/pkg/workflow/compiler.go @@ -3267,59 +3267,66 @@ func (c *Compiler) generateUploadAccessLogs(yaml *strings.Builder, tools map[str yaml.WriteString(" if-no-files-found: warn\n") } -// extractCodeBlockMarker extracts the marker type and count from a code block line -// Returns marker character ('`' or '~'), count, and language specifier -func extractCodeBlockMarker(trimmedLine string) (rune, int, string) { +// extractCodeBlockMarker extracts the marker string and language from a code block line +// Returns marker string (e.g., "```", "~~~~") and language specifier +func extractCodeBlockMarker(trimmedLine string) (string, string) { if len(trimmedLine) < 3 { - return 0, 0, "" + return "", "" } - var marker rune var count int // Check for backticks if strings.HasPrefix(trimmedLine, "```") { - marker = '`' for i, r := range trimmedLine { if r == '`' { count++ } else { // Found language specifier or other content - return marker, count, strings.TrimSpace(trimmedLine[i:]) + return strings.Repeat("`", count), strings.TrimSpace(trimmedLine[i:]) } } // All characters are backticks - return marker, count, "" + return strings.Repeat("`", count), "" } // Check for tildes if strings.HasPrefix(trimmedLine, "~~~") { - marker = '~' for i, r := range trimmedLine { if r == '~' { count++ } else { // Found language specifier or other content - return marker, count, strings.TrimSpace(trimmedLine[i:]) + return strings.Repeat("~", count), strings.TrimSpace(trimmedLine[i:]) } } // All characters are tildes - return marker, count, "" + return strings.Repeat("~", count), "" } - return 0, 0, "" + return "", "" } // isValidCodeBlockMarker checks if a trimmed line is a valid code block marker (3 or more ` or ~) func isValidCodeBlockMarker(trimmedLine string) bool { - marker, count, _ := extractCodeBlockMarker(trimmedLine) - return marker != 0 && count >= 3 + marker, _ := extractCodeBlockMarker(trimmedLine) + return len(marker) >= 3 } // isMatchingCodeBlockMarker checks if the trimmed line matches the opening marker -func isMatchingCodeBlockMarker(trimmedLine string, openMarker rune, openCount int) bool { - marker, count, _ := extractCodeBlockMarker(trimmedLine) - return marker == openMarker && count >= openCount +func isMatchingCodeBlockMarker(trimmedLine string, openMarker string) bool { + marker, _ := extractCodeBlockMarker(trimmedLine) + if len(marker) == 0 || len(openMarker) == 0 { + return false + } + + // Markers must be the same type (both backticks or both tildes) + if marker[0] != openMarker[0] { + return false + } + + // Closing marker must have at least as many characters as opening marker + return len(marker) >= len(openMarker) } // removeXMLComments removes XML comments () from markdown content @@ -3329,8 +3336,7 @@ func removeXMLComments(content string) string { lines := strings.Split(content, "\n") var result []string inCodeBlock := false - var openMarker rune - var openCount int + var openMarker string inXMLComment := false for _, line := range lines { @@ -3339,15 +3345,14 @@ func removeXMLComments(content string) string { if !inCodeBlock && isValidCodeBlockMarker(trimmedLine) { // Opening a code block - openMarker, openCount, _ = extractCodeBlockMarker(trimmedLine) + openMarker, _ = extractCodeBlockMarker(trimmedLine) inCodeBlock = true result = append(result, line) continue - } else if inCodeBlock && isMatchingCodeBlockMarker(trimmedLine, openMarker, openCount) { + } else if inCodeBlock && isMatchingCodeBlockMarker(trimmedLine, openMarker) { // Closing the code block with matching marker inCodeBlock = false - openMarker = 0 - openCount = 0 + openMarker = "" result = append(result, line) continue }