diff --git a/.changeset/patch-max-bot-mentions.md b/.changeset/patch-max-bot-mentions.md new file mode 100644 index 0000000000..ab7b8058e2 --- /dev/null +++ b/.changeset/patch-max-bot-mentions.md @@ -0,0 +1,5 @@ +--- +"gh-aw": patch +--- + +Add optional `safe-outputs.max-bot-mentions` field to configure the maximum number of bot trigger references (e.g. `fixes #123`) allowed before they are neutralized. Default is 10. Supports integer or GitHub Actions expression (e.g. `${{ inputs.max-bot-mentions }}`). diff --git a/actions/setup/js/collect_ndjson_output.cjs b/actions/setup/js/collect_ndjson_output.cjs index e326145c6f..cd30fbf53a 100644 --- a/actions/setup/js/collect_ndjson_output.cjs +++ b/actions/setup/js/collect_ndjson_output.cjs @@ -6,6 +6,7 @@ const { repairJson, sanitizePrototypePollution } = require("./json_repair_helper const { AGENT_OUTPUT_FILENAME, TMP_GH_AW_PATH } = require("./constants.cjs"); const { ERR_API, ERR_PARSE } = require("./error_codes.cjs"); const { isPayloadUserBot } = require("./resolve_mentions.cjs"); +const { parseIntTemplatable } = require("./templatable.cjs"); async function main() { try { @@ -36,6 +37,10 @@ async function main() { // This determines which @mentions are allowed in the agent output const allowedMentions = await resolveAllowedMentionsFromPayload(context, github, core, mentionsConfig); + // maxBotMentions is populated after safeOutputsConfig is read below + /** @type {number | undefined} */ + let maxBotMentions; + function validateFieldWithInputSchema(value, fieldName, inputSchema, lineNum) { if (inputSchema.required && (value === undefined || value === null)) { return { @@ -59,7 +64,7 @@ async function main() { error: `Line ${lineNum}: ${fieldName} must be a string`, }; } - normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions }); + normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions, maxBotMentions }); break; case "boolean": if (typeof value !== "boolean") { @@ -90,11 +95,11 @@ async function main() { error: `Line ${lineNum}: ${fieldName} must be one of: ${inputSchema.options.join(", ")}`, }; } - normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions }); + normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions, maxBotMentions }); break; default: if (typeof value === "string") { - normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions }); + normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions, maxBotMentions }); } break; } @@ -195,6 +200,13 @@ async function main() { expectedOutputTypes = Object.fromEntries(Object.entries(safeOutputsConfig).map(([key, value]) => [key.replace(/-/g, "_"), value])); core.info(`[INGESTION] Expected output types after normalization: ${JSON.stringify(Object.keys(expectedOutputTypes))}`); core.info(`[INGESTION] Expected output types full config: ${JSON.stringify(expectedOutputTypes)}`); + // Extract max-bot-mentions from config (defaults to undefined, using neutralizeBotTriggers default) + const rawMaxBotMentions = parseIntTemplatable(expectedOutputTypes.max_bot_mentions, 0); + if (rawMaxBotMentions > 0) { + maxBotMentions = rawMaxBotMentions; + } + // Remove global config keys so they are not treated as valid output types + delete expectedOutputTypes.max_bot_mentions; } catch (error) { const errorMsg = getErrorMessage(error); core.info(`Warning: Could not parse safe-outputs config: ${errorMsg}`); @@ -282,7 +294,7 @@ async function main() { // Use the validation engine to validate the item if (hasValidationConfig(itemType)) { - const validationResult = validateItem(item, itemType, i + 1, { allowedAliases: allowedMentions }); + const validationResult = validateItem(item, itemType, i + 1, { allowedAliases: allowedMentions, maxBotMentions }); if (!validationResult.isValid) { if (validationResult.error) { errors.push(validationResult.error); diff --git a/actions/setup/js/collect_ndjson_output.test.cjs b/actions/setup/js/collect_ndjson_output.test.cjs index ba2f678f09..07379f2a80 100644 --- a/actions/setup/js/collect_ndjson_output.test.cjs +++ b/actions/setup/js/collect_ndjson_output.test.cjs @@ -1228,7 +1228,7 @@ describe("collect_ndjson_output.cjs", () => { (fs.mkdirSync("/opt/gh-aw/safeoutputs", { recursive: !0 }), fs.writeFileSync(configPath, __config), await eval(`(async () => { ${collectScript}; await main(); })()`)); const outputCall = mockCore.setOutput.mock.calls.find(call => "output" === call[0]), parsedOutput = JSON.parse(outputCall[1]); - expect(parsedOutput.items[0].body).toBe("This `fixes #123` and `closes #456`, also `resolves #789`"); + expect(parsedOutput.items[0].body).toBe("This fixes #123 and closes #456, also resolves #789"); }), it("should remove ANSI escape sequences", async () => { const testFile = "/tmp/gh-aw/test-ndjson-output.txt", @@ -1488,7 +1488,7 @@ describe("collect_ndjson_output.cjs", () => { outputCall = setOutputCalls.find(call => "output" === call[0]); expect(outputCall).toBeDefined(); const parsedOutput = JSON.parse(outputCall[1]); - (expect(parsedOutput.items).toHaveLength(1), expect(parsedOutput.items[0].message).toContain("`@mention`"), expect(parsedOutput.items[0].message).toContain("`fixes #123`")); + (expect(parsedOutput.items).toHaveLength(1), expect(parsedOutput.items[0].message).toContain("`@mention`"), expect(parsedOutput.items[0].message).toContain("fixes #123")); }), it("should handle multiple noop messages", async () => { const testFile = "/tmp/gh-aw/test-ndjson-output.txt", diff --git a/actions/setup/js/compute_text.test.cjs b/actions/setup/js/compute_text.test.cjs index 4ec6a49be9..d60fd03e75 100644 --- a/actions/setup/js/compute_text.test.cjs +++ b/actions/setup/js/compute_text.test.cjs @@ -50,9 +50,9 @@ const mockCore = { const result = sanitizeIncomingTextFunction("Hello @user and @org/team"); (expect(result).toContain("`@user`"), expect(result).toContain("`@org/team`")); }), - it("should neutralize bot trigger phrases", () => { - const result = sanitizeIncomingTextFunction("This fixes #123 and closes #456"); - (expect(result).toContain("`fixes #123`"), expect(result).toContain("`closes #456`")); + it("should neutralize bot trigger phrases when count exceeds threshold", () => { + const result = sanitizeIncomingTextFunction("fixes #1 closes #2 resolves #3 fixes #4 closes #5 resolves #6 fixes #7 closes #8 resolves #9 fixes #10 closes #11"); + (expect(result).not.toContain("`fixes #1`"), expect(result).toContain("`closes #11`")); }), it("should remove control characters", () => { const result = sanitizeIncomingTextFunction("Hello\0\bworld"); @@ -203,7 +203,7 @@ const mockCore = { it("should sanitize extracted text before output", async () => { ((mockContext.eventName = "issues"), (mockContext.payload = { issue: { title: "Test @user fixes #123", body: "Visit https://evil.com" } }), await testMain()); const outputCall = mockCore.setOutput.mock.calls[0]; - (expect(outputCall[1]).toContain("`@user`"), expect(outputCall[1]).toContain("`fixes #123`"), expect(outputCall[1]).toContain("/redacted")); + (expect(outputCall[1]).toContain("`@user`"), expect(outputCall[1]).toContain("fixes #123"), expect(outputCall[1]).toContain("/redacted")); }), it("should handle missing title and body gracefully", async () => { ((mockContext.eventName = "issues"), diff --git a/actions/setup/js/safe_output_type_validator.cjs b/actions/setup/js/safe_output_type_validator.cjs index 59e267baa8..34d2517a42 100644 --- a/actions/setup/js/safe_output_type_validator.cjs +++ b/actions/setup/js/safe_output_type_validator.cjs @@ -25,6 +25,10 @@ const MAX_BODY_LENGTH = 65000; */ const MAX_GITHUB_USERNAME_LENGTH = 39; +/** + * @typedef {{ allowedAliases?: string[], maxBotMentions?: number }} ValidateOptions + */ + /** * @typedef {Object} FieldValidation * @property {boolean} [required] - Whether the field is required @@ -241,8 +245,7 @@ function validateIssueNumberOrTemporaryId(value, fieldName, lineNum) { * @param {FieldValidation} validation - The validation configuration * @param {string} itemType - The item type for error messages * @param {number} lineNum - Line number for error messages - * @param {Object} [options] - Optional sanitization options - * @param {string[]} [options.allowedAliases] - List of allowed @mentions + * @param {ValidateOptions} [options] - Optional sanitization options * @returns {{isValid: boolean, normalizedValue?: any, error?: string}} */ function validateField(value, fieldName, validation, itemType, lineNum, options) { @@ -338,6 +341,7 @@ function validateField(value, fieldName, validation, itemType, lineNum, options) normalizedResult = sanitizeContent(normalizedResult, { maxLength: validation.maxLength, allowedAliases: options?.allowedAliases || [], + maxBotMentions: options?.maxBotMentions, }); } return { isValid: true, normalizedValue: normalizedResult }; @@ -350,6 +354,7 @@ function validateField(value, fieldName, validation, itemType, lineNum, options) const sanitized = sanitizeContent(processedValue, { maxLength: validation.maxLength || MAX_BODY_LENGTH, allowedAliases: options?.allowedAliases || [], + maxBotMentions: options?.maxBotMentions, }); return { isValid: true, normalizedValue: sanitized }; } @@ -389,6 +394,7 @@ function validateField(value, fieldName, validation, itemType, lineNum, options) ? sanitizeContent(item, { maxLength: validation.itemMaxLength || 128, allowedAliases: options?.allowedAliases || [], + maxBotMentions: options?.maxBotMentions, }) : item ); @@ -480,8 +486,7 @@ function executeCustomValidation(item, customValidation, lineNum, itemType) { * @param {Object} item - The item to validate * @param {string} itemType - The item type (e.g., "create_issue") * @param {number} lineNum - Line number for error messages - * @param {Object} [options] - Optional sanitization options - * @param {string[]} [options.allowedAliases] - List of allowed @mentions + * @param {ValidateOptions} [options] - Optional sanitization options * @returns {{isValid: boolean, normalizedItem?: Object, error?: string}} */ function validateItem(item, itemType, lineNum, options) { diff --git a/actions/setup/js/sanitize_content.cjs b/actions/setup/js/sanitize_content.cjs index 8d1672b1c2..dd585ff482 100644 --- a/actions/setup/js/sanitize_content.cjs +++ b/actions/setup/js/sanitize_content.cjs @@ -30,6 +30,7 @@ const { balanceCodeRegions } = require("./markdown_code_region_balancer.cjs"); * @typedef {Object} SanitizeOptions * @property {number} [maxLength] - Maximum length of content (default: 524288) * @property {string[]} [allowedAliases] - List of aliases (@mentions) that should not be neutralized + * @property {number} [maxBotMentions] - Maximum bot trigger references before filtering (default: 10) */ /** @@ -44,6 +45,8 @@ function sanitizeContent(content, maxLengthOrOptions) { let maxLength; /** @type {string[]} */ let allowedAliasesLowercase = []; + /** @type {number | undefined} */ + let maxBotMentions; if (typeof maxLengthOrOptions === "number") { maxLength = maxLengthOrOptions; @@ -51,11 +54,12 @@ function sanitizeContent(content, maxLengthOrOptions) { maxLength = maxLengthOrOptions.maxLength; // Pre-process allowed aliases to lowercase for efficient comparison allowedAliasesLowercase = (maxLengthOrOptions.allowedAliases || []).map(alias => alias.toLowerCase()); + maxBotMentions = maxLengthOrOptions.maxBotMentions; } // If no allowed aliases specified, use core sanitization (which neutralizes all mentions) if (allowedAliasesLowercase.length === 0) { - return sanitizeContentCore(content, maxLength); + return sanitizeContentCore(content, maxLength, maxBotMentions); } // If allowed aliases are specified, we need custom mention filtering @@ -103,7 +107,7 @@ function sanitizeContent(content, maxLengthOrOptions) { sanitized = neutralizeGitHubReferences(sanitized, allowedGitHubRefs); // Neutralize bot triggers - sanitized = neutralizeBotTriggers(sanitized); + sanitized = neutralizeBotTriggers(sanitized, maxBotMentions); // Balance markdown code regions to fix improperly nested fences // This repairs markdown where AI models generate nested code blocks at the same indentation diff --git a/actions/setup/js/sanitize_content.test.cjs b/actions/setup/js/sanitize_content.test.cjs index 668663c1c1..8c38358862 100644 --- a/actions/setup/js/sanitize_content.test.cjs +++ b/actions/setup/js/sanitize_content.test.cjs @@ -746,32 +746,61 @@ describe("sanitize_content.cjs", () => { }); describe("bot trigger neutralization", () => { - it("should neutralize 'fixes #123' patterns", () => { + it("should not neutralize 'fixes #123' when there are 10 or fewer references", () => { const result = sanitizeContent("This fixes #123"); - expect(result).toBe("This `fixes #123`"); + expect(result).toBe("This fixes #123"); }); - it("should neutralize 'closes #456' patterns", () => { + it("should not neutralize 'closes #456' when there are 10 or fewer references", () => { const result = sanitizeContent("PR closes #456"); - expect(result).toBe("PR `closes #456`"); + expect(result).toBe("PR closes #456"); }); - it("should neutralize 'resolves #789' patterns", () => { + it("should not neutralize 'resolves #789' when there are 10 or fewer references", () => { const result = sanitizeContent("This resolves #789"); - expect(result).toBe("This `resolves #789`"); + expect(result).toBe("This resolves #789"); }); - it("should handle various bot trigger verbs", () => { + it("should not neutralize various bot trigger verbs when count is within limit", () => { const triggers = ["fix", "fixes", "close", "closes", "resolve", "resolves"]; triggers.forEach(verb => { const result = sanitizeContent(`This ${verb} #123`); - expect(result).toBe(`This \`${verb} #123\``); + expect(result).toBe(`This ${verb} #123`); }); }); - it("should neutralize alphanumeric issue references", () => { + it("should not neutralize alphanumeric issue references when count is within limit", () => { const result = sanitizeContent("fixes #abc123def"); - expect(result).toBe("`fixes #abc123def`"); + expect(result).toBe("fixes #abc123def"); + }); + + it("should neutralize excess references beyond the 10-occurrence threshold", () => { + const input = Array.from({ length: 11 }, (_, i) => `fixes #${i + 1}`).join(" "); + const result = sanitizeContent(input); + // First 10 are left unchanged + for (let i = 1; i <= 10; i++) { + expect(result).not.toContain(`\`fixes #${i}\``); + } + // 11th is wrapped + expect(result).toContain("`fixes #11`"); + }); + + it("should not requote already-quoted entries", () => { + // Build a string with 12 entries where one is already quoted and 11 are unquoted + // (11 unquoted entries exceed the MAX_BOT_TRIGGER_REFERENCES threshold of 10) + const alreadyQuoted = "`fixes #1`"; + const unquoted = Array.from({ length: 11 }, (_, i) => `fixes #${i + 2}`).join(" "); + const input = `${alreadyQuoted} ${unquoted}`; + const result = sanitizeContent(input); + // The already-quoted entry must not be double-quoted + expect(result).not.toContain("``fixes #1``"); + expect(result).toContain("`fixes #1`"); + // The first 10 unquoted entries are left unchanged (only the 11th is wrapped) + for (let i = 2; i <= 11; i++) { + expect(result).not.toContain(`\`fixes #${i}\``); + } + // The 12th entry (11th unquoted) is wrapped + expect(result).toContain("`fixes #12`"); }); }); @@ -1063,7 +1092,7 @@ describe("sanitize_content.cjs", () => { expect(result).toContain("https://github.com"); expect(result).not.toContain("