Skip to content
Merged
5 changes: 5 additions & 0 deletions .changeset/patch-max-bot-mentions.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 16 additions & 4 deletions actions/setup/js/collect_ndjson_output.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const { repairJson, sanitizePrototypePollution } = require("./json_repair_helper
const { AGENT_OUTPUT_FILENAME, TMP_GH_AW_PATH } = require("./constants.cjs");
const { ERR_API, ERR_PARSE } = require("./error_codes.cjs");
const { isPayloadUserBot } = require("./resolve_mentions.cjs");
const { parseIntTemplatable } = require("./templatable.cjs");

async function main() {
try {
Expand Down Expand Up @@ -36,6 +37,10 @@ async function main() {
// This determines which @mentions are allowed in the agent output
const allowedMentions = await resolveAllowedMentionsFromPayload(context, github, core, mentionsConfig);

// maxBotMentions is populated after safeOutputsConfig is read below
/** @type {number | undefined} */
let maxBotMentions;

function validateFieldWithInputSchema(value, fieldName, inputSchema, lineNum) {
if (inputSchema.required && (value === undefined || value === null)) {
return {
Expand All @@ -59,7 +64,7 @@ async function main() {
error: `Line ${lineNum}: ${fieldName} must be a string`,
};
}
normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions });
normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions, maxBotMentions });
break;
case "boolean":
if (typeof value !== "boolean") {
Expand Down Expand Up @@ -90,11 +95,11 @@ async function main() {
error: `Line ${lineNum}: ${fieldName} must be one of: ${inputSchema.options.join(", ")}`,
};
}
normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions });
normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions, maxBotMentions });
break;
default:
if (typeof value === "string") {
normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions });
normalizedValue = sanitizeContent(value, { allowedAliases: allowedMentions, maxBotMentions });
}
break;
}
Expand Down Expand Up @@ -195,6 +200,13 @@ async function main() {
expectedOutputTypes = Object.fromEntries(Object.entries(safeOutputsConfig).map(([key, value]) => [key.replace(/-/g, "_"), value]));
core.info(`[INGESTION] Expected output types after normalization: ${JSON.stringify(Object.keys(expectedOutputTypes))}`);
core.info(`[INGESTION] Expected output types full config: ${JSON.stringify(expectedOutputTypes)}`);
// Extract max-bot-mentions from config (defaults to undefined, using neutralizeBotTriggers default)
const rawMaxBotMentions = parseIntTemplatable(expectedOutputTypes.max_bot_mentions, 0);
if (rawMaxBotMentions > 0) {
maxBotMentions = rawMaxBotMentions;
}
// Remove global config keys so they are not treated as valid output types
delete expectedOutputTypes.max_bot_mentions;
} catch (error) {
const errorMsg = getErrorMessage(error);
core.info(`Warning: Could not parse safe-outputs config: ${errorMsg}`);
Expand Down Expand Up @@ -282,7 +294,7 @@ async function main() {

// Use the validation engine to validate the item
if (hasValidationConfig(itemType)) {
const validationResult = validateItem(item, itemType, i + 1, { allowedAliases: allowedMentions });
const validationResult = validateItem(item, itemType, i + 1, { allowedAliases: allowedMentions, maxBotMentions });
if (!validationResult.isValid) {
if (validationResult.error) {
errors.push(validationResult.error);
Expand Down
4 changes: 2 additions & 2 deletions actions/setup/js/collect_ndjson_output.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ describe("collect_ndjson_output.cjs", () => {
(fs.mkdirSync("/opt/gh-aw/safeoutputs", { recursive: !0 }), fs.writeFileSync(configPath, __config), await eval(`(async () => { ${collectScript}; await main(); })()`));
const outputCall = mockCore.setOutput.mock.calls.find(call => "output" === call[0]),
parsedOutput = JSON.parse(outputCall[1]);
expect(parsedOutput.items[0].body).toBe("This `fixes #123` and `closes #456`, also `resolves #789`");
expect(parsedOutput.items[0].body).toBe("This fixes #123 and closes #456, also resolves #789");
}),
it("should remove ANSI escape sequences", async () => {
const testFile = "/tmp/gh-aw/test-ndjson-output.txt",
Expand Down Expand Up @@ -1488,7 +1488,7 @@ describe("collect_ndjson_output.cjs", () => {
outputCall = setOutputCalls.find(call => "output" === call[0]);
expect(outputCall).toBeDefined();
const parsedOutput = JSON.parse(outputCall[1]);
(expect(parsedOutput.items).toHaveLength(1), expect(parsedOutput.items[0].message).toContain("`@mention`"), expect(parsedOutput.items[0].message).toContain("`fixes #123`"));
(expect(parsedOutput.items).toHaveLength(1), expect(parsedOutput.items[0].message).toContain("`@mention`"), expect(parsedOutput.items[0].message).toContain("fixes #123"));
}),
it("should handle multiple noop messages", async () => {
const testFile = "/tmp/gh-aw/test-ndjson-output.txt",
Expand Down
8 changes: 4 additions & 4 deletions actions/setup/js/compute_text.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ const mockCore = {
const result = sanitizeIncomingTextFunction("Hello @user and @org/team");
(expect(result).toContain("`@user`"), expect(result).toContain("`@org/team`"));
}),
it("should neutralize bot trigger phrases", () => {
const result = sanitizeIncomingTextFunction("This fixes #123 and closes #456");
(expect(result).toContain("`fixes #123`"), expect(result).toContain("`closes #456`"));
it("should neutralize bot trigger phrases when count exceeds threshold", () => {
const result = sanitizeIncomingTextFunction("fixes #1 closes #2 resolves #3 fixes #4 closes #5 resolves #6 fixes #7 closes #8 resolves #9 fixes #10 closes #11");
(expect(result).not.toContain("`fixes #1`"), expect(result).toContain("`closes #11`"));
}),
it("should remove control characters", () => {
const result = sanitizeIncomingTextFunction("Hello\0\bworld");
Expand Down Expand Up @@ -203,7 +203,7 @@ const mockCore = {
it("should sanitize extracted text before output", async () => {
((mockContext.eventName = "issues"), (mockContext.payload = { issue: { title: "Test @user fixes #123", body: "Visit https://evil.com" } }), await testMain());
const outputCall = mockCore.setOutput.mock.calls[0];
(expect(outputCall[1]).toContain("`@user`"), expect(outputCall[1]).toContain("`fixes #123`"), expect(outputCall[1]).toContain("/redacted"));
(expect(outputCall[1]).toContain("`@user`"), expect(outputCall[1]).toContain("fixes #123"), expect(outputCall[1]).toContain("/redacted"));
}),
it("should handle missing title and body gracefully", async () => {
((mockContext.eventName = "issues"),
Expand Down
13 changes: 9 additions & 4 deletions actions/setup/js/safe_output_type_validator.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ const MAX_BODY_LENGTH = 65000;
*/
const MAX_GITHUB_USERNAME_LENGTH = 39;

/**
* @typedef {{ allowedAliases?: string[], maxBotMentions?: number }} ValidateOptions
*/

/**
* @typedef {Object} FieldValidation
* @property {boolean} [required] - Whether the field is required
Expand Down Expand Up @@ -241,8 +245,7 @@ function validateIssueNumberOrTemporaryId(value, fieldName, lineNum) {
* @param {FieldValidation} validation - The validation configuration
* @param {string} itemType - The item type for error messages
* @param {number} lineNum - Line number for error messages
* @param {Object} [options] - Optional sanitization options
* @param {string[]} [options.allowedAliases] - List of allowed @mentions
* @param {ValidateOptions} [options] - Optional sanitization options
* @returns {{isValid: boolean, normalizedValue?: any, error?: string}}
*/
function validateField(value, fieldName, validation, itemType, lineNum, options) {
Expand Down Expand Up @@ -338,6 +341,7 @@ function validateField(value, fieldName, validation, itemType, lineNum, options)
normalizedResult = sanitizeContent(normalizedResult, {
maxLength: validation.maxLength,
allowedAliases: options?.allowedAliases || [],
maxBotMentions: options?.maxBotMentions,
});
}
return { isValid: true, normalizedValue: normalizedResult };
Expand All @@ -350,6 +354,7 @@ function validateField(value, fieldName, validation, itemType, lineNum, options)
const sanitized = sanitizeContent(processedValue, {
maxLength: validation.maxLength || MAX_BODY_LENGTH,
allowedAliases: options?.allowedAliases || [],
maxBotMentions: options?.maxBotMentions,
});
return { isValid: true, normalizedValue: sanitized };
}
Expand Down Expand Up @@ -389,6 +394,7 @@ function validateField(value, fieldName, validation, itemType, lineNum, options)
? sanitizeContent(item, {
maxLength: validation.itemMaxLength || 128,
allowedAliases: options?.allowedAliases || [],
maxBotMentions: options?.maxBotMentions,
})
: item
);
Expand Down Expand Up @@ -480,8 +486,7 @@ function executeCustomValidation(item, customValidation, lineNum, itemType) {
* @param {Object} item - The item to validate
* @param {string} itemType - The item type (e.g., "create_issue")
* @param {number} lineNum - Line number for error messages
* @param {Object} [options] - Optional sanitization options
* @param {string[]} [options.allowedAliases] - List of allowed @mentions
* @param {ValidateOptions} [options] - Optional sanitization options
* @returns {{isValid: boolean, normalizedItem?: Object, error?: string}}
*/
function validateItem(item, itemType, lineNum, options) {
Expand Down
8 changes: 6 additions & 2 deletions actions/setup/js/sanitize_content.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const { balanceCodeRegions } = require("./markdown_code_region_balancer.cjs");
* @typedef {Object} SanitizeOptions
* @property {number} [maxLength] - Maximum length of content (default: 524288)
* @property {string[]} [allowedAliases] - List of aliases (@mentions) that should not be neutralized
* @property {number} [maxBotMentions] - Maximum bot trigger references before filtering (default: 10)
*/

/**
Expand All @@ -44,18 +45,21 @@ function sanitizeContent(content, maxLengthOrOptions) {
let maxLength;
/** @type {string[]} */
let allowedAliasesLowercase = [];
/** @type {number | undefined} */
let maxBotMentions;

if (typeof maxLengthOrOptions === "number") {
maxLength = maxLengthOrOptions;
} else if (maxLengthOrOptions && typeof maxLengthOrOptions === "object") {
maxLength = maxLengthOrOptions.maxLength;
// Pre-process allowed aliases to lowercase for efficient comparison
allowedAliasesLowercase = (maxLengthOrOptions.allowedAliases || []).map(alias => alias.toLowerCase());
maxBotMentions = maxLengthOrOptions.maxBotMentions;
}

// If no allowed aliases specified, use core sanitization (which neutralizes all mentions)
if (allowedAliasesLowercase.length === 0) {
return sanitizeContentCore(content, maxLength);
return sanitizeContentCore(content, maxLength, maxBotMentions);
}

// If allowed aliases are specified, we need custom mention filtering
Expand Down Expand Up @@ -103,7 +107,7 @@ function sanitizeContent(content, maxLengthOrOptions) {
sanitized = neutralizeGitHubReferences(sanitized, allowedGitHubRefs);

// Neutralize bot triggers
sanitized = neutralizeBotTriggers(sanitized);
sanitized = neutralizeBotTriggers(sanitized, maxBotMentions);

// Balance markdown code regions to fix improperly nested fences
// This repairs markdown where AI models generate nested code blocks at the same indentation
Expand Down
51 changes: 40 additions & 11 deletions actions/setup/js/sanitize_content.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -746,32 +746,61 @@ describe("sanitize_content.cjs", () => {
});

describe("bot trigger neutralization", () => {
it("should neutralize 'fixes #123' patterns", () => {
it("should not neutralize 'fixes #123' when there are 10 or fewer references", () => {
const result = sanitizeContent("This fixes #123");
expect(result).toBe("This `fixes #123`");
expect(result).toBe("This fixes #123");
});

it("should neutralize 'closes #456' patterns", () => {
it("should not neutralize 'closes #456' when there are 10 or fewer references", () => {
const result = sanitizeContent("PR closes #456");
expect(result).toBe("PR `closes #456`");
expect(result).toBe("PR closes #456");
});

it("should neutralize 'resolves #789' patterns", () => {
it("should not neutralize 'resolves #789' when there are 10 or fewer references", () => {
const result = sanitizeContent("This resolves #789");
expect(result).toBe("This `resolves #789`");
expect(result).toBe("This resolves #789");
});

it("should handle various bot trigger verbs", () => {
it("should not neutralize various bot trigger verbs when count is within limit", () => {
const triggers = ["fix", "fixes", "close", "closes", "resolve", "resolves"];
triggers.forEach(verb => {
const result = sanitizeContent(`This ${verb} #123`);
expect(result).toBe(`This \`${verb} #123\``);
expect(result).toBe(`This ${verb} #123`);
});
});

it("should neutralize alphanumeric issue references", () => {
it("should not neutralize alphanumeric issue references when count is within limit", () => {
const result = sanitizeContent("fixes #abc123def");
expect(result).toBe("`fixes #abc123def`");
expect(result).toBe("fixes #abc123def");
});

it("should neutralize excess references beyond the 10-occurrence threshold", () => {
const input = Array.from({ length: 11 }, (_, i) => `fixes #${i + 1}`).join(" ");
const result = sanitizeContent(input);
// First 10 are left unchanged
for (let i = 1; i <= 10; i++) {
expect(result).not.toContain(`\`fixes #${i}\``);
}
// 11th is wrapped
expect(result).toContain("`fixes #11`");
});

it("should not requote already-quoted entries", () => {
// Build a string with 12 entries where one is already quoted and 11 are unquoted
// (11 unquoted entries exceed the MAX_BOT_TRIGGER_REFERENCES threshold of 10)
const alreadyQuoted = "`fixes #1`";
const unquoted = Array.from({ length: 11 }, (_, i) => `fixes #${i + 2}`).join(" ");
const input = `${alreadyQuoted} ${unquoted}`;
const result = sanitizeContent(input);
// The already-quoted entry must not be double-quoted
expect(result).not.toContain("``fixes #1``");
expect(result).toContain("`fixes #1`");
// The first 10 unquoted entries are left unchanged (only the 11th is wrapped)
for (let i = 2; i <= 11; i++) {
expect(result).not.toContain(`\`fixes #${i}\``);
}
// The 12th entry (11th unquoted) is wrapped
expect(result).toContain("`fixes #12`");
});
});

Expand Down Expand Up @@ -1063,7 +1092,7 @@ describe("sanitize_content.cjs", () => {
expect(result).toContain("https://github.com");
expect(result).not.toContain("<script>");
expect(result).toContain("(script)");
expect(result).toContain("`fixes #123`");
expect(result).toContain("fixes #123");
expect(result).not.toContain("\x1b[31m");
expect(result).toContain("Red text");
});
Expand Down
38 changes: 31 additions & 7 deletions actions/setup/js/sanitize_content_core.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -406,13 +406,35 @@ function convertXmlTags(s) {
}

/**
* Neutralizes bot trigger phrases by wrapping them in backticks
* Maximum number of bot trigger references allowed before filtering is applied.
*/
const MAX_BOT_TRIGGER_REFERENCES = 10;

/**
* Neutralizes bot trigger phrases by wrapping them in backticks.
* The first `maxBotMentions` unquoted trigger references are left unchanged;
* any occurrences beyond that threshold are wrapped in backticks.
* Already-quoted entries are never re-quoted.
* @param {string} s - The string to process
* @returns {string} The string with neutralized bot triggers
* @param {number} [maxBotMentions] - Number of occurrences to allow before escaping (default: MAX_BOT_TRIGGER_REFERENCES)
* @returns {string} The string with excess bot triggers neutralized
*/
function neutralizeBotTriggers(s) {
// Neutralize common bot trigger phrases like "fixes #123", "closes #asdfs", etc.
return s.replace(/\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi, (match, action, ref) => `\`${action} #${ref}\``);
function neutralizeBotTriggers(s, maxBotMentions = MAX_BOT_TRIGGER_REFERENCES) {
// Match unquoted bot trigger phrases like "fixes #123", "closes #asdfs", etc.
// The negative lookbehind (?<!`) skips already-quoted entries.
const pattern = /(?<!`)\b(fixes?|closes?|resolves?|fix|close|resolve)\s+#(\w+)/gi;
const matches = s.match(pattern);
if (!matches || matches.length <= maxBotMentions) {
return s;
}
let count = 0;
return s.replace(pattern, (match, action, ref) => {
count++;
if (count <= maxBotMentions) {
return match;
}
return `\`${action} #${ref}\``;
});
}

/**
Expand Down Expand Up @@ -717,9 +739,10 @@ function hardenUnicodeText(text) {
* Core sanitization function without mention filtering
* @param {string} content - The content to sanitize
* @param {number} [maxLength] - Maximum length of content (default: 524288)
* @param {number} [maxBotMentions] - Max bot trigger references before filtering (default: MAX_BOT_TRIGGER_REFERENCES)
* @returns {string} The sanitized content
*/
function sanitizeContentCore(content, maxLength) {
function sanitizeContentCore(content, maxLength, maxBotMentions) {
if (!content || typeof content !== "string") {
return "";
}
Expand Down Expand Up @@ -768,7 +791,7 @@ function sanitizeContentCore(content, maxLength) {
sanitized = neutralizeGitHubReferences(sanitized, allowedGitHubRefs);

// Neutralize common bot trigger phrases
sanitized = neutralizeBotTriggers(sanitized);
sanitized = neutralizeBotTriggers(sanitized, maxBotMentions);

// Neutralize template syntax delimiters (defense-in-depth)
// This prevents potential issues if content is processed by downstream template engines
Expand Down Expand Up @@ -801,6 +824,7 @@ module.exports = {
removeXmlComments,
convertXmlTags,
neutralizeBotTriggers,
MAX_BOT_TRIGGER_REFERENCES,
neutralizeTemplateDelimiters,
applyTruncation,
hardenUnicodeText,
Expand Down
Loading
Loading