From fefc1287831bab607d587877dc9034a91f4658d0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 22 May 2026 03:15:50 +0000 Subject: [PATCH] chore: sync actions from gh-aw@v0.75.0 --- setup/js/awf_reflect.cjs | 5 +- setup/js/close_discussion.cjs | 4 +- setup/js/close_entity_helpers.cjs | 4 +- setup/js/create_pull_request.cjs | 279 +++-------------------- setup/js/create_pull_request_helpers.cjs | 273 ++++++++++++++++++++++ setup/js/effective_tokens.cjs | 56 +++-- setup/js/emit_outcome_spans.cjs | 39 +++- setup/js/evaluate_outcomes.cjs | 64 ++++++ setup/js/generate_safe_outputs_tools.cjs | 6 +- setup/js/model_multipliers.json | 48 +++- setup/js/pi_provider.cjs | 34 +-- setup/js/safe_output_handler_manager.cjs | 30 +++ setup/js/safe_outputs_action_outputs.cjs | 12 +- setup/js/send_otlp_span.cjs | 63 ++++- setup/sh/mask_otlp_attributes.sh | 53 +++++ 15 files changed, 652 insertions(+), 318 deletions(-) create mode 100644 setup/js/create_pull_request_helpers.cjs create mode 100755 setup/sh/mask_otlp_attributes.sh diff --git a/setup/js/awf_reflect.cjs b/setup/js/awf_reflect.cjs index dad907b9..2a0e0273 100644 --- a/setup/js/awf_reflect.cjs +++ b/setup/js/awf_reflect.cjs @@ -142,8 +142,9 @@ async function enrichReflectModels(reflectData, timeoutMs, logger) { /** * Fetch the AWF API proxy /reflect endpoint and persist the response to disk. * - * The /reflect endpoint is exposed by the api-proxy sidecar on its management port (10000) - * and returns the list of configured LLM providers together with their available model lists. + * The /reflect endpoint is exposed by the api-proxy sidecar on each started provider port. + * The active provider's gateway port should be used rather than a hardcoded port, since + * port 10000 (the OpenAI sidecar) is only started when OpenAI credentials are configured. 
* This information is saved to AWF_REFLECT_OUTPUT_PATH so the post-run GitHub Actions step * (awf_reflect_summary.cjs) can include it in the step summary without requiring the * containers to still be running. diff --git a/setup/js/close_discussion.cjs b/setup/js/close_discussion.cjs index 711a46b5..08483755 100644 --- a/setup/js/close_discussion.cjs +++ b/setup/js/close_discussion.cjs @@ -284,9 +284,7 @@ async function main(config = {}) { if (!allowBody) { // allow-body: false — drop any body the agent provided and close without a comment if (item.body) { - core.warning( - `close_discussion: allow-body is false — dropping non-empty body (length=${item.body.length}) and closing without a comment` - ); + core.warning(`close_discussion: allow-body is false — dropping non-empty body (length=${item.body.length}) and closing without a comment`); } else { core.info("close_discussion: allow-body is false — closing without a comment"); } diff --git a/setup/js/close_entity_helpers.cjs b/setup/js/close_entity_helpers.cjs index bf960b1e..b917b2d9 100644 --- a/setup/js/close_entity_helpers.cjs +++ b/setup/js/close_entity_helpers.cjs @@ -302,9 +302,7 @@ function createCloseEntityHandler(config, entityConfig, callbacks, githubClient) if (!allowBody) { // allow-body: false — drop any body the agent provided and skip the comment if (typeof item.body === "string" && item.body.trim() !== "") { - core.warning( - `${entityConfig.itemType}: allow-body is false — dropping non-empty body (length=${item.body.length}) and closing without a comment` - ); + core.warning(`${entityConfig.itemType}: allow-body is false — dropping non-empty body (length=${item.body.length}) and closing without a comment`); } else { core.info(`${entityConfig.itemType}: allow-body is false — closing without a comment`); } diff --git a/setup/js/create_pull_request.cjs b/setup/js/create_pull_request.cjs index 3031f792..50d4a08c 100644 --- a/setup/js/create_pull_request.cjs +++ b/setup/js/create_pull_request.cjs @@ 
-27,16 +27,32 @@ const { getBaseBranch } = require("./get_base_branch.cjs"); const { createAuthenticatedGitHubClient } = require("./handler_auth.cjs"); const { buildWorkflowRunUrl } = require("./workflow_metadata_helpers.cjs"); const { checkFileProtection } = require("./manifest_file_helpers.cjs"); -const { renderTemplateFromFile, buildProtectedFileList, encodePathSegments, getPromptPath } = require("./messages_core.cjs"); -const { COPILOT_REVIEWER_BOT, FAQ_CREATE_PR_PERMISSIONS_URL, MAX_ASSIGNEES } = require("./constants.cjs"); +const { renderTemplateFromFile, buildProtectedFileList, getPromptPath } = require("./messages_core.cjs"); +const { COPILOT_REVIEWER_BOT, FAQ_CREATE_PR_PERMISSIONS_URL } = require("./constants.cjs"); const { isStagedMode } = require("./safe_output_helpers.cjs"); -const { withRetry, isTransientError, RATE_LIMIT_RETRY_CONFIG } = require("./error_recovery.cjs"); -const { tryEnforceArrayLimit } = require("./limit_enforcement_helpers.cjs"); +const { withRetry, RATE_LIMIT_RETRY_CONFIG } = require("./error_recovery.cjs"); const { findAgent, getIssueDetails, assignAgentToIssue } = require("./assign_agent_helpers.cjs"); -const { globPatternToRegex } = require("./glob_pattern_helpers.cjs"); const { ensureFullHistoryForBundle, extractBundlePrerequisiteCommits } = require("./git_helpers.cjs"); const { parseDiffGitHeader: parseDiffGitHeaderPaths, extractDiffGitHeaderEntries } = require("./patch_path_helpers.cjs"); const { resolveAllowedMentionsFromPayload } = require("./resolve_mentions_from_payload.cjs"); +const { + MANAGED_FALLBACK_ISSUE_LABEL, + LABEL_MAX_RETRIES, + LABEL_INITIAL_DELAY_MS, + LABEL_MAX_DELAY_MS, + summarizeListForLog, + createBundleTempRef, + isLabelTransientError, + parseAllowedBaseBranches, + isBaseBranchAllowed, + parseStringListConfig, + mergeFallbackIssueLabels, + sanitizeFallbackAssignees, + neutralizeClosingKeywordsForIssueBody, + generatePatchPreview, + buildManifestProtectionCreatePrUrl, + 
renderManifestProtectionFallbackBody, +} = require("./create_pull_request_helpers.cjs"); /** * @typedef {import('./types/handler-factory').HandlerFactoryFunction} HandlerFactoryFunction @@ -68,35 +84,8 @@ async function createCopilotAssignmentClient(config) { /** @type {string} Safe output type handled by this module */ const HANDLER_TYPE = "create_pull_request"; -/** @type {string} Label always added to fallback issues so the triage system can find them */ -const MANAGED_FALLBACK_ISSUE_LABEL = "agentic-workflows"; - -/** - * Creates a temporary refs/bundles ref for applying create_pull_request bundles. - * Branch names are sanitized for ref compatibility, and a short crypto-random - * suffix avoids collisions between branches that sanitize to the same value. - * - * @param {string} branchName - Target branch name - * @returns {string} Temporary bundle ref name - */ -function createBundleTempRef(branchName) { - const suffix = crypto.randomBytes(4).toString("hex"); - return `refs/bundles/create-pr-${branchName.replace(/[^a-zA-Z0-9-]/g, "-")}-${suffix}`; -} - -/** - * Summarize a list for log output to avoid excessively long lines. - * @param {string[]} values - * @param {number} limit - * @returns {string} - */ -function summarizeListForLog(values, limit = 10) { - if (!Array.isArray(values) || values.length === 0) { - return "(none)"; - } - const preview = values.slice(0, limit).join(", "); - return values.length > limit ? `${preview} ... and ${values.length - limit} more` : preview; -} +// NOTE: MANAGED_FALLBACK_ISSUE_LABEL, createBundleTempRef, and summarizeListForLog +// are imported from create_pull_request_helpers.cjs above. /** * Attempt automatic recovery for git am add/add conflicts by preferring the patch version. @@ -319,131 +308,11 @@ async function rewriteBundleBranchAsSingleCommit(baseBranch, execApi) { } } -/** - * Determines if a label API error is transient and worth retrying. 
- * Returns true for: - * - The GitHub race condition where a newly-created PR's node ID is not immediately - * resolvable via the REST/GraphQL bridge (unprocessable validation error). - * - Any standard transient error matched by {@link isTransientError} (network issues, - * rate limits, 5xx gateway errors, etc.). - * @param {any} error - The error to check - * @returns {boolean} True if the error is transient and should be retried - */ -function isLabelTransientError(error) { - const msg = getErrorMessage(error); - if (msg.includes("Could not resolve to a node with the global id")) { - return true; - } - return isTransientError(error); -} - -/** @type {number} Number of retry attempts for label operations */ -const LABEL_MAX_RETRIES = 5; -/** @type {number} Base delay in ms used to calculate label retry backoff (3 seconds) */ -const LABEL_INITIAL_DELAY_MS = 3000; -/** @type {number} Maximum delay in ms between label retries (30 seconds) */ -const LABEL_MAX_DELAY_MS = 30000; - -/** - * Parse allowed base branch patterns from config value (array or comma-separated string) - * @param {string[]|string|undefined} allowedBaseBranchesValue - * @returns {Set} - */ -function parseAllowedBaseBranches(allowedBaseBranchesValue) { - const set = new Set(); - if (Array.isArray(allowedBaseBranchesValue)) { - allowedBaseBranchesValue - .map(branch => String(branch).trim()) - .filter(Boolean) - .forEach(branch => set.add(branch)); - } else if (typeof allowedBaseBranchesValue === "string") { - allowedBaseBranchesValue - .split(",") - .map(branch => branch.trim()) - .filter(Boolean) - .forEach(branch => set.add(branch)); - } - return set; -} - -/** - * Check if a base branch matches an allowed pattern. - * Supports exact matches and "*" glob patterns (e.g. "release/*"). 
- * @param {string} baseBranch - * @param {Set} allowedBaseBranches - * @returns {boolean} - */ -function isBaseBranchAllowed(baseBranch, allowedBaseBranches) { - if (allowedBaseBranches.has(baseBranch)) { - return true; - } - for (const pattern of allowedBaseBranches) { - if (pattern === "*") { - return true; - } - if (pattern.includes("*") && globPatternToRegex(pattern, { pathMode: true, caseSensitive: true }).test(baseBranch)) { - return true; - } - } - return false; -} - -/** - * Parse config values that may be arrays or comma-separated strings. - * @param {string[]|string|undefined} value - * @returns {string[]} - */ -function parseStringListConfig(value) { - if (!value) { - return []; - } - const raw = Array.isArray(value) ? value : String(value).split(","); - return raw.map(item => String(item).trim()).filter(Boolean); -} - -/** - * Merges the required fallback label with any workflow-configured labels, - * deduplicating and filtering empty values. - * @param {string[]} [labels] - * @returns {string[]} - */ -function mergeFallbackIssueLabels(labels = []) { - const normalizedLabels = labels - .filter(label => !!label) - .map(label => String(label).trim()) - .filter(label => label); - return [...new Set([MANAGED_FALLBACK_ISSUE_LABEL, ...normalizedLabels])]; -} - -/** - * Sanitizes configured assignees for fallback issue creation. - * Filters invalid values, removes the special "copilot" username (not a valid GitHub user - * for issue assignment), and enforces the MAX_ASSIGNEES limit. - * Returns null (no assignees field) if the sanitized list is empty. 
- * @param {string[]} assignees - Raw assignees from config - * @returns {string[] | null} Sanitized assignees or null if none remain - */ -function sanitizeFallbackAssignees(assignees) { - if (!assignees || assignees.length === 0) { - return null; - } - const sanitized = assignees - .filter(a => typeof a === "string") - .map(a => a.trim()) - .filter(a => a.length > 0 && a.toLowerCase() !== "copilot"); - - if (sanitized.length === 0) { - return null; - } - - const limitResult = tryEnforceArrayLimit(sanitized, MAX_ASSIGNEES, "assignees"); - if (!limitResult.success) { - core.warning(`Assignees limit exceeded for fallback issue: ${limitResult.error}. Using first ${MAX_ASSIGNEES}.`); - return sanitized.slice(0, MAX_ASSIGNEES); - } - - return sanitized; -} +// NOTE: isLabelTransientError, LABEL_MAX_RETRIES, LABEL_INITIAL_DELAY_MS, LABEL_MAX_DELAY_MS, +// parseAllowedBaseBranches, isBaseBranchAllowed, parseStringListConfig, mergeFallbackIssueLabels, +// sanitizeFallbackAssignees, neutralizeClosingKeywordsForIssueBody, generatePatchPreview, +// buildManifestProtectionCreatePrUrl, and renderManifestProtectionFallbackBody +// are imported from create_pull_request_helpers.cjs above. /** * Creates a fallback GitHub issue, retrying on rate-limit and other transient errors @@ -494,61 +363,6 @@ async function createFallbackIssue(githubClient, repoParts, title, body, labels, ); } -/** - * Builds a compare URL used in protected-files fallback issue bodies. - * Optionally appends a prefilled PR body that closes the fallback issue. 
- * @param {string} githubServer - * @param {{owner: string, repo: string}} repoParts - * @param {string} baseBranch - * @param {string} branchName - * @param {string} title - * @param {number} [fallbackIssueNumber] - * @returns {string} - */ -function buildManifestProtectionCreatePrUrl(githubServer, repoParts, baseBranch, branchName, title, fallbackIssueNumber) { - const encodedBase = encodePathSegments(baseBranch); - const encodedHead = encodePathSegments(branchName); - let createPrUrl = `${githubServer}/${repoParts.owner}/${repoParts.repo}/compare/${encodedBase}...${encodedHead}?expand=1&title=${encodeURIComponent(title)}`; - if (typeof fallbackIssueNumber === "number") { - createPrUrl += `&body=${encodeURIComponent(`Closes #${fallbackIssueNumber}`)}`; - } - return createPrUrl; -} - -/** - * Renders protected-files fallback issue body with a prefilled compare URL. - * @param {string} mainBodyContent - * @param {string} footerContent - * @param {string} fileList - * @param {string} createPrUrl - * @returns {string} - */ -function renderManifestProtectionFallbackBody(mainBodyContent, footerContent, fileList, createPrUrl) { - const templatePath = getPromptPath("manifest_protection_create_pr_fallback.md"); - return renderTemplateFromFile(templatePath, { - main_body: mainBodyContent, - footer: footerContent, - files: fileList, - create_pr_url: createPrUrl, - }); -} - -/** - * Neutralizes issue-closing keywords in body text to avoid unintended cross-issue closure - * when PR content is reused in fallback issue bodies. 
- * - * Example: "Closes #123" -> "Closes \\#123" - * - * @param {string} content - * @returns {string} - */ -function neutralizeClosingKeywordsForIssueBody(content) { - if (!content) { - return content; - } - return String(content).replace(/\b(fix|fixes|fixed|close|closes|closed|resolve|resolves|resolved)\s+((?:[a-z0-9_.-]+\/[a-z0-9_.-]+)?#\d+)\b/gi, (_match, keyword, issueRef) => `${keyword} ${String(issueRef).replace("#", "\\#")}`); -} - /** * Maximum limits for pull request parameters to prevent resource exhaustion. * These limits align with GitHub's API constraints and security best practices. @@ -632,35 +446,7 @@ function enforcePullRequestLimits(patchContent, maxFiles = MAX_FILES) { } } -/** - * Generate a patch preview with max 500 lines and 2000 chars for issue body - * @param {string} patchContent - The full patch content - * @returns {string} Formatted patch preview - */ -function generatePatchPreview(patchContent) { - if (!patchContent || !patchContent.trim()) { - return ""; - } - - const lines = patchContent.split("\n"); - const maxLines = 500; - const maxChars = 2000; - - // Apply line limit first - let preview = lines.length <= maxLines ? patchContent : lines.slice(0, maxLines).join("\n"); - const lineTruncated = lines.length > maxLines; - - // Apply character limit - const charTruncated = preview.length > maxChars; - if (charTruncated) { - preview = preview.slice(0, maxChars); - } - - const truncated = lineTruncated || charTruncated; - const summary = truncated ? `Show patch preview (${Math.min(maxLines, lines.length)} of ${lines.length} lines)` : `Show patch (${lines.length} lines)`; - - return `\n\n
<details><summary>${summary}</summary>\n\n\`\`\`diff\n${preview}${truncated ? "\n... (truncated)" : ""}\n\`\`\`\n\n</details>
`; -} +// NOTE: generatePatchPreview is imported from create_pull_request_helpers.cjs above. /** * Check whether the remote branch already exists and, if so, either reuse it @@ -2303,10 +2089,11 @@ ${patchPreview}`; // Return success with PR details return { success: true, - pull_request_number: pullRequest.number, - pull_request_url: pullRequest.html_url, + number: pullRequest.number, + url: pullRequest.html_url, + managedBody: body, branch_name: branchName, - temporary_id: temporaryId, + temporaryId: temporaryId, repo: itemRepo, }; } catch (prError) { diff --git a/setup/js/create_pull_request_helpers.cjs b/setup/js/create_pull_request_helpers.cjs new file mode 100644 index 00000000..a634f146 --- /dev/null +++ b/setup/js/create_pull_request_helpers.cjs @@ -0,0 +1,273 @@ +// @ts-check +/// + +/** @type {typeof import("crypto")} */ +const crypto = require("crypto"); +const { globPatternToRegex } = require("./glob_pattern_helpers.cjs"); +const { getErrorMessage } = require("./error_helpers.cjs"); +const { isTransientError } = require("./error_recovery.cjs"); +const { tryEnforceArrayLimit } = require("./limit_enforcement_helpers.cjs"); +const { MAX_ASSIGNEES } = require("./constants.cjs"); +const { encodePathSegments, renderTemplateFromFile, getPromptPath } = require("./messages_core.cjs"); + +/** @type {string} Label always added to fallback issues so the triage system can find them */ +const MANAGED_FALLBACK_ISSUE_LABEL = "agentic-workflows"; + +/** @type {number} Number of retry attempts for label operations */ +const LABEL_MAX_RETRIES = 5; +/** @type {number} Base delay in ms used to calculate label retry backoff (3 seconds) */ +const LABEL_INITIAL_DELAY_MS = 3000; +/** @type {number} Maximum delay in ms between label retries (30 seconds) */ +const LABEL_MAX_DELAY_MS = 30000; + +/** + * Summarize a list for log output to avoid excessively long lines. 
+ * @param {string[]} values + * @param {number} limit + * @returns {string} + */ +function summarizeListForLog(values, limit = 10) { + if (!Array.isArray(values) || values.length === 0) { + return "(none)"; + } + const preview = values.slice(0, limit).join(", "); + return values.length > limit ? `${preview} ... and ${values.length - limit} more` : preview; +} + +/** + * Creates a temporary refs/bundles ref for applying create_pull_request bundles. + * Branch names are sanitized for ref compatibility, and a short crypto-random + * suffix avoids collisions between branches that sanitize to the same value. + * + * @param {string} branchName - Target branch name + * @returns {string} Temporary bundle ref name + */ +function createBundleTempRef(branchName) { + const suffix = crypto.randomBytes(4).toString("hex"); + return `refs/bundles/create-pr-${branchName.replace(/[^a-zA-Z0-9-]/g, "-")}-${suffix}`; +} + +/** + * Determines if a label API error is transient and worth retrying. + * Returns true for: + * - The GitHub race condition where a newly-created PR's node ID is not immediately + * resolvable via the REST/GraphQL bridge (unprocessable validation error). + * - Any standard transient error matched by {@link isTransientError} (network issues, + * rate limits, 5xx gateway errors, etc.). 
+ * @param {any} error - The error to check + * @returns {boolean} True if the error is transient and should be retried + */ +function isLabelTransientError(error) { + const msg = getErrorMessage(error); + if (msg.includes("Could not resolve to a node with the global id")) { + return true; + } + return isTransientError(error); +} + +/** + * Parse allowed base branch patterns from config value (array or comma-separated string) + * @param {string[]|string|undefined} allowedBaseBranchesValue + * @returns {Set} + */ +function parseAllowedBaseBranches(allowedBaseBranchesValue) { + const set = new Set(); + if (Array.isArray(allowedBaseBranchesValue)) { + allowedBaseBranchesValue + .map(branch => String(branch).trim()) + .filter(Boolean) + .forEach(branch => set.add(branch)); + } else if (typeof allowedBaseBranchesValue === "string") { + allowedBaseBranchesValue + .split(",") + .map(branch => branch.trim()) + .filter(Boolean) + .forEach(branch => set.add(branch)); + } + return set; +} + +/** + * Check if a base branch matches an allowed pattern. + * Supports exact matches and "*" glob patterns (e.g. "release/*"). + * @param {string} baseBranch + * @param {Set} allowedBaseBranches + * @returns {boolean} + */ +function isBaseBranchAllowed(baseBranch, allowedBaseBranches) { + if (allowedBaseBranches.has(baseBranch)) { + return true; + } + for (const pattern of allowedBaseBranches) { + if (pattern === "*") { + return true; + } + if (pattern.includes("*") && globPatternToRegex(pattern, { pathMode: true, caseSensitive: true }).test(baseBranch)) { + return true; + } + } + return false; +} + +/** + * Parse config values that may be arrays or comma-separated strings. + * @param {string[]|string|undefined} value + * @returns {string[]} + */ +function parseStringListConfig(value) { + if (!value) { + return []; + } + const raw = Array.isArray(value) ? 
value : String(value).split(","); + return raw.map(item => String(item).trim()).filter(Boolean); +} + +/** + * Merges the required fallback label with any workflow-configured labels, + * deduplicating and filtering empty values. + * @param {string[]} [labels] + * @returns {string[]} + */ +function mergeFallbackIssueLabels(labels = []) { + const normalizedLabels = labels + .filter(label => !!label) + .map(label => String(label).trim()) + .filter(label => label); + return [...new Set([MANAGED_FALLBACK_ISSUE_LABEL, ...normalizedLabels])]; +} + +/** + * Sanitizes configured assignees for fallback issue creation. + * Filters invalid values, removes the special "copilot" username (not a valid GitHub user + * for issue assignment), and enforces the MAX_ASSIGNEES limit. + * Returns null (no assignees field) if the sanitized list is empty. + * @param {string[]} assignees - Raw assignees from config + * @returns {string[] | null} Sanitized assignees or null if none remain + */ +function sanitizeFallbackAssignees(assignees) { + if (!assignees || assignees.length === 0) { + return null; + } + const sanitized = assignees + .filter(a => typeof a === "string") + .map(a => a.trim()) + .filter(a => a.length > 0 && a.toLowerCase() !== "copilot"); + + if (sanitized.length === 0) { + return null; + } + + const limitResult = tryEnforceArrayLimit(sanitized, MAX_ASSIGNEES, "assignees"); + if (!limitResult.success) { + core.warning(`Assignees limit exceeded for fallback issue: ${limitResult.error}. Using first ${MAX_ASSIGNEES}.`); + return sanitized.slice(0, MAX_ASSIGNEES); + } + + return sanitized; +} + +/** + * Neutralizes issue-closing keywords in body text to avoid unintended cross-issue closure + * when PR content is reused in fallback issue bodies. 
+ * + * Example: "Closes #123" -> "Closes \\#123" + * + * @param {string} content + * @returns {string} + */ +function neutralizeClosingKeywordsForIssueBody(content) { + if (!content) { + return content; + } + const closingKeywordPattern = /\b(fix|fixes|fixed|close|closes|closed|resolve|resolves|resolved)\s+((?:[a-z0-9_.-]+\/[a-z0-9_.-]+)?#\d+)\b/gi; + const escapeIssueRef = (_match, keyword, issueRef) => `${keyword} ${String(issueRef).replace("#", "\\#")}`; + return String(content).replace(closingKeywordPattern, escapeIssueRef); +} + +/** + * Generate a patch preview with max 500 lines and 2000 chars for issue body + * @param {string} patchContent - The full patch content + * @returns {string} Formatted patch preview + */ +function generatePatchPreview(patchContent) { + if (!patchContent || !patchContent.trim()) { + return ""; + } + + const lines = patchContent.split("\n"); + const maxLines = 500; + const maxChars = 2000; + + // Apply line limit first + let preview = lines.length <= maxLines ? patchContent : lines.slice(0, maxLines).join("\n"); + const lineTruncated = lines.length > maxLines; + + // Apply character limit + const charTruncated = preview.length > maxChars; + if (charTruncated) { + preview = preview.slice(0, maxChars); + } + + const truncated = lineTruncated || charTruncated; + const summary = truncated ? `Show patch preview (${Math.min(maxLines, lines.length)} of ${lines.length} lines)` : `Show patch (${lines.length} lines)`; + + return `\n\n
<details><summary>${summary}</summary>\n\n\`\`\`diff\n${preview}${truncated ? "\n... (truncated)" : ""}\n\`\`\`\n\n</details>
`; +} + +/** + * Builds a compare URL used in protected-files fallback issue bodies. + * Optionally appends a prefilled PR body that closes the fallback issue. + * @param {string} githubServer + * @param {{owner: string, repo: string}} repoParts + * @param {string} baseBranch + * @param {string} branchName + * @param {string} title + * @param {number} [fallbackIssueNumber] + * @returns {string} + */ +function buildManifestProtectionCreatePrUrl(githubServer, repoParts, baseBranch, branchName, title, fallbackIssueNumber) { + const encodedBase = encodePathSegments(baseBranch); + const encodedHead = encodePathSegments(branchName); + let createPrUrl = `${githubServer}/${repoParts.owner}/${repoParts.repo}/compare/${encodedBase}...${encodedHead}?expand=1&title=${encodeURIComponent(title)}`; + if (typeof fallbackIssueNumber === "number") { + createPrUrl += `&body=${encodeURIComponent(`Closes #${fallbackIssueNumber}`)}`; + } + return createPrUrl; +} + +/** + * Renders protected-files fallback issue body with a prefilled compare URL. 
+ * @param {string} mainBodyContent + * @param {string} footerContent + * @param {string} fileList + * @param {string} createPrUrl + * @returns {string} + */ +function renderManifestProtectionFallbackBody(mainBodyContent, footerContent, fileList, createPrUrl) { + const templatePath = getPromptPath("manifest_protection_create_pr_fallback.md"); + return renderTemplateFromFile(templatePath, { + main_body: mainBodyContent, + footer: footerContent, + files: fileList, + create_pr_url: createPrUrl, + }); +} + +module.exports = { + MANAGED_FALLBACK_ISSUE_LABEL, + LABEL_MAX_RETRIES, + LABEL_INITIAL_DELAY_MS, + LABEL_MAX_DELAY_MS, + summarizeListForLog, + createBundleTempRef, + isLabelTransientError, + parseAllowedBaseBranches, + isBaseBranchAllowed, + parseStringListConfig, + mergeFallbackIssueLabels, + sanitizeFallbackAssignees, + neutralizeClosingKeywordsForIssueBody, + generatePatchPreview, + buildManifestProtectionCreatePrUrl, + renderManifestProtectionFallbackBody, +}; diff --git a/setup/js/effective_tokens.cjs b/setup/js/effective_tokens.cjs index 8d5c2d75..2f3f8cd6 100644 --- a/setup/js/effective_tokens.cjs +++ b/setup/js/effective_tokens.cjs @@ -10,7 +10,8 @@ const fs = require("fs"); * docs/src/content/docs/reference/effective-tokens-specification.md. 
* * Formula: - * base_weighted_tokens = (w_in × I) + (w_cache × C) + (w_out × O) + (w_reason × R) + (w_cache_write × W) + * effective_input_tokens = max(I - C, 0) + * base_weighted_tokens = (w_in × effective_input_tokens) + (w_cache × C) + (w_out × O) + (w_reason × R) + (w_cache_write × W) * effective_tokens = m × base_weighted_tokens * * Token class default weights (from spec Section 4.2): @@ -64,20 +65,24 @@ function getMultipliersData() { _parsedMultipliers = null; return null; } - const weights = { ...defaultTokenClassWeights(), ...(parsed.token_class_weights || {}) }; - // Ensure missing or invalid weights fall back to defaults, but preserve explicit 0 overrides + const defaults = defaultTokenClassWeights(); + const weights = { ...defaults, ...(parsed.token_class_weights || {}) }; + + // Ensure missing or invalid weights fall back to defaults, but preserve explicit 0 overrides for (const key of Object.keys(defaults)) { const value = weights[key]; if (value == null || !Number.isFinite(value)) { weights[key] = defaults[key]; } } + /** @type {Record} */ const multipliers = {}; for (const [model, mult] of Object.entries(parsed.multipliers || {})) { multipliers[model.toLowerCase()] = Number(mult); } + _parsedMultipliers = { token_class_weights: weights, multipliers }; return _parsedMultipliers; } catch { @@ -109,11 +114,11 @@ function getTokenClassWeights() { */ function getModelMultiplier(model) { const data = getMultipliersData(); - if (!data || !model) { + if (!data) { return 1.0; } - const key = model.toLowerCase().trim(); + const key = model?.toLowerCase().trim(); if (!key) { return 1.0; } @@ -126,28 +131,29 @@ function getModelMultiplier(model) { } // Longest prefix match - let best = ""; - let bestMult = 1.0; + let longestMatch = ""; + let longestMatchMultiplier = 1.0; for (const [name, mult] of Object.entries(multipliers)) { - if (key.startsWith(name) && name.length > best.length) { - best = name; - bestMult = mult; + if (key.startsWith(name) && name.length 
> longestMatch.length) { + longestMatch = name; + longestMatchMultiplier = mult; } } - return bestMult; + return longestMatchMultiplier; } /** * Computes the base weighted token count for a single invocation. * * Formula (base spec Section 4.3 + cache_write implementation extension): - * base = (w_in × I) + (w_cache × C) + (w_out × O) + (w_reason × R) + (w_cache_write × W) + * effective_input = max(I - C, 0) + * base = (w_in × effective_input) + (w_cache × C) + (w_out × O) + (w_reason × R) + (w_cache_write × W) * * Note: cache_write (W) with weight w_cache_write is an implementation extension; * the core spec formula covers I, C, O, and R only. * - * @param {number} inputTokens - Raw input tokens (I) + * @param {number} inputTokens - Raw input tokens (I), including cached input when reported by provider * @param {number} outputTokens - Raw output tokens (O) * @param {number} cacheReadTokens - Cached input tokens (C) * @param {number} cacheWriteTokens - Cache write tokens (W) @@ -156,7 +162,11 @@ function getModelMultiplier(model) { */ function computeBaseWeightedTokens(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, reasoningTokens = 0) { const w = getTokenClassWeights(); - return w.input * (inputTokens || 0) + w.cached_input * (cacheReadTokens || 0) + w.output * (outputTokens || 0) + w.reasoning * (reasoningTokens || 0) + w.cache_write * (cacheWriteTokens || 0); + const input = inputTokens || 0; + const cached = cacheReadTokens || 0; + const effectiveInput = Math.max(input - cached, 0); + + return w.input * effectiveInput + w.cached_input * cached + w.output * (outputTokens || 0) + w.reasoning * (reasoningTokens || 0) + w.cache_write * (cacheWriteTokens || 0); } /** @@ -217,8 +227,9 @@ function _resetCache() { * @returns {string} Suffix string, e.g. " · ● 12.5K" or "" */ function getEffectiveTokensSuffix() { - const raw = process.env.GH_AW_EFFECTIVE_TOKENS; - const parsed = raw ? 
parseInt(raw, 10) : NaN; + const raw = process.env.GH_AW_EFFECTIVE_TOKENS ?? ""; + const parsed = parseInt(raw, 10); + if (!isNaN(parsed) && parsed > 0) { return ` · ● ${formatET(parsed)}`; } @@ -263,7 +274,7 @@ function buildETComputationTable(effectiveTokens, tokenUsageMarkdown = null) { const lines = []; lines.push("
"); - lines.push(`ET computation details (formula: ${w.input}×input + ${w.cached_input}×cached + ${w.output}×output + ${w.reasoning}×reasoning + ${w.cache_write}×cache_write, then ×model multiplier)`); + lines.push(`ET computation details (formula: ${w.input}×max(input-cached,0) + ${w.cached_input}×cached + ${w.output}×output + ${w.reasoning}×reasoning + ${w.cache_write}×cache_write, then ×model multiplier)`); lines.push(""); if (tokenUsageMarkdown) { @@ -273,8 +284,11 @@ function buildETComputationTable(effectiveTokens, tokenUsageMarkdown = null) { } else { const usage = readAgentUsage(); if (usage) { - const inputWeighted = w.input * (usage.input_tokens || 0); - const cachedWeighted = w.cached_input * (usage.cache_read_tokens || 0); + const inputTokens = usage.input_tokens || 0; + const cachedInputTokens = usage.cache_read_tokens || 0; + const effectiveInputTokens = Math.max(inputTokens - cachedInputTokens, 0); + const inputWeighted = w.input * effectiveInputTokens; + const cachedWeighted = w.cached_input * cachedInputTokens; const outputWeighted = w.output * (usage.output_tokens || 0); const cacheWriteWeighted = w.cache_write * (usage.cache_write_tokens || 0); // Reasoning tokens are not tracked in agent_usage.json (they are captured per-model in @@ -284,8 +298,8 @@ function buildETComputationTable(effectiveTokens, tokenUsageMarkdown = null) { lines.push("| Token class | Count | Weight | Weighted tokens |"); lines.push("|-------------|------:|------:|---------------:|"); - lines.push(`| Input | ${(usage.input_tokens || 0).toLocaleString()} | ×${w.input} | ${Math.round(inputWeighted).toLocaleString()} |`); - lines.push(`| Cached input | ${(usage.cache_read_tokens || 0).toLocaleString()} | ×${w.cached_input} | ${Math.round(cachedWeighted).toLocaleString()} |`); + lines.push(`| Input (minus cached) | ${effectiveInputTokens.toLocaleString()} | ×${w.input} | ${Math.round(inputWeighted).toLocaleString()} |`); + lines.push(`| Cached input | 
${cachedInputTokens.toLocaleString()} | ×${w.cached_input} | ${Math.round(cachedWeighted).toLocaleString()} |`); lines.push(`| Output | ${(usage.output_tokens || 0).toLocaleString()} | ×${w.output} | ${Math.round(outputWeighted).toLocaleString()} |`); lines.push(`| Cache write | ${(usage.cache_write_tokens || 0).toLocaleString()} | ×${w.cache_write} | ${Math.round(cacheWriteWeighted).toLocaleString()} |`); lines.push(`| **Base weighted** | | | **${Math.round(baseWeighted).toLocaleString()}** |`); diff --git a/setup/js/emit_outcome_spans.cjs b/setup/js/emit_outcome_spans.cjs index c857223e..928f3072 100644 --- a/setup/js/emit_outcome_spans.cjs +++ b/setup/js/emit_outcome_spans.cjs @@ -33,6 +33,7 @@ const { sendOTLPToAllEndpoints, appendToOTLPJSONL, readJSONIfExists, + buildCustomOTLPAttributes, } = require("./send_otlp_span.cjs"); const AW_INFO_PATH = "/tmp/gh-aw/aw_info.json"; @@ -148,6 +149,11 @@ async function main() { const changedFiles = typeof eval_.changed_files === "number" ? eval_.changed_files : null; const additions = typeof eval_.additions === "number" ? eval_.additions : null; const deletions = typeof eval_.deletions === "number" ? eval_.deletions : null; + const reactionsTotal = typeof eval_.reactions_total === "number" ? eval_.reactions_total : null; + const reactionsPositive = typeof eval_.reactions_positive === "number" ? eval_.reactions_positive : null; + const reactionsNegative = typeof eval_.reactions_negative === "number" ? eval_.reactions_negative : null; + const comments = typeof eval_.comments === "number" ? 
eval_.comments : null; + const zeroTouch = eval_.zero_touch === true; const attributes = [ buildAttr("gh-aw.exporter.name", "outcome-collector"), @@ -168,6 +174,11 @@ async function main() { if (changedFiles !== null) attributes.push(buildAttr("gh-aw.outcome.changed_files", changedFiles)); if (additions !== null) attributes.push(buildAttr("gh-aw.outcome.additions", additions)); if (deletions !== null) attributes.push(buildAttr("gh-aw.outcome.deletions", deletions)); + if (reactionsTotal !== null) attributes.push(buildAttr("gh-aw.outcome.reactions_total", reactionsTotal)); + if (reactionsPositive !== null) attributes.push(buildAttr("gh-aw.outcome.reactions_positive", reactionsPositive)); + if (reactionsNegative !== null) attributes.push(buildAttr("gh-aw.outcome.reactions_negative", reactionsNegative)); + if (comments !== null) attributes.push(buildAttr("gh-aw.outcome.comments", comments)); + if (zeroTouch) attributes.push(buildAttr("gh-aw.outcome.zero_touch", true)); // Map result to OTLP status: accepted=OK, rejected=ERROR, noop=UNSET, pending/ignored=UNSET const statusCode = result === "rejected" ? 2 : result === "accepted" ? 
1 : 0; @@ -205,6 +216,8 @@ async function main() { buildAttr("gh-aw.outcome.acceptance_rate", getSummaryNumber("acceptance_rate", 0)), buildAttr("gh-aw.outcome.waste_rate", getSummaryNumber("waste_rate", 0)), buildAttr("gh-aw.outcome.noop_rate", getSummaryNumber("noop_rate", 0)), + buildAttr("gh-aw.outcome.zero_touch_count", getSummaryNumber("zero_touch", 0)), + buildAttr("gh-aw.outcome.zero_touch_rate", getSummaryNumber("zero_touch_rate", 0)), buildAttr("gh-aw.outcome.item_count", evaluations.length), ]; @@ -212,15 +225,20 @@ async function main() { summaryAttributes.push(buildAttr("gh-aw.outcome.date", summary.date)); } - // Median time-to-resolution for resolved items - const resolutionTimes = evaluations - .filter(e => typeof e.resolution_sec === "number" && e.resolution_sec > 0) - .map(e => e.resolution_sec) - .sort((a, b) => a - b); - if (resolutionTimes.length > 0) { - const mid = Math.floor(resolutionTimes.length / 2); - const median = resolutionTimes.length % 2 !== 0 ? resolutionTimes[mid] : Math.round((resolutionTimes[mid - 1] + resolutionTimes[mid]) / 2); - summaryAttributes.push(buildAttr("gh-aw.outcome.median_resolution_sec", median)); + // Median time-to-resolution: prefer summary value, fall back to local computation + const summaryMedian = summary && typeof summary.median_resolution_sec === "number" ? summary.median_resolution_sec : null; + if (summaryMedian !== null) { + summaryAttributes.push(buildAttr("gh-aw.outcome.median_resolution_sec", summaryMedian)); + } else { + const resolutionTimes = evaluations + .filter(e => typeof e.resolution_sec === "number" && e.resolution_sec > 0) + .map(e => e.resolution_sec) + .sort((a, b) => a - b); + if (resolutionTimes.length > 0) { + const mid = Math.floor(resolutionTimes.length / 2); + const median = resolutionTimes.length % 2 !== 0 ? 
resolutionTimes[mid] : Math.round((resolutionTimes[mid - 1] + resolutionTimes[mid]) / 2); + summaryAttributes.push(buildAttr("gh-aw.outcome.median_resolution_sec", median)); + } } // Trigger type distribution @@ -241,6 +259,9 @@ async function main() { summaryAttributes.push(buildAttr("gh-aw.outcome.types", types.join(","))); } + // Append user-defined custom attributes from observability.otlp.attributes. + summaryAttributes.push(...buildCustomOTLPAttributes()); + const summarySpan = buildOTLPSpan({ traceId, spanId: summarySpanId, diff --git a/setup/js/evaluate_outcomes.cjs b/setup/js/evaluate_outcomes.cjs index eef8a218..3fd30d87 100644 --- a/setup/js/evaluate_outcomes.cjs +++ b/setup/js/evaluate_outcomes.cjs @@ -163,6 +163,11 @@ function secondsBetween(from, to) { * @property {number | null} changed_files * @property {number | null} additions * @property {number | null} deletions + * @property {number | null} reactions_total + * @property {number | null} reactions_positive + * @property {number | null} reactions_negative + * @property {number | null} comments + * @property {boolean} zero_touch */ /** @@ -186,6 +191,11 @@ function evaluateItem(item, defaultRepo) { changed_files: null, additions: null, deletions: null, + reactions_total: null, + reactions_positive: null, + reactions_negative: null, + comments: null, + zero_touch: false, }; if (!url) { @@ -206,6 +216,18 @@ function evaluateItem(item, defaultRepo) { } out.result = "accepted"; out.detail = data.state; + out.comments = typeof data.comments === "number" ? data.comments : null; + + // Reactions on issues + if (data.reactions && typeof data.reactions === "object") { + const r = data.reactions; + const positive = (r["+1"] || 0) + (r.heart || 0) + (r.hooray || 0) + (r.rocket || 0); + const negative = (r["-1"] || 0) + (r.confused || 0); + out.reactions_total = r.total_count != null ? 
r.total_count : positive + negative + (r.laugh || 0) + (r.eyes || 0); + out.reactions_positive = positive; + out.reactions_negative = negative; + } + if (data.state === "closed" && data.created_at && data.closed_at) { out.resolution_sec = secondsBetween(data.created_at, data.closed_at); } @@ -228,6 +250,22 @@ function evaluateItem(item, defaultRepo) { out.changed_files = typeof data.changed_files === "number" ? data.changed_files : null; out.additions = typeof data.additions === "number" ? data.additions : null; out.deletions = typeof data.deletions === "number" ? data.deletions : null; + out.comments = typeof data.comments === "number" ? data.comments : null; + + // Reactions + if (data.reactions && typeof data.reactions === "object") { + const r = data.reactions; + const positive = (r["+1"] || 0) + (r.heart || 0) + (r.hooray || 0) + (r.rocket || 0); + const negative = (r["-1"] || 0) + (r.confused || 0); + out.reactions_total = r.total_count != null ? r.total_count : positive + negative + (r.laugh || 0) + (r.eyes || 0); + out.reactions_positive = positive; + out.reactions_negative = negative; + } + + // Zero-touch: merged with no human review comments and no issue-level comments + if (data.merged === true && out.review_comments === 0 && out.comments === 0) { + out.zero_touch = true; + } if (data.merged === true) { out.result = "accepted"; @@ -315,6 +353,9 @@ function main() { let pending = 0; let total = 0; let noop = 0; + let zeroTouchCount = 0; + /** @type {number[]} */ + const resolutionTimes = []; // Clear the evaluations file fs.writeFileSync(EVAL_JSONL, ""); @@ -393,6 +434,9 @@ function main() { switch (evalResult.result) { case "accepted": accepted++; + if (evalResult.zero_touch === true) { + zeroTouchCount++; + } break; case "rejected": rejected++; @@ -401,6 +445,9 @@ function main() { pending++; break; } + if (typeof evalResult.resolution_sec === "number" && evalResult.resolution_sec > 0) { + resolutionTimes.push(evalResult.resolution_sec); + } 
fs.appendFileSync( EVAL_JSONL, @@ -420,6 +467,11 @@ function main() { changed_files: evalResult.changed_files, additions: evalResult.additions, deletions: evalResult.deletions, + reactions_total: evalResult.reactions_total, + reactions_positive: evalResult.reactions_positive, + reactions_negative: evalResult.reactions_negative, + comments: evalResult.comments, + zero_touch: evalResult.zero_touch || false, }) + "\n" ); } @@ -442,6 +494,15 @@ function main() { const wasteRate = total > 0 ? rejected / total : 0; const noopRate = total + noop > 0 ? noop / (total + noop) : 0; + // Economics: zero-touch rate and median time-to-outcome + const zeroTouchRate = accepted > 0 ? zeroTouchCount / accepted : 0; + resolutionTimes.sort((a, b) => a - b); + let medianResolutionSec = null; + if (resolutionTimes.length > 0) { + const mid = Math.floor(resolutionTimes.length / 2); + medianResolutionSec = resolutionTimes.length % 2 !== 0 ? resolutionTimes[mid] : Math.round((resolutionTimes[mid - 1] + resolutionTimes[mid]) / 2); + } + writeJSONAtomic(SUMMARY_PATH, { runs_checked: checked, total_outcomes: total, @@ -453,6 +514,9 @@ function main() { acceptance_rate: Math.round(acceptanceRate * 10000) / 10000, waste_rate: Math.round(wasteRate * 10000) / 10000, noop_rate: Math.round(noopRate * 10000) / 10000, + zero_touch: zeroTouchCount, + zero_touch_rate: Math.round(zeroTouchRate * 10000) / 10000, + median_resolution_sec: medianResolutionSec, date: new Date().toISOString().slice(0, 10), }); diff --git a/setup/js/generate_safe_outputs_tools.cjs b/setup/js/generate_safe_outputs_tools.cjs index 4508bbec..f88960ae 100644 --- a/setup/js/generate_safe_outputs_tools.cjs +++ b/setup/js/generate_safe_outputs_tools.cjs @@ -64,7 +64,7 @@ async function main() { const config = JSON.parse(fs.readFileSync(configPath, "utf8")); // Load tools meta (description suffixes, repo params, dynamic tools) - /** @type {{description_suffixes?: Record, repo_params?: Record, dynamic_tools?: Array}} */ + /** @type 
{{description_suffixes?: Record, repo_params?: Record, dynamic_tools?: Array, required_field_removals?: Record}} */ let toolsMeta = { description_suffixes: {}, repo_params: {}, dynamic_tools: [] }; if (fs.existsSync(toolsMetaPath)) { toolsMeta = JSON.parse(fs.readFileSync(toolsMetaPath, "utf8")); @@ -106,9 +106,7 @@ async function main() { // Remove fields from inputSchema.required when configured (e.g. allow-body: false) const requiredRemovals = toolsMeta.required_field_removals?.[tool.name]; if (requiredRemovals && Array.isArray(enhancedTool.inputSchema?.required)) { - enhancedTool.inputSchema.required = enhancedTool.inputSchema.required.filter( - /** @param {string} f */ f => !requiredRemovals.includes(f) - ); + enhancedTool.inputSchema.required = enhancedTool.inputSchema.required.filter(/** @param {string} f */ f => !requiredRemovals.includes(f)); if (enhancedTool.inputSchema.required.length === 0) { delete enhancedTool.inputSchema.required; } diff --git a/setup/js/model_multipliers.json b/setup/js/model_multipliers.json index 934da8d0..fb442e29 100644 --- a/setup/js/model_multipliers.json +++ b/setup/js/model_multipliers.json @@ -42,8 +42,8 @@ "gpt-4.1": 1.0, "gpt-4.1-2025-04-14": 1.0, "gpt-41-copilot": 1.0, - "gpt-4.1-mini": 0.1, - "gpt-4.1-nano": 0.05, + "gpt-4.1-mini": 1.0, + "gpt-4.1-nano": 1.0, "gpt-4-turbo": 1.0, "gpt-4": 1.0, "gpt-5": 1.0, @@ -62,15 +62,15 @@ "gpt-5.1-chat-latest": 3.0, "gpt-5-codex": 1.0, "gpt-5.1-codex": 3.0, - "gpt-5.1-codex-mini": 0.33, + "gpt-5.1-codex-mini": 3.0, "gpt-5.1-codex-max": 3.0, "gpt-5.1-codex-max-customsummarizer": 3.0, "gpt-5.2": 3.0, "gpt-5.2-2025-12-11": 3.0, "gpt-5.2-chat-latest": 3.0, "gpt-5.2-codex": 3.0, - "gpt-5.2-pro": 2.0, - "gpt-5.2-pro-2025-12-11": 2.0, + "gpt-5.2-pro": 3.0, + "gpt-5.2-pro-2025-12-11": 3.0, "gpt-5.3-chat-latest": 3.0, "gpt-5.3-codex": 6.0, "gpt-5.3-codex-api-preview": 6.0, @@ -78,10 +78,10 @@ "gpt-5.4-2026-03-05": 6.0, "gpt-5.4-mini": 6.0, "gpt-5.4-mini-2026-03-17": 6.0, - "gpt-5.4-nano": 
0.05, - "gpt-5.4-nano-2026-03-17": 0.05, - "gpt-5.4-pro": 2.0, - "gpt-5.4-pro-2026-03-05": 2.0, + "gpt-5.4-nano": 6.0, + "gpt-5.4-nano-2026-03-17": 6.0, + "gpt-5.4-pro": 6.0, + "gpt-5.4-pro-2026-03-05": 6.0, "gpt-5.5": 7.5, "gpt-5.5-2026-04-23": 7.5, "gpt-5.5-pro": 2.0, @@ -143,5 +143,35 @@ "gemma-4-31b-it": 0.2, "grok-code-fast-1": 0.33, "raptor-mini": 0.33 + }, + "deprecated_models": { + "claude-3-5-haiku": true, + "claude-3-5-opus": true, + "claude-3-5-sonnet": true, + "claude-3-7-sonnet": true, + "claude-3-haiku": true, + "claude-3-opus": true, + "claude-3-sonnet": true, + "claude-haiku-4-5": true, + "claude-haiku-4.5": true, + "claude-opus-4": true, + "claude-opus-4-1": true, + "claude-opus-4-5": true, + "claude-opus-4.5": true, + "claude-opus-4.6": true, + "claude-sonnet-4": true, + "claude-sonnet-4-5": true, + "claude-sonnet-4.5": true, + "claude-sonnet-4.6": true, + "gemini-1.5-flash": true, + "gemini-1.5-pro": true, + "gemini-2.5-flash-native-audio-preview-12-2025": true, + "gpt-4": true, + "gpt-4-turbo": true, + "gpt-5": true, + "gpt-5-chat-latest": true, + "gpt-5-mini": true, + "gpt-5-nano": true, + "gpt-5-pro": true } } diff --git a/setup/js/pi_provider.cjs b/setup/js/pi_provider.cjs index 4238479a..6a449cc8 100644 --- a/setup/js/pi_provider.cjs +++ b/setup/js/pi_provider.cjs @@ -194,25 +194,31 @@ function piProviderExtension(pi) { // Fetch AWF API proxy reflection data before the agent runs to capture initial proxy state. // This is best-effort: failures are logged but do not affect the agent session. - await fetchAWFReflect({ - reflectUrl: AWF_API_PROXY_REFLECT_URL, - outputPath: AWF_REFLECT_OUTPUT_PATH, - timeoutMs: AWF_REFLECT_TIMEOUT_MS, - modelsTimeoutMs: AWF_MODELS_URL_TIMEOUT_MS, - logger: log, - }); + // Skip when AWF_REFLECT_ENABLED is not "1" (e.g. sandbox.agent: false — no api-proxy running). 
+ if (process.env.AWF_REFLECT_ENABLED === "1") { + await fetchAWFReflect({ + reflectUrl: AWF_API_PROXY_REFLECT_URL, + outputPath: AWF_REFLECT_OUTPUT_PATH, + timeoutMs: AWF_REFLECT_TIMEOUT_MS, + modelsTimeoutMs: AWF_MODELS_URL_TIMEOUT_MS, + logger: log, + }); + } }); pi.on("agent_end", async () => { // Fetch AWF API proxy reflection data after the agent finishes for the post-run step summary. // This is best-effort: failures are logged but do not affect the agent exit code. - await fetchAWFReflect({ - reflectUrl: AWF_API_PROXY_REFLECT_URL, - outputPath: AWF_REFLECT_OUTPUT_PATH, - timeoutMs: AWF_REFLECT_TIMEOUT_MS, - modelsTimeoutMs: AWF_MODELS_URL_TIMEOUT_MS, - logger: log, - }); + // Skip when AWF_REFLECT_ENABLED is not "1" (e.g. sandbox.agent: false — no api-proxy running). + if (process.env.AWF_REFLECT_ENABLED === "1") { + await fetchAWFReflect({ + reflectUrl: AWF_API_PROXY_REFLECT_URL, + outputPath: AWF_REFLECT_OUTPUT_PATH, + timeoutMs: AWF_REFLECT_TIMEOUT_MS, + modelsTimeoutMs: AWF_MODELS_URL_TIMEOUT_MS, + logger: log, + }); + } }); } diff --git a/setup/js/safe_output_handler_manager.cjs b/setup/js/safe_output_handler_manager.cjs index 484606b5..50a30223 100644 --- a/setup/js/safe_output_handler_manager.cjs +++ b/setup/js/safe_output_handler_manager.cjs @@ -1032,6 +1032,8 @@ function getContentToCheck(messageType, message, result) { return message.body || ""; case "comment_memory": return result?.managedBody || message.body || ""; + case "create_pull_request": + return result?.managedBody || message.body || ""; default: return null; } @@ -1061,6 +1063,30 @@ async function updateIssueBody(github, context, repo, issueNumber, updatedBody, core.info(`✓ Updated issue ${repo}#${issueNumber}`); } +/** + * Update the body of a pull request with resolved temporary IDs + * @param {any} github - GitHub API client + * @param {any} context - GitHub Actions context + * @param {string} repo - Repository in "owner/repo" format + * @param {number} prNumber - Pull request number 
to update + * @param {string} updatedBody - Updated body content with resolved temp IDs + * @returns {Promise} + */ +async function updatePullRequestBody(github, context, repo, prNumber, updatedBody, allowedMentionAliases = []) { + const [owner, repoName] = repo.split("/"); + + core.info(`Updating pull request ${repo}#${prNumber} body with resolved temporary IDs`); + + await github.rest.pulls.update({ + owner, + repo: repoName, + pull_number: prNumber, + body: sanitizeContent(updatedBody, { allowedAliases: allowedMentionAliases }), + }); + + core.info(`✓ Updated pull request ${repo}#${prNumber}`); +} + /** * Update the body of a discussion with resolved temporary IDs * @param {any} github - GitHub API client @@ -1228,6 +1254,10 @@ async function processSyntheticUpdates(github, context, trackedOutputs, temporar core.debug(`Skipping synthetic update for comment_memory - comment ID not tracked`); } break; + case "create_pull_request": + await updatePullRequestBody(github, context, tracked.result.repo, tracked.result.number, updatedContent, allowedMentionAliases); + updateCount++; + break; default: core.debug(`Unknown output type: ${tracked.type}`); } diff --git a/setup/js/safe_outputs_action_outputs.cjs b/setup/js/safe_outputs_action_outputs.cjs index 561bc7cd..5d345b0a 100644 --- a/setup/js/safe_outputs_action_outputs.cjs +++ b/setup/js/safe_outputs_action_outputs.cjs @@ -55,13 +55,13 @@ function emitSafeOutputActionOutputs(processingResult) { const firstPRResult = successfulResults.find(r => r.type === "create_pull_request"); if (firstPRResult?.result && !Array.isArray(firstPRResult.result)) { const r = firstPRResult.result; - if (r.pull_request_number != null) { - core.setOutput("created_pr_number", String(r.pull_request_number)); - core.info(`Exported created_pr_number: ${r.pull_request_number}`); + if (r.number != null) { + core.setOutput("created_pr_number", String(r.number)); + core.info(`Exported created_pr_number: ${r.number}`); } - if (r.pull_request_url) { 
- core.setOutput("created_pr_url", r.pull_request_url); - core.info(`Exported created_pr_url: ${r.pull_request_url}`); + if (r.url) { + core.setOutput("created_pr_url", r.url); + core.info(`Exported created_pr_url: ${r.url}`); } } diff --git a/setup/js/send_otlp_span.cjs b/setup/js/send_otlp_span.cjs index 21ece71b..e449a3c3 100644 --- a/setup/js/send_otlp_span.cjs +++ b/setup/js/send_otlp_span.cjs @@ -567,6 +567,55 @@ function buildExperimentAttributes(assignments) { return attrs; } +// --------------------------------------------------------------------------- +// Custom OTLP attributes (GH_AW_OTLP_ATTRIBUTES) +// --------------------------------------------------------------------------- + +/** + * Parse the GH_AW_OTLP_ATTRIBUTES environment variable into a plain object. + * The variable is a JSON-encoded `Record` injected by the + * gh-aw compiler from the `observability.otlp.attributes` frontmatter field. + * Returns null when the variable is absent, empty, or not valid JSON. + * + * @returns {Record | null} + */ +function parseOTLPCustomAttributes() { + const raw = process.env.GH_AW_OTLP_ATTRIBUTES; + if (!raw) return null; + try { + const parsed = JSON.parse(raw); + if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null; + return /** @type {Record} */ (parsed); + } catch { + return null; + } +} + +/** + * Build additional OTLP attribute objects from the GH_AW_OTLP_ATTRIBUTES + * environment variable. + * + * Attribute values are used as-is (use GitHub Actions expressions like + * `${{ vars.MY_VALUE }}` in workflow frontmatter for dynamic values). + * Attributes whose value is an empty string are omitted. When no custom + * attributes are configured, an empty array is returned. 
+ * + * @returns {Array<{key: string, value: object}>} + */ +function buildCustomOTLPAttributes() { + const customDefs = parseOTLPCustomAttributes(); + if (!customDefs) return []; + + const result = []; + for (const [key, value] of Object.entries(customDefs)) { + if (typeof key !== "string" || !key || typeof value !== "string") continue; + if (value !== "") { + result.push(buildAttr(key, value)); + } + } + return result; +} + // --------------------------------------------------------------------------- // HTTP transport // --------------------------------------------------------------------------- @@ -1143,6 +1192,8 @@ async function sendJobSetupSpan(options = {}) { const experimentAssignments = readExperimentAssignments(); attributes.push(...buildExperimentAttributes(experimentAssignments)); attributes.push(...buildEpisodeAttributesFromContext(awInfo, runId, runAttempt)); + // Append user-defined custom attributes from observability.otlp.attributes. + attributes.push(...buildCustomOTLPAttributes()); const resourceAttributes = buildGitHubActionsResourceAttributes({ repository, @@ -1852,6 +1903,9 @@ async function sendJobConclusionSpan(spanName, options = {}) { const conclusionExperimentAssignments = readExperimentAssignments(); attributes.push(...buildExperimentAttributes(conclusionExperimentAssignments)); + // Append user-defined custom attributes from observability.otlp.attributes. + attributes.push(...buildCustomOTLPAttributes()); + // Enrich conclusion span with outcome evaluation fleet metrics when available. // Written by the outcome-collector workflow's pre-agent step. const outcomeSummary = readJSONIfExists("/tmp/gh-aw/outcome-summary.json"); @@ -2023,7 +2077,12 @@ async function sendJobConclusionSpan(spanName, options = {}) { } } - if (!hasDedicatedAgentSpan) { + // Only attach token-usage attributes to the agent job's conclusion span as a + // fallback (when no dedicated agent sub-span was emitted). 
Non-agent jobs + // (conclusion, detection, safe_outputs) also have agent_usage.json on disk + // (downloaded via the agent artifact) but must NOT emit token data — otherwise + // every sum(gen_ai.usage.*) query is inflated by the number of downstream jobs. + if (!hasDedicatedAgentSpan && jobName === "agent") { attributes.push(...usageAttrs); } @@ -2089,4 +2148,6 @@ module.exports = { OTEL_JSONL_PATH, appendToOTLPJSONL, buildExperimentAttributes, + parseOTLPCustomAttributes, + buildCustomOTLPAttributes, }; diff --git a/setup/sh/mask_otlp_attributes.sh b/setup/sh/mask_otlp_attributes.sh new file mode 100755 index 00000000..8b83cfc4 --- /dev/null +++ b/setup/sh/mask_otlp_attributes.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +set +o histexpand + +# +# mask_otlp_attributes.sh - Mask GH_AW_OTLP_ATTRIBUTES values from GitHub Actions logs +# +# Issues the ::add-mask:: workflow command for every value found in the +# GH_AW_OTLP_ATTRIBUTES JSON object so that user-supplied custom OTLP span +# attribute values (e.g. session IDs, user IDs) do not leak into GitHub +# Actions runner logs (including debug/step-debug logs). +# +# GH_AW_OTLP_ATTRIBUTES is a JSON-encoded Record injected by +# the gh-aw compiler from the `observability.otlp.attributes` frontmatter field. +# Each value is masked individually; empty values are skipped. +# +# Requires node to be available on PATH (it is always present on GitHub Actions +# runners when the gh-aw setup step has run). +# +# Exit codes: +# 0 - Success (variable may be absent or empty, which is a no-op) + +set -euo pipefail + +_attrs="${GH_AW_OTLP_ATTRIBUTES:-}" +[ -z "$_attrs" ] && exit 0 + +# Use node to extract the string values from the JSON object and print one +# per line (null/empty values are omitted). +_GH_AW_NODE=$(which node 2>/dev/null || command -v node 2>/dev/null || echo node) + +# Read the values into an array, then issue ::add-mask:: for each non-empty one. 
+mapfile -t _values < <( + printf '%s' "$_attrs" | "$_GH_AW_NODE" -e ' + let raw = ""; + process.stdin.on("data", d => { raw += d; }); + process.stdin.on("end", () => { + try { + const obj = JSON.parse(raw); + if (obj !== null && typeof obj === "object" && !Array.isArray(obj)) { + for (const v of Object.values(obj)) { + if (typeof v === "string" && v.length > 0) { + process.stdout.write(v + "\n"); + } + } + } + } catch { /* invalid JSON – no-op */ } + }); + ' +) + +for _val in "${_values[@]}"; do + [ -n "$_val" ] && echo '::add-mask::'"$_val" +done