From 9627c3341a798c9dfb8fc3862912cf5a23afae84 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 00:09:19 +0900 Subject: [PATCH 01/15] fix(review): tighten 3 review-quality detectors to stop mis-flagging correct reviews (#235) Root 2 / Way 1 (detector precision). The review-quality gate classified genuinely-correct external reviews as failures via three over-broad detectors (Arm B repro nulled 5/7 known-correct reviews). Narrow each so correct reviews stop being flagged while genuinely-bad reviews STAY flagged (the buildReviewSlotDisposition demotion gate must keep preventing false approvals). Each change only narrows its flag and fails toward flagging when in doubt. - permission_blocked: suppress only when a line praises the reviewed code's permission-error handling AND carries no review-process read-denial signal. A reviewer 'I cannot read the file' still flags. - not_reviewed: a could-not-inspect gap attributed to a foreign file is out-of-scope only when the review independently proves it inspected the selected source. Generic/selected-path denials, and gaps without proven inspection, stay flagged. - shallow_output: a short review is substantive only when one clause co-locates a concrete defect cue with a code locus and no negation. A bare 'Verdict: APPROVE' stays shallow. Adds a 20-case adversarial safety corpus driving the real buildReviewAuditManifest entry point, asserting both directions (correct -> not flagged; genuinely-bad -> still flagged). All 9 plugin copies kept byte-identical via sync-review-prompt. Way 2 (advisory disposition state machine) deferred to #236. 
Closes #235 Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 181 ++++++++- plugins/claude/scripts/lib/review-prompt.mjs | 181 ++++++++- plugins/gemini/scripts/lib/review-prompt.mjs | 181 ++++++++- plugins/grok/scripts/lib/review-prompt.mjs | 181 ++++++++- plugins/kimi/scripts/lib/review-prompt.mjs | 181 ++++++++- .../scripts/lib/review-prompt.mjs | 181 ++++++++- .../relay-grok/scripts/lib/review-prompt.mjs | 181 ++++++++- .../relay-kimi/scripts/lib/review-prompt.mjs | 181 ++++++++- scripts/lib/review-prompt.mjs | 181 ++++++++- tests/unit/review-prompt.test.mjs | 360 ++++++++++++++++++ 10 files changed, 1935 insertions(+), 54 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// that the review process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. 
+function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// that 
the review process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// that 
the review process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// that the review 
process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// that the review 
process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not 
reporting +// that the review process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// 
that the review process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// 
that the review process was itself blocked. Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index 5ed505d9..1ea98b42 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -402,6 +402,7 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, + isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -698,6 +699,114 @@ function isCodePermissionConcernLine(lower) { ]); } +// A reviewer describing reviewed code that CORRECTLY HANDLES a permission +// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting +// that the review process was itself blocked. 
Suppress only when the line both +// (a) praises the code's handling of the error, and (b) carries no signal that +// the REVIEW PROCESS could not read/inspect/access the selected source. When in +// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. +function isPermissionHandlingPraiseLine(lower) { + if (!includesPermissionFailureLiteral(lower)) return false; + if (!codeCorrectlyHandlesPermissionError(lower)) return false; + return !reviewProcessBlockedSignal(lower); +} + +function codeCorrectlyHandlesPermissionError(lower) { + return includesAny(lower, [ + "correctly handles", + "correctly handle", + "handles eacces", + "handle eacces", + "handles eperm", + "handle eperm", + "handles the permission", + "handle the permission", + "gracefully handles", + "gracefully handle", + "handled correctly", + "handled gracefully", + "catches the error", + "catches the permission", + "catches eacces", + "catches eperm", + "catch eacces", + "catch eperm", + "is caught", + "are caught", + "falls back", + "fall back", + "graceful fallback", + "degrades gracefully", + "surfaces a typed error", + "surfaces an error", + "returns a clear", + "returns a typed", + "is the right behavior", + "is the correct behavior", + "the right behavior", + "correct behavior", + ]); +} + +// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was +// unable to read/inspect/access the source. Mirrors the genuine-block surface +// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase +// so a real read-denial is never masked by incidental handling-praise wording. 
+function reviewProcessBlockedSignal(lower) { + return includesAny(lower, [ + "no inspection was possible", + "could not be inspected", + "were not inspected", + "was not inspected", + "not inspected", + "could not inspect", + "cannot inspect", + "can't inspect", + "unable to inspect", + "did not inspect", + "could not access", + "cannot access", + "can't access", + "unable to access", + "prevented me", + "prevented file access", + "prevented access", + "permission block prevented", + "permission blocks prevented", + "while reading", + "while inspecting", + "i could not", + "i was unable", + "i was blocked", + "review was blocked", + "blocked from reading", + "blocked from inspecting", + "i could not read", + "i cannot read", + "i can't read", + "i was unable to read", + "could not read the source", + "cannot read the source", + "could not read the file", + "cannot read the file", + "could not read the selected", + "cannot read the selected", + "could not read it", + "cannot read it", + "selected source", + "selected file", + "selected files", + "supplied source", + "supplied diff", + "supplied file", + "supplied files", + "source file", + "source files", + "target file", + "target files", + ]); +} + function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) @@ -1010,7 +1119,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not 
inspect", "unable to inspect"])) { return false; } @@ -1054,9 +1163,9 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower) { +function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - return includesAny(lower, [ + if (includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1070,7 +1179,35 @@ function isOutOfScopeInspectionGapLine(lower) { "not part of this packet", "not supplied", "not included in the prompt", - ]); + ])) return true; + // Foreign-path branch: a could-not-inspect gap attributed to a concrete file + // that is NOT the selected source is an out-of-scope gap, not a denial that the + // selected source itself was reviewed. Suppress ONLY when the line names such a + // foreign path, does not generically deny the selected source, does not name the + // selected path, AND the review independently proves the selected source WAS + // inspected. When in doubt, fall through to false so the line stays flaggable. + if (mentionsSelectedSourceGeneric(lower)) return false; + if (mentionsSelectedSourcePath(lower, selectedSource)) return false; + if (!selectedSourceInspected) return false; + return namesNonSelectedFileGapLine(lower, selectedSource); +} + +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +function namesNonSelectedFileGapLine(lower, selectedSource) { + const selectedPaths = (selectedSource?.files ?? []) + .map((file) => String(file?.path ?? 
"").toLowerCase()) + .filter(Boolean); + NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; + let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + while (match !== null) { + const path = match[1]; + const isSelected = selectedPaths.some((selected) => ( + selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) + )); + if (!isSelected) return true; + match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); + } + return false; } function isPriorReviewCommentsGapLine(lower) { @@ -1149,10 +1286,15 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); + const selectedSourceInspected = mentionsSelectedSourceInspection( + normalizeReviewSearchText(text).toLowerCase(), + selectedSource, + ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); + return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + && !isPriorReviewCommentsGapLine(lower); }) .join("\n") .toLowerCase(); @@ -1262,6 +1404,31 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; +// A short review is substantive (not shallow) when SOME single clause names a +// concrete code locus AND describes a specific defect/change at it, and that +// clause is not a negation/absence/praise assertion. Requires co-location so a +// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something +// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing +// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only +// asserts correctness stays flagged (conservative — fail toward flagging). +const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? 
the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +const CONCRETE_FINDING_CODE_LOCUS = [ + /(? { + if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + }); +} + function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1302,9 +1469,11 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview; + && !conciseTinyReview + && !conciseConcreteReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index d839fcf1..185021c0 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -3948,3 +3948,363 @@ for (const [name, file] of REVIEW_PROMPT_MODULES) { }), "working-tree"); }); } + +// --------------------------------------------------------------------------- +// Root 2 detector-tightening safety corpus (#235 Way 1). +// Each case drives the real buildReviewAuditManifest entry point and asserts +// BOTH semantic_failure_reasons membership AND failed_review_slot end-to-end. +// MUST-NOT-FLAG cases prove correct reviews are no longer mis-flagged. 
+// MUST-STILL-FLAG cases are load-bearing safety regressions: a genuinely-bad +// review must stay flagged so buildReviewSlotDisposition keeps demoting it. +// --------------------------------------------------------------------------- + +// Benign filler used to push a non-shallow MUST-NOT-FLAG review past the 500 +// char looks_shallow threshold without introducing denial/permission/defect +// noise that would trip an unrelated detector. +const ROOT2_PAD = [ + "The structured sections below follow the reviewer contract exactly.", + "Base and head refs were confirmed against the supplied metadata.", + "No timeout, truncation, interruption, or permission block occurred during this review.", + "The summary, blocking findings, and non-blocking concerns are kept distinct.", + "This narrative padding exists only to exercise the non-shallow code path.", +].join("\n"); + +// Detector 1 — permission_blocked -------------------------------------------- + +test("root2 detector1: handling-praise reviews do not flag permission_blocked", () => { + const cases = [ + "Verdict: Approve\nThe new code correctly handles EACCES when it cannot read the optional config file: it catches the error, logs a warning, and falls back to documented defaults.", + "Verdict: Approve\nThe writer correctly catches EPERM thrown by fs.writeSync and surfaces a typed error to the caller instead of crashing.", + "Verdict: Approve\nThe diff returns a clear 'permission denied' message to the API client when the user lacks the scope, which is the right behavior.", + // GUARD REGRESSION: reviewed-process subject must stay unflagged after Edit 1b. 
+ "Verdict: Approve\nWhen the process cannot read the optional cache file it raises EACCES; the new code catches the error and falls back gracefully, which is the correct behavior.", + ]; + for (const result of cases) { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], + result: `${result}\n${ROOT2_PAD}`, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), + false, + `permission_blocked should be absent for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `failed_review_slot should be false for: ${result}`); + } +}); + +test("root2 detector1: genuine reviewer read-denials still flag permission_blocked", () => { + const cases = [ + "I could not read src/auth.js: EACCES permission denied; no inspection was possible.", + "Verdict: Approve\nI wanted to verify the code handles EACCES, but I could not read the file at all, so no inspection was possible.", + "Verdict: Approve\nThe diff supposedly falls back on EPERM, but the selected source could not be inspected due to permission denied.", + ]; + for (const result of cases) { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/auth.js", text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), + true, + `permission_blocked should be present for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, true, `failed_review_slot should be true for: ${result}`); + } +}); + +test("root2 detector1: long structured APPROVE with reviewer-subject read-denial still flags", () => { + // ADVERSARIAL GUARD REGRESSION: >500 chars, full verdict + both finding + // sections, but the summary asserts "I 
cannot read the file" — a genuine + // reviewer-process block. reviewProcessBlockedSignal must match + // "i cannot read"/"cannot read the file" so suppression is refused. + const result = [ + "Verdict: APPROVE", + "Blocking findings", + "- No blocking findings are claimed for this change.", + "Non-blocking concerns", + "- None worth raising at this time.", + "Summary", + "- I cannot read the file (EACCES) but the code falls back, which is correct behavior, so I am approving on that basis.", + ROOT2_PAD, + ].join("\n"); + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +// Detector 2 — not_reviewed -------------------------------------------------- + +test("root2 detector2: foreign-path inspection gaps with proven selected-source inspection do not flag", () => { + const cases = [ + { + selected: "cart.js", + result: [ + "Verdict: APPROVE", + "Blocking findings", + "- None.", + "Non-blocking concerns", + "- The supplied cart.js diff was fully reviewed and the arithmetic is correct.", + "- I could not inspect the upstream caller in routes.js, but the diff is internally consistent.", + ROOT2_PAD, + ].join("\n"), + }, + { + selected: "src/auth/login.js", + result: [ + "Verdict: APPROVE", + "Blocking findings", + "- None.", + "Non-blocking concerns", + "- The selected source src/auth/login.js was fully inspected; the token comparison is constant-time.", + "- I was unable to inspect middleware/session.js, so its interaction is noted as a gap.", + ROOT2_PAD, + ].join("\n"), + }, + { + selected: "src/order/process.js", + result: [ + "Verdict: REQUEST CHANGES", + "Blocking findings", + "- I reviewed the full supplied source 
src/order/process.js and the rounding is wrong.", + "Non-blocking concerns", + "- I could not inspect config/defaults.json, but the bug is determinable from the diff.", + ROOT2_PAD, + ].join("\n"), + }, + ]; + for (const { selected, result } of cases) { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), + false, + `not_reviewed should be absent for selected=${selected}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `failed_review_slot should be false for selected=${selected}`); + } +}); + +test("root2 detector2: not-reviewed verdict still flags not_reviewed", () => { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Verdict: NOT REVIEWED.\nNo file content examined; the selected source was not inspected.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector2: generic selected-source denial still flags not_reviewed", () => { + // mentionsSelectedSourceGeneric true -> foreign-path branch bails. 
+ const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "Blocking findings: none.", + "The selected source was not inspected; I could not inspect it.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector2: self-referential could-not-inspect of the selected path still flags", () => { + // mentionsSelectedSourcePath true -> foreign-path branch bails. + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "Blocking findings: none.", + "I could not inspect sample.js because access was unavailable.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector2: hyphenated pass-through prose with no file token still flags", () => { + // No file token -> namesNonSelectedFileGapLine false -> foreign-path branch + // returns false -> the bare "could not inspect" substring fires not_reviewed. 
+ const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "- Null check: pass-through for trusted callers, but I could not inspect the error path.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector2: foreign-path gap WITHOUT proven selected-source inspection still flags", () => { + // ADVERSARIAL GUARD REGRESSION: names only a foreign path, no generic token, + // and NEVER applies an inspection verb to the selected path -> selectedSourceInspected + // false -> Edit 2a's `if (!selectedSourceInspected) return false;` blocks suppression. + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/billing/charge.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "Blocking findings: none.", + "I could not inspect models/user.js, which is where the real authentication logic lives, so this approval rests on the commit message and the diff summary rather than on the code itself.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +// Detector 3 — shallow_output ------------------------------------------------ + +test("root2 detector3: terse-but-concrete reviews do not flag shallow_output", () => { + const cases = [ + { + selected: "src/cart.js", + result: "Verdict: REQUEST CHANGES. src/cart.js total() subtracts item.price instead of adding; the reduce should use sum + item.price. 
That is the only blocker.", + }, + { + selected: "parser.mjs", + result: "Request changes: parser.mjs scanDigits() has an off-by-one; the loop should use index <= len, not index < len.", + }, + { + selected: "utils.js", + result: "Verdict: REQUEST CHANGES. utils.js:42 slice() drops the last element; it should be slice(0, len) not slice(0, len-1).", + }, + ]; + for (const { selected, result } of cases) { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.looks_shallow, + false, + `looks_shallow should be false for: ${result}`, + ); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + false, + `shallow_output should be absent for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `failed_review_slot should be false for: ${result}`); + } +}); + +test("root2 detector3: bare-LGTM with no verdict still flags shallow_output", () => { + // Also yields missing_verdict (Root-3-owned); assert only shallow_output here. 
+ const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Looks fine to me.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector3: terse APPROVE with no concrete finding still flags shallow_output", () => { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE\nNo blocking findings.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector3: terse APPROVE with locus but no defect cue still flags shallow_output", () => { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/cart.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE. I looked at src/cart.js handleLogin() and it is fine.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector3: defect-flavored words with no code locus still flag shallow_output", () => { + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Verdict: REQUEST CHANGES. 
Something seems incorrect and should be fixed.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector3: praise/absence clauses do not count as concrete findings (still flags)", () => { + // ADVERSARIAL GUARD REGRESSION: every cue sits in a negated/praise clause -> + // hasConcreteFinding false -> looks_shallow true. + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "parser.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE\nThe parseConfig() function correctly throws on bad input and the schema.json is missing nothing important. Solid work.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); + +test("root2 detector3: negated-finding variant does not count as concrete (still flags)", () => { + // ADVERSARIAL GUARD REGRESSION: negated-finding clauses -> stays flagged. + const manifest = buildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "app.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE. app.js handler() does not handle nothing improperly; there is no off-by-one. 
Looks good.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); + assert.equal(manifest.review_quality.failed_review_slot, true); +}); From 3112e08bca46542d116857982b18213b39f76c04 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 00:34:34 +0900 Subject: [PATCH 02/15] fix(review): parametrize root2 corpus over plugin copies to restore coverage baseline PR #237 CI coverage gate failed: plugins/*/scripts/lib/review-prompt.mjs line coverage 92.33% < baseline 95.73%. The new root2 detector1/2/3 corpus tests exercised only the shared-source buildReviewAuditManifest, leaving the new detector lines uncovered in the 5 plugin copies (the gate measures the copies, not the source). Parametrize the 20 corpus tests over all 6 REVIEW_PROMPT_MODULES using the file's existing loadReviewPromptModule convention, so the new detector code runs in every plugin copy. Copies now at 97.47% line (>= 95.73% baseline); 410 tests pass; sync-review-prompt --check clean (copies untouched). Refs #235 Co-Authored-By: Claude Opus 4.8 --- tests/unit/review-prompt.test.mjs | 466 +++++++++++++++++------------- 1 file changed, 259 insertions(+), 207 deletions(-) diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index 185021c0..90f6f6c6 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -3970,8 +3970,12 @@ const ROOT2_PAD = [ ].join("\n"); // Detector 1 — permission_blocked -------------------------------------------- +// Each test is parametrized over REVIEW_PROMPT_MODULES so the new detector code +// runs in every plugin copy (not just the shared source), keeping the per-copy +// coverage gate satisfied. targetBuildReviewAuditManifest is the per-module +// entry point; `name` is appended to every assert message for per-copy attribution. 
-test("root2 detector1: handling-praise reviews do not flag permission_blocked", () => { +test("root2 detector1: handling-praise reviews do not flag permission_blocked", async () => { const cases = [ "Verdict: Approve\nThe new code correctly handles EACCES when it cannot read the optional config file: it catches the error, logs a warning, and falls back to documented defaults.", "Verdict: Approve\nThe writer correctly catches EPERM thrown by fs.writeSync and surfaces a typed error to the caller instead of crashing.", @@ -3979,47 +3983,53 @@ test("root2 detector1: handling-praise reviews do not flag permission_blocked", // GUARD REGRESSION: reviewed-process subject must stay unflagged after Edit 1b. "Verdict: Approve\nWhen the process cannot read the optional cache file it raises EACCES; the new code catches the error and falls back gracefully, which is the correct behavior.", ]; - for (const result of cases) { - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], - result: `${result}\n${ROOT2_PAD}`, - status: "completed", - errorCode: null, - }); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), - false, - `permission_blocked should be absent for: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `failed_review_slot should be false for: ${result}`); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const result of cases) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], + result: `${result}\n${ROOT2_PAD}`, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), + false, 
+ `[${name}] permission_blocked should be absent for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for: ${result}`); + } } }); -test("root2 detector1: genuine reviewer read-denials still flag permission_blocked", () => { +test("root2 detector1: genuine reviewer read-denials still flag permission_blocked", async () => { const cases = [ "I could not read src/auth.js: EACCES permission denied; no inspection was possible.", "Verdict: Approve\nI wanted to verify the code handles EACCES, but I could not read the file at all, so no inspection was possible.", "Verdict: Approve\nThe diff supposedly falls back on EPERM, but the selected source could not be inspected due to permission denied.", ]; - for (const result of cases) { - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "src/auth.js", text: "export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), - true, - `permission_blocked should be present for: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, true, `failed_review_slot should be true for: ${result}`); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const result of cases) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/auth.js", text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), + true, + `[${name}] permission_blocked should be present for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should 
be true for: ${result}`); + } } }); -test("root2 detector1: long structured APPROVE with reviewer-subject read-denial still flags", () => { +test("root2 detector1: long structured APPROVE with reviewer-subject read-denial still flags", async () => { // ADVERSARIAL GUARD REGRESSION: >500 chars, full verdict + both finding // sections, but the summary asserts "I cannot read the file" — a genuine // reviewer-process block. reviewProcessBlockedSignal must match @@ -4034,20 +4044,23 @@ test("root2 detector1: long structured APPROVE with reviewer-subject read-denial "- I cannot read the file (EACCES) but the code falls back, which is correct behavior, so I am approving on that basis.", ROOT2_PAD, ].join("\n"); - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), true, `[${name}] permission_blocked should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); // Detector 2 — not_reviewed -------------------------------------------------- -test("root2 detector2: foreign-path inspection gaps with proven selected-source inspection do not flag", () => { +test("root2 detector2: foreign-path inspection gaps 
with proven selected-source inspection do not flag", async () => { const cases = [ { selected: "cart.js", @@ -4085,112 +4098,130 @@ test("root2 detector2: foreign-path inspection gaps with proven selected-source ].join("\n"), }, ]; - for (const { selected, result } of cases) { - const manifest = buildReviewAuditManifest({ + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const { selected, result } of cases) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), + false, + `[${name}] not_reviewed should be absent for selected=${selected}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for selected=${selected}`); + } + } +}); + +test("root2 detector2: not-reviewed verdict still flags not_reviewed", async () => { + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Verdict: NOT REVIEWED.\nNo file content examined; the selected source was not inspected.", status: "completed", errorCode: null, }); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), - false, - `not_reviewed should be absent for selected=${selected}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `failed_review_slot should be false for selected=${selected}`); + 
assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true, `[${name}] not_reviewed should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); } }); -test("root2 detector2: not-reviewed verdict still flags not_reviewed", () => { - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: "Verdict: NOT REVIEWED.\nNo file content examined; the selected source was not inspected.", - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); -}); - -test("root2 detector2: generic selected-source denial still flags not_reviewed", () => { +test("root2 detector2: generic selected-source denial still flags not_reviewed", async () => { // mentionsSelectedSourceGeneric true -> foreign-path branch bails. 
- const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: [ - "Verdict: APPROVE", - "Blocking findings: none.", - "The selected source was not inspected; I could not inspect it.", - ROOT2_PAD, - ].join("\n"), - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "Blocking findings: none.", + "The selected source was not inspected; I could not inspect it.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true, `[${name}] not_reviewed should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); -test("root2 detector2: self-referential could-not-inspect of the selected path still flags", () => { +test("root2 detector2: self-referential could-not-inspect of the selected path still flags", async () => { // mentionsSelectedSourcePath true -> foreign-path branch bails. 
- const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: [ - "Verdict: APPROVE", - "Blocking findings: none.", - "I could not inspect sample.js because access was unavailable.", - ROOT2_PAD, - ].join("\n"), - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "Blocking findings: none.", + "I could not inspect sample.js because access was unavailable.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true, `[${name}] not_reviewed should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); -test("root2 detector2: hyphenated pass-through prose with no file token still flags", () => { +test("root2 detector2: hyphenated pass-through prose with no file token still flags", async () => { // No file token -> namesNonSelectedFileGapLine false -> foreign-path branch // returns false -> the bare "could not inspect" substring fires not_reviewed. 
- const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: [ - "Verdict: APPROVE", - "- Null check: pass-through for trusted callers, but I could not inspect the error path.", - ROOT2_PAD, - ].join("\n"), - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "- Null check: pass-through for trusted callers, but I could not inspect the error path.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true, `[${name}] not_reviewed should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); -test("root2 detector2: foreign-path gap WITHOUT proven selected-source inspection still flags", () => { +test("root2 detector2: foreign-path gap WITHOUT proven selected-source inspection still flags", async () => { // ADVERSARIAL GUARD REGRESSION: names only a foreign path, no generic token, // and NEVER applies an inspection verb to the selected path -> selectedSourceInspected // false -> Edit 2a's `if (!selectedSourceInspected) return false;` blocks suppression. 
- const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "src/billing/charge.js", text: "export const value = 1;\n" }], - result: [ - "Verdict: APPROVE", - "Blocking findings: none.", - "I could not inspect models/user.js, which is where the real authentication logic lives, so this approval rests on the commit message and the diff summary rather than on the code itself.", - ROOT2_PAD, - ].join("\n"), - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/billing/charge.js", text: "export const value = 1;\n" }], + result: [ + "Verdict: APPROVE", + "Blocking findings: none.", + "I could not inspect models/user.js, which is where the real authentication logic lives, so this approval rests on the commit message and the diff summary rather than on the code itself.", + ROOT2_PAD, + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true, `[${name}] not_reviewed should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); // Detector 3 — shallow_output ------------------------------------------------ -test("root2 detector3: terse-but-concrete reviews do not flag shallow_output", () => { +test("root2 detector3: terse-but-concrete reviews do not flag shallow_output", async () => { const cases = [ { selected: "src/cart.js", @@ -4205,106 +4236,127 @@ test("root2 detector3: terse-but-concrete reviews do not flag shallow_output", ( result: 
"Verdict: REQUEST CHANGES. utils.js:42 slice() drops the last element; it should be slice(0, len) not slice(0, len-1).", }, ]; - for (const { selected, result } of cases) { - const manifest = buildReviewAuditManifest({ + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const { selected, result } of cases) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.looks_shallow, + false, + `[${name}] looks_shallow should be false for: ${result}`, + ); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + false, + `[${name}] shallow_output should be absent for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for: ${result}`); + } + } +}); + +test("root2 detector3: bare-LGTM with no verdict still flags shallow_output", async () => { + // Also yields missing_verdict (Root-3-owned); assert only shallow_output here. 
+ for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Looks fine to me.", status: "completed", errorCode: null, }); - assert.equal( - manifest.review_quality.looks_shallow, - false, - `looks_shallow should be false for: ${result}`, - ); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - false, - `shallow_output should be absent for: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `failed_review_slot should be false for: ${result}`); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] looks_shallow should be true`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); } }); -test("root2 detector3: bare-LGTM with no verdict still flags shallow_output", () => { - // Also yields missing_verdict (Root-3-owned); assert only shallow_output here. 
- const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: "Looks fine to me.", - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); -}); - -test("root2 detector3: terse APPROVE with no concrete finding still flags shallow_output", () => { - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: "Verdict: APPROVE\nNo blocking findings.", - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); +test("root2 detector3: terse APPROVE with no concrete finding still flags shallow_output", async () => { + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE\nNo blocking findings.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] looks_shallow should be true`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); -test("root2 detector3: terse APPROVE with locus but no defect cue 
still flags shallow_output", () => { - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "src/cart.js", text: "export const value = 1;\n" }], - result: "Verdict: APPROVE. I looked at src/cart.js handleLogin() and it is fine.", - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); +test("root2 detector3: terse APPROVE with locus but no defect cue still flags shallow_output", async () => { + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/cart.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE. I looked at src/cart.js handleLogin() and it is fine.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] looks_shallow should be true`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); -test("root2 detector3: defect-flavored words with no code locus still flag shallow_output", () => { - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: "Verdict: REQUEST CHANGES. 
Something seems incorrect and should be fixed.", - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); +test("root2 detector3: defect-flavored words with no code locus still flag shallow_output", async () => { + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: "Verdict: REQUEST CHANGES. Something seems incorrect and should be fixed.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] looks_shallow should be true`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); -test("root2 detector3: praise/absence clauses do not count as concrete findings (still flags)", () => { +test("root2 detector3: praise/absence clauses do not count as concrete findings (still flags)", async () => { // ADVERSARIAL GUARD REGRESSION: every cue sits in a negated/praise clause -> // hasConcreteFinding false -> looks_shallow true. - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "parser.js", text: "export const value = 1;\n" }], - result: "Verdict: APPROVE\nThe parseConfig() function correctly throws on bad input and the schema.json is missing nothing important. 
Solid work.", - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "parser.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE\nThe parseConfig() function correctly throws on bad input and the schema.json is missing nothing important. Solid work.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] looks_shallow should be true`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); -test("root2 detector3: negated-finding variant does not count as concrete (still flags)", () => { +test("root2 detector3: negated-finding variant does not count as concrete (still flags)", async () => { // ADVERSARIAL GUARD REGRESSION: negated-finding clauses -> stays flagged. - const manifest = buildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "app.js", text: "export const value = 1;\n" }], - result: "Verdict: APPROVE. app.js handler() does not handle nothing improperly; there is no off-by-one. 
Looks good.", - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true); - assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true); - assert.equal(manifest.review_quality.failed_review_slot, true); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "app.js", text: "export const value = 1;\n" }], + result: "Verdict: APPROVE. app.js handler() does not handle nothing improperly; there is no off-by-one. Looks good.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] looks_shallow should be true`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); + } }); From e7e0224bc3c4991420fa9ddeef4244766342e476 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 01:57:13 +0900 Subject: [PATCH 03/15] fix(review): bound CONCRETE_FINDING_CODE_LOCUS quantifiers to clear SonarCloud S5852 (#235) The three review-quality concrete-finding locus regexes ran with unbounded */+ quantifiers on attacker-controllable external-review text. SonarCloud flagged the ident-call and ident.member regexes as javascript:S5852 (super-linear regex backtracking). Empirically all three are already linear (the lookbehind anchors prune start positions), but bound every quantifier so linearity is STRUCTURALLY provable and the analyzer no longer fires: path-prefix {0,255}, filename {1,128}, line-number {1,9}, identifier {0,128}, inter-token whitespace {0,16}. 
Bounding yields a strict SUBSET of the original language, so the Root-2 shallow-output detector still only narrows (fails toward flagging) by construction. Verified independently: 0 divergences on the realistic+boundary corpus, 0 subset violations on adversarial input, hasConcreteFinding identical on full reviews, linear timing to 256k chars (<0.6ms). Re-synced all eight plugin/relay copies (byte-identical). Added a ReDoS-timing guard (200k-char adversarial input under a 2000ms budget) and a long-but-realistic call-locus equivalence anchor, both parametrized over all six review-prompt modules. Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 14 +++- plugins/claude/scripts/lib/review-prompt.mjs | 14 +++- plugins/gemini/scripts/lib/review-prompt.mjs | 14 +++- plugins/grok/scripts/lib/review-prompt.mjs | 14 +++- plugins/kimi/scripts/lib/review-prompt.mjs | 14 +++- .../scripts/lib/review-prompt.mjs | 14 +++- .../relay-grok/scripts/lib/review-prompt.mjs | 14 +++- .../relay-kimi/scripts/lib/review-prompt.mjs | 14 +++- scripts/lib/review-prompt.mjs | 14 +++- tests/unit/review-prompt.test.mjs | 71 +++++++++++++++++++ 10 files changed, 170 insertions(+), 27 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index 1ea98b42..e5176a3b 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -1412,10 +1412,18 @@ const TINY_SOURCE_MAX_LINES = 5; // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? 
to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): +// these run on adversarial external-review text, so each must be provably linear-time +// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, +// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so +// bounding only clips pathological >bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. 
const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?bound runs — it never changes a match on real review +// text (verified: 0 divergences on the realistic corpus) and the bounded language is a +// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward +// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(? { + // CONCRETE_FINDING_CODE_LOCUS runs on adversarial external-review text. 
After bounding every + // quantifier (the S5852 fix), all three locus regexes must stay linear-time. This input is a + // defect-cue-bearing clause (so hasConcreteFinding evaluates every locus regex) followed by a + // 200k-char pathological run with no terminating dot/paren — forcing each regex to scan to the + // end without matching. A backtracking regression (re-introducing an unbounded *,+) would blow + // this from ~10ms to seconds+; the generous 2000ms budget catches that without CI flake. + const adversarialReview = "Verdict: REQUEST_CHANGES\nthe handler throws " + "a".repeat(200000); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const start = process.hrtime.bigint(); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], + result: adversarialReview, + status: "completed", + errorCode: null, + }); + const elapsedMs = Number(process.hrtime.bigint() - start) / 1e6; + assert.ok( + elapsedMs < 2000, + `[${name}] buildReviewAuditManifest took ${elapsedMs.toFixed(1)}ms on a 200k-char adversarial review; ` + + "the bounded locus regexes must be linear (<2000ms). A super-linear regression likely re-introduced an unbounded quantifier.", + ); + // A 200k-char review is long, so it is never "shallow" regardless of locus matching. + assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] a 200k-char review must not be flagged shallow`); + } +}); + +test("root2 detector3: long-but-realistic call loci still escape the shallow flag after bounding", async () => { + // EQUIVALENCE ANCHOR: the identifier bound ({0,128}) sits far above any real identifier, so a + // concise review whose only concrete finding cites a long-but-realistic function call must still + // be recognized (looks_shallow=false). 
Guards against tightening the bound far enough to clip + // real loci and re-introduce the Root-2 false positive. (Only the call locus is asserted here: + // hasConcreteFinding splits clauses on ".", so path/member loci are evaluated on dot-free clauses + // — that pre-existing reachability gap is tracked separately, not relied on by this fix.) + const cases = [ + { + selected: "src/services/auth/scheduler.js", + result: "Verdict: REQUEST CHANGES. The function validateAndRefreshAuthToken() returns the wrong expiry instead of the computed deadline", + }, + { + selected: "lib/persistence/pool.js", + result: "Request changes: acquireConnectionWithRetry() leaks the socket and the cleanup path swallows the close error", + }, + { + selected: "webhooks.js", + result: "Verdict: REQUEST CHANGES. processIncomingWebhookPayload() drops the signature header instead of validating it first", + }, + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const { selected, result } of cases) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] looks_shallow should be false for: ${result}`); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + false, + `[${name}] shallow_output should be absent for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for: ${result}`); + } + } +}); From 070e52ddb7b6ff1d218953efe14c58020a707b7c Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 02:20:21 +0900 Subject: [PATCH 04/15] fix(review): resolve two detector bugs from PR #237 review (multi-dot file token + negation-cue 
conflict) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two HIGH-priority findings from the gemini-code-assist review of #237, both reproduced through the real buildReviewAuditManifest entry point before fixing. Detector 2 (not_reviewed) — NON_SELECTED_FILE_TOKEN_RE tokenized multi-dot filenames partially (webpack.config.js -> webpack.config), so a basename denial of a directory-prefixed multi-dot SELECTED source was mis-read as a foreign-file gap and suppressed, letting a genuine selected-source denial bypass the gate (unsafe direction). It now carries interior dotted segments whole via a bounded (?:\.seg){0,8} group; every quantifier is upper-bounded so the token scan stays linear-time (no new S5852). The fix flags more (safe direction) and is equivalent to the old regex on single-dot paths (0 token divergences). Detector 3 (shallow_output) — defect cues that legitimately contain negation words (never closed, does not handle, should not) were killed by CONCRETE_FINDING_NEGATION, mis-flagging valid concrete reviews as shallow. hasConcreteFinding now strips the matched defect cue(s) from the clause before the negation check (global strip, replaced with a space to keep word boundaries); genuine negations/absence (no off-by-one, correctly, missing nothing) sit outside the cue and survive, so praise/absence LGTMs still stay flagged. Re-synced across all 8 plugin/relay copies. Added both-direction regression tests (flag + still-suppress / not-shallow + still-shallow), parametrized over all six review-prompt modules. Verified: 414 review-prompt + 442 affected-unit + 696 smoke tests pass; lint:sync clean; copy coverage 97.60% (>= 95.73% baseline). 
Refs #235 #237 Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 18 ++- plugins/claude/scripts/lib/review-prompt.mjs | 18 ++- plugins/gemini/scripts/lib/review-prompt.mjs | 18 ++- plugins/grok/scripts/lib/review-prompt.mjs | 18 ++- plugins/kimi/scripts/lib/review-prompt.mjs | 18 ++- .../scripts/lib/review-prompt.mjs | 18 ++- .../relay-grok/scripts/lib/review-prompt.mjs | 18 ++- .../relay-kimi/scripts/lib/review-prompt.mjs | 18 ++- scripts/lib/review-prompt.mjs | 18 ++- tests/unit/review-prompt.test.mjs | 127 ++++++++++++++++++ 10 files changed, 271 insertions(+), 18 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? 
[]) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index e5176a3b..dad9d449 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -1192,7 +1192,13 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou return namesNonSelectedFileGapLine(lower, selectedSource); } -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]+\/)*[a-z0-9_-]+\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename +// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, +// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries +// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED +// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time +// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1426,13 +1432,21 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? 
{ if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - if (CONCRETE_FINDING_NEGATION.test(clause)) return false; + const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); + if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); }); } diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index 37ec08c5..11c6b719 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -4431,3 +4431,130 @@ test("root2 detector3: long-but-realistic call loci still escape the shallow fla } } }); + +test("root2 detector2: multi-dot selected-source basename denial still flags not_reviewed (PR #237 comment 1)", async () => { + // Regression: NON_SELECTED_FILE_TOKEN_RE must tokenize multi-dot filenames whole. Otherwise a + // basename denial of a directory-prefixed multi-dot SELECTED source is mis-read as a foreign-file + // gap and wrongly suppressed, letting a genuine selected-source denial bypass not_reviewed. Both + // directions: the selected-source basename denial MUST flag; a genuine foreign multi-dot gap with + // proven selected inspection MUST stay suppressed (the safe direction is not over-corrected). 
+ const FLAG = [ + { + selected: "config/webpack.config.js", + result: [ + "Verdict: APPROVE", + "Blocking findings", + "- None.", + "Non-blocking concerns", + "- The selected source config/webpack.config.js was fully inspected; the structure is valid.", + "- I could not inspect webpack.config.js override defaults, so that path is a gap.", + ROOT2_PAD, + ].join("\n"), + }, + { + selected: "src/index.test.ts", + result: [ + "Verdict: APPROVE", + "Blocking findings", + "- None.", + "Non-blocking concerns", + "- The selected source src/index.test.ts was fully inspected; the assertions are sound.", + "- I was unable to inspect index.test.ts fixtures, so that remains a gap.", + ROOT2_PAD, + ].join("\n"), + }, + ]; + const SUPPRESS = [ + { + selected: "src/app.js", + result: [ + "Verdict: APPROVE", + "Blocking findings", + "- None.", + "Non-blocking concerns", + "- The selected source src/app.js was fully inspected; the handler is correct.", + "- I could not inspect webpack.config.js, but the diff is internally consistent.", + ROOT2_PAD, + ].join("\n"), + }, + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const { selected, result } of FLAG) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), + true, + `[${name}] not_reviewed should be present for multi-dot selected=${selected}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true for multi-dot selected=${selected}`); + } + for (const { selected, result } of SUPPRESS) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: 
"export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), + false, + `[${name}] a genuine foreign multi-dot gap must stay suppressed for selected=${selected}`, + ); + } + } +}); + +test("root2 detector3: negation-bearing defect cues do not mis-flag concrete reviews as shallow (PR #237 comment 2)", async () => { + // Regression: valid defect cues that contain negation words ("never called", "should not", + // "does not free") must not trip CONCRETE_FINDING_NEGATION when they carry a real call locus. + // Guard: a clause that strips to a GENUINE negation/absence must STAY flagged (no over-rescue). + const NOT_SHALLOW = [ + { selected: "socket.js", result: "Verdict: REQUEST CHANGES. The socket close() is never called on the error path" }, + { selected: "validator.js", result: "Verdict: REQUEST CHANGES. validateInput() should not return early on empty arrays" }, + { selected: "pool.js", result: "Verdict: REQUEST CHANGES. acquire() does not free the slot when the request times out" }, + ]; + const STILL_SHALLOW = [ + { selected: "socket.js", result: "Verdict: APPROVE. close() is never called but that is no real problem here" }, + { selected: "parser.js", result: "Verdict: APPROVE\nThe parseConfig() function correctly throws on bad input and the schema is missing nothing important." 
}, + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const { selected, result } of NOT_SHALLOW) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] looks_shallow should be false for: ${result}`); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + false, + `[${name}] shallow_output should be absent for: ${result}`, + ); + } + for (const { selected, result } of STILL_SHALLOW) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + true, + `[${name}] shallow_output must STAY present (genuine negation/absence) for: ${result}`, + ); + } + } +}); From a2a7be1fb7224604b6b4649d276a64fdeb6b04a3 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 05:04:06 +0900 Subject: [PATCH 05/15] fix(review): pattern-level hardening of shallow_output + permission_blocked detectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three external reviewers (Kimi, Claude, GPT) found unsafe false-negatives this PR introduced; an adversarial sweep then showed an enumeration patch just moved the error around. This replaces it with structural predicates and accepts an explicit, tracked residual (#238) for the surface-undecidable cases. shallow_output (hasConcreteFinding): drop the fragile cue-strip. 
A clause is a concrete finding only with cue + locus + NOT a confirmation (CONCRETE_FINDING_ PRAISE: ' ... as expected/promised') + NOT a dismissal (CONCRETE_FINDING_ DISMISSAL: a negation BOUND to a defect noun within 2 words, or correctness- praise / LGTM — not a bare 'no'/'never' that merely appears in the finding). An adversative-tail override keeps a real defect after 'but/yet/whereas' from being suppressed. Fixes the 'should not ' / 'as expected' leaks AND the 'no bounds check' / 'clean teardown' / 'none of the keys' false-positives. permission_blocked (reviewProcessBlockedSignal): normalize unicode apostrophes; add a first-person-anchored block regex (i/we + no-inspection cue + inspection verb/source-object, one sentence, bounded for linear time); REMOVE the bare artifact nouns ('source file'/'selected source'/...) and non-first-person 'read the X' phrases that fired on third-person code praise. Perception verbs see/saw/ view are excluded ('i did not see any issues' is praise, not a block). Fixes the contraction/paraphrase/unicode reviewer FNs AND the 8 bare-noun false-positives ('I inspected the selected source ...'), and clears pre-existing main FPs. Verified: oracle 0 decidable fails; 416/416 review-prompt + 179 affected-unit; lint:sync clean; copy coverage 97.65%; two adversarial sweeps (shallow leaks 24->9, FPs 15->4). Residual (surface-undecidable hedges/3rd-person blocks) and the PRE-EXISTING structural no-literal raising-detector gap are tracked for the Way-2 redesign (#236/#238), not patched by enumeration. 
Refs #235 #237 Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 103 ++++++++++++------ plugins/claude/scripts/lib/review-prompt.mjs | 103 ++++++++++++------ plugins/gemini/scripts/lib/review-prompt.mjs | 103 ++++++++++++------ plugins/grok/scripts/lib/review-prompt.mjs | 103 ++++++++++++------ plugins/kimi/scripts/lib/review-prompt.mjs | 103 ++++++++++++------ .../scripts/lib/review-prompt.mjs | 103 ++++++++++++------ .../relay-grok/scripts/lib/review-prompt.mjs | 103 ++++++++++++------ .../relay-kimi/scripts/lib/review-prompt.mjs | 103 ++++++++++++------ scripts/lib/review-prompt.mjs | 103 ++++++++++++------ tests/unit/review-prompt.test.mjs | 77 +++++++++++++ 10 files changed, 725 insertions(+), 279 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). 
The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. +const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". 
+ // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index dad9d449..ec8a52e4 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -752,7 +752,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // unable to read/inspect/access the source. Mirrors the genuine-block surface // used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase // so a real read-denial is never masked by incidental handling-praise wording. -function reviewProcessBlockedSignal(lower) { +// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one +// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the +// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken +// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs +// emit ("couldn't") match the same as straight ones. +// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ +// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / +// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block +// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." +// object alternative below. 
+const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +function reviewProcessBlockedSignal(lowerRaw) { + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -785,25 +798,36 @@ function reviewProcessBlockedSignal(lower) { "i cannot read", "i can't read", "i was unable to read", - "could not read the source", - "cannot read the source", - "could not read the file", - "cannot read the file", - "could not read the selected", - "cannot read the selected", - "could not read it", - "cannot read it", - "selected source", - "selected file", - "selected files", - "supplied source", - "supplied diff", - "supplied file", - "supplied files", - "source file", - "source files", - "target file", - "target files", + // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the + // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the + // source file"). A first-person read-denial is caught by the regex above or "i could not read". + // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: + // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader + // correctly handles EACCES when it cannot read the source file"), a false positive. 
A genuine + // block names the artifact with one of the explicit denial verbs above or via the first-person + // regex; a bare noun is not, by itself, a review-process-block signal. + "only reviewed the diff", + "only read the diff", + "only saw the diff", + "only had the diff", + "based only on the diff", + "from the diff alone", + "diff summary", + "denied opening", + "denied reading", + "denied access to", + "review attempt hit", + "review attempt failed", + "review attempt was blocked", + "approving without reading", + "approve without reading", + "approved without reading", + "approving without inspecting", + "approved without inspecting", + "without actually reading", + "without actually inspecting", + "without ever reading", + "without ever inspecting", ]); } @@ -1431,23 +1455,40 @@ const CONCRETE_FINDING_CODE_LOCUS = [ /(? { if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; - const withoutCue = clause.replace(CONCRETE_FINDING_DEFECT_CUE_GLOBAL, " "); - if (CONCRETE_FINDING_NEGATION.test(withoutCue)) return false; - return CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause)); + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + // Contrast override: when the clause's praise/dismissal head is followed by an adversative + // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the + // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), + // so the head marker must not suppress it. 
+ const adv = clause.match(CONCRETE_FINDING_CONTRAST); + if (adv) { + const tail = clause.slice(adv.index + adv[0].length); + if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { + return true; + } + } + if (CONCRETE_FINDING_PRAISE.test(clause)) return false; + if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; + return true; }); } diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index 11c6b719..ce3261f4 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -4558,3 +4558,80 @@ test("root2 detector3: negation-bearing defect cues do not mis-flag concrete rev } } }); + +// Pad past the 500-char shallow threshold so permission_blocked is evaluated independently. +const PERM_PAD = [ + "The structured sections below follow the reviewer contract exactly and in full.", + "Base and head refs were confirmed against the supplied metadata and the diff stat here.", + "The summary, blocking findings, and non-blocking concerns are kept distinct throughout this review.", + "This narrative padding exists only to exercise the non-shallow code path and exceed the threshold.", + "Every structured section is present and the verdict line is unambiguous and well formed in this body.", +].join("\n"); + +test("root2 detector1: reviewer-process blocks (contraction/paraphrase/unicode) still flag permission_blocked", async () => { + // Reviewer-found false-negatives (Kimi/GPT). Each carries a permission literal AND a genuine + // first-person read-block; handling-praise must NOT suppress it. (No-literal blocks are a + // separate, pre-existing raising-detector gap tracked in #238, not asserted here.) + const FLAG = [ + "Verdict: APPROVE\nThe code correctly handles EACCES. I couldn't inspect the actual file due to the error.", + "Verdict: APPROVE\nThe code correctly handles permission denied. 
I wasn't able to inspect the source.", + "Verdict: APPROVE\nThe code correctly handles EACCES. I never saw the source.", + "Verdict: APPROVE\nThe code correctly handles EACCES. I only reviewed the diff summary.", + "Verdict: APPROVE\nThe code handles EACCES gracefully, though I couldn’t actually open the file to confirm it.", + "Verdict: APPROVE\nPermission denied handling falls back well, yet I lacked access to verify against the real file.", + "Verdict: APPROVE\nThe review attempt hit EACCES permission denied opening src/auth.js; the code correctly handles EACCES by falling back.", + ]; + // Pure code-handling praise that names the artifact / observes no findings must STAY suppressed. + const CLEAN = [ + "Verdict: APPROVE\nThe code correctly handles EACCES by falling back to defaults, which is the right behavior.", + "Verdict: APPROVE\nThe loader correctly handles EACCES when it cannot read the source file, returning a clear typed error.", + "Verdict: APPROVE\nI inspected the selected source and confirmed the code correctly handles EACCES on the config read.", + "Verdict: APPROVE\nI did not see any issues; EACCES is handled correctly by the loader and falls back cleanly.", + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); + for (const body of FLAG) { + const m = target({ prompt: "p", sourceFiles: [{ path: "src/auth.js", text: "export const value = 1;\n" }], result: `${body}\n${PERM_PAD}`, status: "completed", errorCode: null }); + assert.equal(m.review_quality.semantic_failure_reasons.includes("permission_blocked"), true, `[${name}] permission_blocked should be present for: ${body}`); + assert.equal(m.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true for: ${body}`); + } + for (const body of CLEAN) { + const m = target({ prompt: "p", sourceFiles: [{ path: "src/config-loader.js", text: "export const value = 1;\n" }], result: 
`${body}\n${PERM_PAD}`, status: "completed", errorCode: null }); + assert.equal(m.review_quality.failed_review_slot, false, `[${name}] code-handling praise must stay clean for: ${body}`); + } + } +}); + +test("root2 detector3: praise/confirmation reusing defect vocabulary flags shallow_output; real findings stay clean", async () => { + // Reviewer/sweep-found false-negatives (decidable subset): "should not ", + // " as expected/promised", LGTM. Surface-undecidable residual ("throws sensibly", + // hedges) is tracked in #238 for Way-2 advisory disposition, not asserted here. + const FLAG = [ + "Verdict: APPROVE\nfoo() should not be a problem", + "Verdict: APPROVE\nparseConfig() should not cause issues", + "Verdict: APPROVE\ncache.get() should not regress", + "Verdict: APPROVE. parseConfig() throws on bad input as expected.", + "Verdict: APPROVE\nThe close() handler throws as promised on bad input.", + ]; + // Genuine concise findings — including the negation-strip-leftover FP cases — must STAY clean. + const CLEAN = [ + "Verdict: REQUEST CHANGES. socket close() is never called", + "Verdict: REQUEST CHANGES. acquire() does not free the slot", + "Verdict: REQUEST CHANGES. validateInput() should not return early", + "Verdict: REQUEST CHANGES. 
parseInt() returns the wrong index", + "Verdict: REQUEST_CHANGES\nindexInto() returns the wrong value because no bounds check guards the array access.", + "Verdict: REQUEST_CHANGES\nlookup() returns the wrong index when none of the keys match, instead of throwing.", + "Verdict: REQUEST_CHANGES\nthe happy path of encode() works as expected, but the empty-input branch throws and crashes.", + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); + for (const result of FLAG) { + const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); + assert.equal(m.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present for: ${result}`); + } + for (const result of CLEAN) { + const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); + assert.equal(m.review_quality.looks_shallow, false, `[${name}] real finding must stay clean for: ${result}`); + } + } +}); From a2d77aa383185f3bdad4312abd0bb23246e29426 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 11:43:58 +0900 Subject: [PATCH 06/15] refactor(review): split detector regexes under SonarCloud complexity cap (behavior-identical) The a2a7be1 redesign tripped SonarCloud new_maintainability_rating=C: four regexes over the S5843 complexity-20 cap (DISMISSAL 215, block 96, DEFECT_CUE 51, PRAISE 43) plus minor S6397/S6535/S6594. 
Decompose each mega-regex into small sub-patterns behind a helper, moving fixed phrases to includesAny string lists: - hasConcreteFinding: CONCRETE_FINDING_DISMISSAL -> clauseIsDismissal() over six small regexes (negated-defect / should-not / nothing / absence / correctly / looks) + an LGTM includesAny list; PRAISE simplified to bare 'as ' (the cue is already required); contrast match -> firstContrastIndex() via indexOf (drops .match, fixes S6594). - CONCRETE_FINDING_DEFECT_CUE -> hasDefectCue() over three small sub-patterns. - reviewProcessBlockedSignal first-person regex -> REVIEWER_BLOCK_CUE + REVIEWER_BLOCK_TARGET (positional split via reviewerFirstPersonBlock()). - S6397: [\w]{0,4}->\w{0,4}, examine[d]?->examine(?:...)? ; S6535: drop \[ escape. Behavior-identical: oracle 0 decidable-fails, 416/416 review-prompt + 103 affected-unit, lint:sync clean, ReDoS still linear (17ms @200k). No test changes. Refs #235 #237 Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- plugins/claude/scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- plugins/gemini/scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- plugins/grok/scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- plugins/kimi/scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- .../scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- .../relay-grok/scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- .../relay-kimi/scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- scripts/lib/review-prompt.mjs | 81 ++++++++++++++----- 9 files changed, 567 insertions(+), 162 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding 
PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. -const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? 
"").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. 
+const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. 
-const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. 
-const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. 
-const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. 
-const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. 
-const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. 
-const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. 
-const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index ec8a52e4..ab4196a7 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -762,10 +762,20 @@ function codeCorrectlyHandlesPermissionError(lower) { // "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block // names the artifact ("i never saw the source"), which still matches via the "the source/file/..." // object alternative below. -const REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?not|couldn'?t|can ?not|can'?t|cannot|was ?not able|wasn'?t able|were ?not able|weren'?t able|did ?not|didn'?t|never|only|unable to|lacked? 
access|had no access|have no access|no access to)\b[^.\n]{0,44}?\b(?:inspect(?:ed|ing)?|read(?:ing)?|open(?:ed|ing)?|access(?:ed|ing)?|examine[d]?|verif(?:y|ied|ying)|confirm(?:ed|ing)?|load(?:ed|ing)?|the diff|the (?:\w+ ){0,2}?(?:file|source|module|contents|selected))\b/; +// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a +// source object within the next ~48 chars) so each regex stays well under the complexity cap while +// preserving the positional "i/we ... cue ... target, one sentence" semantics. +const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +function reviewerFirstPersonBlock(lower) { + const m = REVIEWER_BLOCK_CUE.exec(lower); + if (!m) return false; + const start = m.index + m[0].length; + return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); +} function reviewProcessBlockedSignal(lowerRaw) { const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); - if (REVIEW_PROCESS_FIRST_PERSON_BLOCK_RE.test(lower)) return true; + if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", "could not be inspected", @@ -1222,7 +1232,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou // the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED // (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time // (S5852 / ReDoS-safe) — do NOT relax these back to *,+. 
-const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"(\[{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; +const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; function namesNonSelectedFileGapLine(lower, selectedSource) { const selectedPaths = (selectedSource?.files ?? []) .map((file) => String(file?.path ?? "").toLowerCase()) @@ -1441,7 +1451,14 @@ const TINY_SOURCE_MAX_LINES = 5; // seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing // nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only // asserts correctness stays flagged (conservative — fail toward flagging). -const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call|not)|rather than|off-by-one|null deref|use-after-free|race condition|returns? the wrong|subtracts?|adds? to|drops?|never (?:called|awaited|closed)|leaks?|swallows?|throws?|overflow|underflow|incorrect|wrong (?:order|sign|value|index)|fails to|does not (?:handle|close|await|free|release))\b/i; +// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue +// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. +const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; +const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; +const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? 
to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +function hasDefectCue(clause) { + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); +} // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time // (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, @@ -1451,7 +1468,7 @@ const CONCRETE_FINDING_DEFECT_CUE = /\b(instead of|should (?:be|use|return|call| // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark +// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under +// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a +// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", +// "none of the keys") does not suppress it. +const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; +const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? 
)?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; +const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; +const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; + +function clauseIsDismissal(clause) { + return DISMISSAL_NEGATED_DEFECT.test(clause) + || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_NOTHING.test(clause) + || DISMISSAL_ABSENCE.test(clause) + || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_LOOKS.test(clause) + || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); +} +function clauseIsPraiseOrDismissal(clause) { + return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); +} +function firstContrastIndex(lowerClause) { + let best = -1; + for (const word of CONTRAST_WORDS) { + const at = lowerClause.indexOf(word); + if (at !== -1 && (best === -1 || at < best)) best = at; + } + return best; +} function hasConcreteFinding(text) { const value = String(text ?? 
""); const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { - if (!CONCRETE_FINDING_DEFECT_CUE.test(clause)) return false; + if (!hasDefectCue(clause)) return false; if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), // so the head marker must not suppress it. - const adv = clause.match(CONCRETE_FINDING_CONTRAST); - if (adv) { - const tail = clause.slice(adv.index + adv[0].length); - if (CONCRETE_FINDING_DEFECT_CUE.test(tail) && !CONCRETE_FINDING_DISMISSAL.test(tail) && !CONCRETE_FINDING_PRAISE.test(tail)) { - return true; - } + const advIdx = firstContrastIndex(clause.toLowerCase()); + if (advIdx !== -1) { + const tail = clause.slice(advIdx); + if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; } - if (CONCRETE_FINDING_PRAISE.test(clause)) return false; - if (CONCRETE_FINDING_DISMISSAL.test(clause)) return false; - return true; + return !clauseIsPraiseOrDismissal(clause); }); } From a5c286857156851e6a282207c7b4fed33e60efa2 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 11:56:00 +0900 Subject: [PATCH 07/15] refactor(review): split remaining detector regexes strictly under complexity cap Follow-up to a2d77aa: the 6 regexes still over SonarCloud's S5843 cap (block-cue 40, negated-defect 32, block-target 23, correctly 24, absence 22, defect-verb 21) are split into paired sub-patterns whose union is identical. Block cue/target each split in two; the normalizer now also expands contractions to full forms (couldn't -> could not, requiring the apostrophe so 'important'/'content' are untouched) so the cue patterns need no apostrophe-variant branches. 
Dropped the dead n['o]?t negation arm (the leading word boundary made it unmatchable inside contractions). Behavior-identical: oracle 0 decidable-fails, 416/416 review-prompt + affected-unit, lint:sync clean, ReDoS still linear; corruption check confirms nt-ending words near a permission literal stay clean while curly couldn't/wasn't still flag. No test changes. Refs #235 #237 Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 48 +++++++++++++------ plugins/claude/scripts/lib/review-prompt.mjs | 48 +++++++++++++------ plugins/gemini/scripts/lib/review-prompt.mjs | 48 +++++++++++++------ plugins/grok/scripts/lib/review-prompt.mjs | 48 +++++++++++++------ plugins/kimi/scripts/lib/review-prompt.mjs | 48 +++++++++++++------ .../scripts/lib/review-prompt.mjs | 48 +++++++++++++------ .../relay-grok/scripts/lib/review-prompt.mjs | 48 +++++++++++++------ .../relay-kimi/scripts/lib/review-prompt.mjs | 48 +++++++++++++------ scripts/lib/review-prompt.mjs | 48 +++++++++++++------ 9 files changed, 306 insertions(+), 126 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. 
hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index ab4196a7..1366b330 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -765,16 +765,25 @@ function codeCorrectlyHandlesPermissionError(lower) { // Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a // source object within the next ~48 chars) so each regex stays well under the complexity cap while // preserving the positional "i/we ... cue ... target, one sentence" semantics. -const REVIEWER_BLOCK_CUE = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could ?n['o]?t|can ?n['o]?t|cannot|was ?n['o]?t able|were ?n['o]?t able|did ?n['o]?t|never|only|unable to|lacked? access|no access)\b/i; -const REVIEWER_BLOCK_TARGET = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b|\bthe (?:diff|source|file|selected|module|contents)\b/i; +// Cue + target are each split in two (kept under the complexity cap). 
Contractions are expanded to +// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. +const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; +const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? access|no access)\b/i; +const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; +const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; function reviewerFirstPersonBlock(lower) { - const m = REVIEWER_BLOCK_CUE.exec(lower); - if (!m) return false; - const start = m.index + m[0].length; - return REVIEWER_BLOCK_TARGET.test(lower.slice(start, start + 48)); + for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { + const m = cue.exec(lower); + if (!m) continue; + const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); + if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; + } + return false; } function reviewProcessBlockedSignal(lowerRaw) { - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'"); + // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") + // so the block patterns and the literal list below match a single canonical spelling. + const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); if (reviewerFirstPersonBlock(lower)) return true; return includesAny(lower, [ "no inspection was possible", @@ -1455,9 +1464,11 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. 
const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB = /\b(?:subtracts?|adds? to|drops?|leaks?|swallows?|throws?|never (?:called|awaited|closed))\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) || DEFECT_CUE_VERB.test(clause); + return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) + || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); } // Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): // these run on adversarial external-review text, so each must be provably linear-time @@ -1488,21 +1499,30 @@ const CONCRETE_FINDING_CODE_LOCUS = [ // defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", // "none of the keys") does not suppress it. const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -const DISMISSAL_NEGATED_DEFECT = /\b(?:no|not|never|none|without|n['o]?t)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?|defects?|regressions?|blockers?|off-by-one)\b/i; +// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns +// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it +// unmatchable inside contractions ("isn't"), so it never fired. 
+const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; +const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b|\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed|returns?|catches?|caught|falls? back)\b/i; +const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; +const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; +const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; +const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT.test(clause) + return DISMISSAL_NEGATED_DEFECT_A.test(clause) + || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_CORRECTLY.test(clause) + || DISMISSAL_NO_X.test(clause) + || DISMISSAL_CORRECTLY_A.test(clause) + || DISMISSAL_CORRECTLY_B.test(clause) || DISMISSAL_LOOKS.test(clause) || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); } From 8988e37271dbea032ffd1215cc4e4eecc2a88bff Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 14:39:14 +0900 Subject: [PATCH 08/15] fix(relay): revert spoofable not_reviewed foreign-path suppression PR #237's not_reviewed foreign-path branch cleared a gap when a review named an out-of-packet file AND (claimed to have) inspected the selected source -- a suppression `main` flags. An adversarial sweep plus a 7-case main-vs-branch matrix proved it unfixable by enumeration: every text-based inspection signal is spoofable, and the only non-spoofable gate (named file provably absent from sourceFiles) regresses a no-engagement APPROVE (flag -> clean), breaking fail-toward-flagging. A disclaimer-enumeration hardening attempt (F3) was likewise leaky. Revert to main's contract: a could-not-inspect gap is suppressed ONLY when the reviewer EXPLICITLY marks it out of scope. 
Removes the foreign-path branch, selectedSourceInspected, namesNonSelectedFileGapLine, NON_SELECTED_FILE_TOKEN_RE, and the F3 disclaimer list across the source plus 8 synced copies. Net effect on not_reviewed is strictly toward flagging (verified == main on the matrix; adversarial pass: 0 confirmed bugs; residual holes pre-existing on main, tracked in #238). Keeps F2: DISMISSAL_SHOULD_NOT_PASSIVE restores the passive "should not be affected" dismissal alternative that the a5c2868 regex split silently dropped, with a split-identity property test pinning every pre-split alternative so a future split cannot narrow it again. Verified: review-prompt 416/416, full suite 2597 pass / 0 fail, coverage 85.00% met, sync --check rc=0, lint pass. Refs #232 #235 #236 #238 Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 65 ++--- plugins/claude/scripts/lib/review-prompt.mjs | 65 ++--- plugins/gemini/scripts/lib/review-prompt.mjs | 65 ++--- plugins/grok/scripts/lib/review-prompt.mjs | 65 ++--- plugins/kimi/scripts/lib/review-prompt.mjs | 65 ++--- .../scripts/lib/review-prompt.mjs | 65 ++--- .../relay-grok/scripts/lib/review-prompt.mjs | 65 ++--- .../relay-kimi/scripts/lib/review-prompt.mjs | 65 ++--- scripts/lib/review-prompt.mjs | 65 ++--- tests/unit/review-prompt.test.mjs | 232 ++++++++---------- 10 files changed, 295 insertions(+), 522 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, 
false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. 
- if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? 
"").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. 
function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. 
Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. 
Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. 
Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. 
Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of 
scope. Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. 
Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. 
Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not "; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index 1366b330..c9652b60 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -1162,7 +1162,7 @@ function lineDeniesSelectedSourceInspection(line, selectedSource) { if (isSelectedSourceInspectionMechanicsDiscussionLine(lower)) return false; if (isNegatedSelectedSourceInspectionAnalysisLine(lower)) return false; if (isLocalFileScopeBoundaryLine(lower)) return false; - if (isOutOfScopeInspectionGapLine(lower, selectedSource, false) && !mentionsSelectedSourceGeneric(lower)) return false; + if (isOutOfScopeInspectionGapLine(lower) && !mentionsSelectedSourceGeneric(lower)) return false; if (!includesAny(lower, ["did not inspect", "not inspected", "could not inspect", "unable to inspect"])) { return false; } @@ -1206,9 +1206,15 @@ function isSelectedSourceInspectionMechanicsDiscussionLine(lower) { ]); } -function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSourceInspected = false) { +// A could-not-inspect line is suppressed ONLY when the reviewer EXPLICITLY marks it +// out of scope. 
Inferring out-of-scope from "a foreign file is named + the selected +// source was (claimed) inspected" was tried (PR #237) and reverted: any text-based +// inspection signal is spoofable, and the only non-spoofable alternative (named file +// is provably not in the packet) silently suppresses a no-engagement APPROVE, which +// breaks the fail-toward-flagging invariant. The explicit marker is the contract. +function isOutOfScopeInspectionGapLine(lower) { if (!includesAny(lower, ["could not inspect", "unable to inspect", "not inspected", "not reviewed"])) return false; - if (includesAny(lower, [ + return includesAny(lower, [ "out of scope", "outside the packet", "outside of the packet", @@ -1222,41 +1228,7 @@ function isOutOfScopeInspectionGapLine(lower, selectedSource = null, selectedSou "not part of this packet", "not supplied", "not included in the prompt", - ])) return true; - // Foreign-path branch: a could-not-inspect gap attributed to a concrete file - // that is NOT the selected source is an out-of-scope gap, not a denial that the - // selected source itself was reviewed. Suppress ONLY when the line names such a - // foreign path, does not generically deny the selected source, does not name the - // selected path, AND the review independently proves the selected source WAS - // inspected. When in doubt, fall through to false so the line stays flaggable. - if (mentionsSelectedSourceGeneric(lower)) return false; - if (mentionsSelectedSourcePath(lower, selectedSource)) return false; - if (!selectedSourceInspected) return false; - return namesNonSelectedFileGapLine(lower, selectedSource); -} - -// Multi-dot filenames (webpack.config.js, index.test.ts) must tokenize whole, else a basename -// denial of a dir-prefixed selected source is mis-read as a foreign file and wrongly suppressed, -// letting a genuine not_reviewed denial bypass the gate. The optional (?:\.seg){0,8} group carries -// the interior dotted segments before the final extension. 
Every quantifier is UPPER-BOUNDED -// (segments {1,128}/{1,64}, depth {0,32}/{0,8}, ext {1,6}) so the token scan stays linear-time -// (S5852 / ReDoS-safe) — do NOT relax these back to *,+. -const NON_SELECTED_FILE_TOKEN_RE = /(?:^|[\s`'"([{,;:])((?:[a-z0-9_-]{1,128}\/){0,32}[a-z0-9_-]{1,128}(?:\.[a-z0-9_-]{1,64}){0,8}\.[a-z0-9]{1,6})(?=$|[\s`'")\]}.,;:])/g; -function namesNonSelectedFileGapLine(lower, selectedSource) { - const selectedPaths = (selectedSource?.files ?? []) - .map((file) => String(file?.path ?? "").toLowerCase()) - .filter(Boolean); - NON_SELECTED_FILE_TOKEN_RE.lastIndex = 0; - let match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - while (match !== null) { - const path = match[1]; - const isSelected = selectedPaths.some((selected) => ( - selected === path || selected.endsWith(`/${path}`) || path.endsWith(`/${selected}`) - )); - if (!isSelected) return true; - match = NON_SELECTED_FILE_TOKEN_RE.exec(lower); - } - return false; + ]); } function isPriorReviewCommentsGapLine(lower) { @@ -1335,14 +1307,10 @@ function semanticFailureReasons(text, looksShallow, selectedSource = null) { && !isPermissionFailureExampleLine(unmarkedLower) ); }); - const selectedSourceInspected = mentionsSelectedSourceInspection( - normalizeReviewSearchText(text).toLowerCase(), - selectedSource, - ); const semanticText = semanticLines .filter((line) => { const lower = unmarkReviewText(line).toLowerCase(); - return !isOutOfScopeInspectionGapLine(lower, selectedSource, selectedSourceInspected) + return !isOutOfScopeInspectionGapLine(lower) && !isPriorReviewCommentsGapLine(lower); }) .join("\n") @@ -1444,6 +1412,11 @@ function isNegatedPermissionBlockLine(line) { ); } +// Used only by the tiny-source concise-review exemption (conciseTinyReview). This is a best-effort +// signal, NOT a security gate: a text inspection claim is spoofable, so it must never be used to +// SUPPRESS a not_reviewed flag (that was the reverted PR #237 foreign-path branch). 
Here it only +// grants a length exemption to an already-structured review of a <512-byte source, where a false +// claim buys nothing material. function mentionsSelectedSourceInspection(lowerText, selectedSource) { if (!includesAny(lowerText, SELECTED_SOURCE_INSPECTION_VERBS)) return false; return mentionsSelectedSourcePath(lowerText, selectedSource); @@ -1505,6 +1478,11 @@ const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|p const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; +// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch +// above handles "should not <verb>"; the past-participle impact words are a distinct surface that the +// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. +// Kept as a separate small pattern so the split-identity property test below pins every alternative. 
+const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; @@ -1518,6 +1496,7 @@ function clauseIsDismissal(clause) { return DISMISSAL_NEGATED_DEFECT_A.test(clause) || DISMISSAL_NEGATED_DEFECT_B.test(clause) || DISMISSAL_SHOULD_NOT.test(clause) + || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) || DISMISSAL_NOTHING.test(clause) || DISMISSAL_ABSENCE.test(clause) || DISMISSAL_NO_X.test(clause) diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index ce3261f4..428fa5fa 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -4060,60 +4060,56 @@ test("root2 detector1: long structured APPROVE with reviewer-subject read-denial // Detector 2 — not_reviewed -------------------------------------------------- -test("root2 detector2: foreign-path inspection gaps with proven selected-source inspection do not flag", async () => { - const cases = [ - { - selected: "cart.js", - result: [ - "Verdict: APPROVE", - "Blocking findings", - "- None.", - "Non-blocking concerns", - "- The supplied cart.js diff was fully reviewed and the arithmetic is correct.", - "- I could not inspect the upstream caller in routes.js, but the diff is internally consistent.", - ROOT2_PAD, - ].join("\n"), - }, - { - selected: "src/auth/login.js", - result: [ - "Verdict: APPROVE", - "Blocking findings", - "- None.", - "Non-blocking concerns", - "- The selected source src/auth/login.js was fully inspected; the token comparison is constant-time.", - "- I was unable to inspect middleware/session.js, so its interaction is noted as a gap.", 
- ROOT2_PAD, - ].join("\n"), - }, - { - selected: "src/order/process.js", - result: [ - "Verdict: REQUEST CHANGES", - "Blocking findings", - "- I reviewed the full supplied source src/order/process.js and the rounding is wrong.", - "Non-blocking concerns", - "- I could not inspect config/defaults.json, but the bug is determinable from the diff.", - ROOT2_PAD, - ].join("\n"), - }, +// PR #237 added a "foreign-path" suppression branch that cleared a not_reviewed gap when the review +// named a file outside the packet AND (claimed to have) inspected the selected source. It was REVERTED: +// every text-based inspection signal is spoofable, and the only non-spoofable gate (named file is +// provably not in the packet) silently passes a no-engagement APPROVE — both break fail-toward-flagging. +// The contract is now main's: a could-not-inspect gap is suppressed ONLY when the reviewer EXPLICITLY +// marks it out of scope. This pins BOTH directions of that contract through the composed manifest path. +test("root2 detector2: a foreign-file gap flags not_reviewed unless EXPLICITLY marked out of scope", async () => { + const base = (gapLine) => [ + "Verdict: APPROVE", + "Blocking findings", + "- None.", + "Non-blocking concerns", + "- The supplied cart.js diff was fully reviewed and the arithmetic is correct.", + `- ${gapLine}`, + ROOT2_PAD, + ].join("\n"); + // No explicit marker -> the gap FLAGS (conservative, safe direction), even though the review claims + // to have inspected the selected source. A spoofable inspection claim must NOT buy suppression. + const FLAG = [ + "I could not inspect the upstream caller in routes.js, but the diff is internally consistent.", + "I was unable to inspect middleware/session.js, so its interaction is noted as a gap.", + ]; + // Explicit out-of-scope marker -> the same gap is suppressed (clean). The marker is the contract. 
+ const SUPPRESS = [ + "I could not inspect routes.js, which is out of scope for this packet; the supplied diff was fully reviewed.", + "I could not inspect middleware/session.js, which is not part of this packet; the supplied diff was fully reviewed.", ]; for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, result } of cases) { + for (const gapLine of FLAG) { const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, + sourceFiles: [{ path: "cart.js", text: "export const value = 1;\n" }], + result: base(gapLine), status: "completed", errorCode: null, }); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), - false, - `[${name}] not_reviewed should be absent for selected=${selected}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for selected=${selected}`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), true, `[${name}] unmarked foreign-file gap must flag not_reviewed: ${gapLine}`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] unmarked foreign-file gap must be failed_review_slot: ${gapLine}`); + } + for (const gapLine of SUPPRESS) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "cart.js", text: "export const value = 1;\n" }], + result: base(gapLine), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), false, `[${name}] explicitly out-of-scope gap must stay suppressed: ${gapLine}`); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] explicitly out-of-scope gap must be clean: ${gapLine}`); } } 
}); @@ -4134,7 +4130,7 @@ test("root2 detector2: not-reviewed verdict still flags not_reviewed", async () }); test("root2 detector2: generic selected-source denial still flags not_reviewed", async () => { - // mentionsSelectedSourceGeneric true -> foreign-path branch bails. + // Generic "selected source ... not inspected" denial with no explicit out-of-scope marker -> flags. for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); const manifest = targetBuildReviewAuditManifest({ @@ -4155,7 +4151,7 @@ test("root2 detector2: generic selected-source denial still flags not_reviewed", }); test("root2 detector2: self-referential could-not-inspect of the selected path still flags", async () => { - // mentionsSelectedSourcePath true -> foreign-path branch bails. + // A denial naming the selected path itself, with no explicit out-of-scope marker -> flags. for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); const manifest = targetBuildReviewAuditManifest({ @@ -4176,8 +4172,7 @@ test("root2 detector2: self-referential could-not-inspect of the selected path s }); test("root2 detector2: hyphenated pass-through prose with no file token still flags", async () => { - // No file token -> namesNonSelectedFileGapLine false -> foreign-path branch - // returns false -> the bare "could not inspect" substring fires not_reviewed. + // A bare "could not inspect" with no explicit out-of-scope marker fires not_reviewed. 
for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); const manifest = targetBuildReviewAuditManifest({ @@ -4197,9 +4192,8 @@ test("root2 detector2: hyphenated pass-through prose with no file token still fl }); test("root2 detector2: foreign-path gap WITHOUT proven selected-source inspection still flags", async () => { - // ADVERSARIAL GUARD REGRESSION: names only a foreign path, no generic token, - // and NEVER applies an inspection verb to the selected path -> selectedSourceInspected - // false -> Edit 2a's `if (!selectedSourceInspected) return false;` blocks suppression. + // ADVERSARIAL: a hand-wave APPROVE that names a foreign file and rests the approval on the commit + // message, with no explicit out-of-scope marker, must flag not_reviewed (conservative reverted path). for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); const manifest = targetBuildReviewAuditManifest({ @@ -4432,86 +4426,6 @@ test("root2 detector3: long-but-realistic call loci still escape the shallow fla } }); -test("root2 detector2: multi-dot selected-source basename denial still flags not_reviewed (PR #237 comment 1)", async () => { - // Regression: NON_SELECTED_FILE_TOKEN_RE must tokenize multi-dot filenames whole. Otherwise a - // basename denial of a directory-prefixed multi-dot SELECTED source is mis-read as a foreign-file - // gap and wrongly suppressed, letting a genuine selected-source denial bypass not_reviewed. Both - // directions: the selected-source basename denial MUST flag; a genuine foreign multi-dot gap with - // proven selected inspection MUST stay suppressed (the safe direction is not over-corrected). 
- const FLAG = [ - { - selected: "config/webpack.config.js", - result: [ - "Verdict: APPROVE", - "Blocking findings", - "- None.", - "Non-blocking concerns", - "- The selected source config/webpack.config.js was fully inspected; the structure is valid.", - "- I could not inspect webpack.config.js override defaults, so that path is a gap.", - ROOT2_PAD, - ].join("\n"), - }, - { - selected: "src/index.test.ts", - result: [ - "Verdict: APPROVE", - "Blocking findings", - "- None.", - "Non-blocking concerns", - "- The selected source src/index.test.ts was fully inspected; the assertions are sound.", - "- I was unable to inspect index.test.ts fixtures, so that remains a gap.", - ROOT2_PAD, - ].join("\n"), - }, - ]; - const SUPPRESS = [ - { - selected: "src/app.js", - result: [ - "Verdict: APPROVE", - "Blocking findings", - "- None.", - "Non-blocking concerns", - "- The selected source src/app.js was fully inspected; the handler is correct.", - "- I could not inspect webpack.config.js, but the diff is internally consistent.", - ROOT2_PAD, - ].join("\n"), - }, - ]; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, result } of FLAG) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), - true, - `[${name}] not_reviewed should be present for multi-dot selected=${selected}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true for multi-dot selected=${selected}`); - } - for (const { selected, result } of SUPPRESS) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: 
"export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("not_reviewed"), - false, - `[${name}] a genuine foreign multi-dot gap must stay suppressed for selected=${selected}`, - ); - } - } -}); - test("root2 detector3: negation-bearing defect cues do not mis-flag concrete reviews as shallow (PR #237 comment 2)", async () => { // Regression: valid defect cues that contain negation words ("never called", "should not", // "does not free") must not trip CONCRETE_FINDING_NEGATION when they carry a real call locus. @@ -4604,8 +4518,13 @@ test("root2 detector1: reviewer-process blocks (contraction/paraphrase/unicode) test("root2 detector3: praise/confirmation reusing defect vocabulary flags shallow_output; real findings stay clean", async () => { // Reviewer/sweep-found false-negatives (decidable subset): "should not ", - // " as expected/promised", LGTM. Surface-undecidable residual ("throws sensibly", - // hedges) is tracked in #238 for Way-2 advisory disposition, not asserted here. + // " as expected/promised", LGTM, and the passive "should not be affected" family (F2, below). + // The positive-sentiment praise subclass ("throws sensibly", "drops cleanly", "throws a helpful + // error") is SURFACE-UNDECIDABLE — it is token-identical to a real finding ("cleanly drops the + // final page"), so a keyword classifier cannot separate them without flagging correct reviews; + // a 76-case adversarial sweep disproved the lexicon approach (14 FPs + unbounded synonym leaks). + // That subclass, and the off-lexicon dismissal tail ("harmless"/"benign"/"no real risk"), are + // tracked in #236/#238 for the Way-2 advisory-disposition redesign, not patched by enumeration. 
const FLAG = [ "Verdict: APPROVE\nfoo() should not be a problem", "Verdict: APPROVE\nparseConfig() should not cause issues", @@ -4635,3 +4554,46 @@ test("root2 detector3: praise/confirmation reusing defect vocabulary flags shall } } }); + +// F2 + split-identity. The a5c2868 "behavior-identical" regex split silently dropped the passive +// "should not be affected" dismissal alternative, so a hand-wave APPROVE passed as a concrete +// finding (UNSAFE). This test pins the passive family AND enumerates a canonical phrase for every +// pre-split DISMISSAL_SHOULD_NOT alternative, so a future split cannot silently narrow a dismissal +// again (the root cause that let F2 ship). Each phrase co-locates a "should not" cue with a code +// locus, so the dismissal regex is the load-bearing classifier. +test("root2 detector3 F2: passive + every pre-split should-not dismissal flags shallow_output (split-identity)", async () => { + const SHOULD_NOT_CANON = [ + // F2 passive reassurance (the dropped alternative + its impact-verb family) + "should not be affected", + "should not be impacted", + "should not be touched", + "should not be altered", + "should not be disturbed", + // pre-split copular reassurance + "should not be a problem", + "should not be an issue", + "should not be a concern", + "should not be a blocker", + "should not be a big deal", + // pre-split active branches + "should not cause problems", + "should not cause trouble", + "should not create problems", + "should not introduce a regression", + "should not regress", + "should not matter", + "should not break", + "should not hurt", + "should not harm", + "should not affect anything", + "should not affect the output", + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); + for (const tail of SHOULD_NOT_CANON) { + const result = `Verdict: APPROVE\nfoo() ${tail}`; + const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: 
"export const value = 1;\n" }], result, status: "completed", errorCode: null }); + assert.equal(m.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] should-not dismissal must flag shallow_output: ${tail}`); + } + } +}); From b75e10d21ab34a822c1bd9bd09a88b8b8eef2144 Mon Sep 17 00:00:00 2001 From: Test User Date: Mon, 22 Jun 2026 17:34:37 +0900 Subject: [PATCH 09/15] fix(relay): revert leaky permission-praise suppressor; complete detector oracles PR #237 round-2 review (GLM/GPT/Claude), every finding reproduced through buildReviewAuditManifest with a main-vs-branch delta: the a2a7be1 isPermissionHandlingPraiseLine suppressor let a GENUINELY permission-blocked APPROVE pass CLEAN whenever a line also praised the code's EACCES handling. Its enumerated un-suppression guard (reviewProcessBlockedSignal) missed a cross-line "read" block, a third-person block, two-word "can not", a U+FF07 glyph, and "my open" -- 5 live false-negatives (the gravest class: un-reviewed code clearing review). Revert the suppressor subsystem entirely (isPermissionHandlingPraiseLine + codeCorrectlyHandlesPermissionError + reviewerFirstPersonBlock + reviewProcessBlockedSignal + REVIEWER_BLOCK_*; all a2a7be1-added, absent on main, consumed only by the suppressor). Post-revert any line carrying a permission literal flags permission_blocked (== main). A review that merely praises EACCES handling is now conservatively over-flagged: an accepted SAFE false positive; the precision goal is Way-2 #238. Across source + 8 synced copies. Close the MAJOR oracle-gap finding (test-only, pins EXISTING behavior): add a hasDefectCue four-way split-identity oracle (every DEFECT_CUE_* alternative escapes shallow alone) and complete SHOULD_NOT_CANON to all 9 DISMISSAL_SHOULD_NOT_PASSIVE participles (was 5/10), so a future "behavior- identical" split cannot silently drop a cue/participle. 
NOT shipped (deferred to Way-2 #238 with reproductions): the dismissal-lexicon completions for negated-impact reassurance / negated defect-noun / cause-synonyms. An independent adversarial sweep proved they are surface-undecidable (the F1 class): a 3-way (main / PR-baseline / candidate) verification showed they over-suppress realistic REAL findings ("no overflow guard", "should not introduce X yet it does") -- 6 over-suppression regressions vs the PR baseline. Like F1, enumeration trades dismissal-only FN fixes for real-finding FPs; routed to the advisory-disposition redesign instead. Verified: review-prompt 418/418, full suite green, coverage 85% met, sync --check rc=0, lint pass. Refs #232 #235 #236 #238 Co-Authored-By: Claude Opus 4.8 --- .../scripts/lib/review-prompt.mjs | 152 ------------------ plugins/claude/scripts/lib/review-prompt.mjs | 152 ------------------ plugins/gemini/scripts/lib/review-prompt.mjs | 152 ------------------ plugins/grok/scripts/lib/review-prompt.mjs | 152 ------------------ plugins/kimi/scripts/lib/review-prompt.mjs | 152 ------------------ .../scripts/lib/review-prompt.mjs | 152 ------------------ .../relay-grok/scripts/lib/review-prompt.mjs | 152 ------------------ .../relay-kimi/scripts/lib/review-prompt.mjs | 152 ------------------ scripts/lib/review-prompt.mjs | 152 ------------------ tests/unit/review-prompt.test.mjs | 130 ++++++++++++--- 10 files changed, 110 insertions(+), 1388 deletions(-) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 
@@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. 
-// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error 
(EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error 
(EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error 
(EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error 
(EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission 
-// error (EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error 
(EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error 
(EACCES/EPERM/"permission denied") is reviewing behavior, not reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index c9652b60..81d5cb8e 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -402,7 +402,6 @@ const PERMISSION_FAILURE_EXAMPLE_DETECTORS = Object.freeze([ isPermissionClassifierFixtureLine, isPermissionTermExampleLine, isCodePermissionConcernLine, - isPermissionHandlingPraiseLine, ]); function isPermissionFailureExampleLine(lower) { @@ -699,157 +698,6 @@ function isCodePermissionConcernLine(lower) { ]); } -// A reviewer describing reviewed code that CORRECTLY HANDLES a permission -// error (EACCES/EPERM/"permission denied") is reviewing behavior, not 
reporting -// that the review process was itself blocked. Suppress only when the line both -// (a) praises the code's handling of the error, and (b) carries no signal that -// the REVIEW PROCESS could not read/inspect/access the selected source. When in -// doubt — i.e. any review-process-blocked phrase is present — do NOT suppress. -function isPermissionHandlingPraiseLine(lower) { - if (!includesPermissionFailureLiteral(lower)) return false; - if (!codeCorrectlyHandlesPermissionError(lower)) return false; - return !reviewProcessBlockedSignal(lower); -} - -function codeCorrectlyHandlesPermissionError(lower) { - return includesAny(lower, [ - "correctly handles", - "correctly handle", - "handles eacces", - "handle eacces", - "handles eperm", - "handle eperm", - "handles the permission", - "handle the permission", - "gracefully handles", - "gracefully handle", - "handled correctly", - "handled gracefully", - "catches the error", - "catches the permission", - "catches eacces", - "catches eperm", - "catch eacces", - "catch eperm", - "is caught", - "are caught", - "falls back", - "fall back", - "graceful fallback", - "degrades gracefully", - "surfaces a typed error", - "surfaces an error", - "returns a clear", - "returns a typed", - "is the right behavior", - "is the correct behavior", - "the right behavior", - "correct behavior", - ]); -} - -// Signals that the REVIEW PROCESS (the reviewer), not the reviewed code, was -// unable to read/inspect/access the source. Mirrors the genuine-block surface -// used by lineDeniesSelectedSourceInspection / hasConcretePermissionActionPhrase -// so a real read-denial is never masked by incidental handling-praise wording. -// FIRST-PERSON reviewer block: "i/we" + a no-inspection cue + an inspection verb, within one -// sentence ([^.\n], bounded for linear time). 
Anchored to i/we so a THIRD-PERSON description of the -// reviewed CODE ("the loader handles EACCES when it cannot read the source file") is NOT mistaken -// for a blocked review process. Apostrophes are normalized first so the curly contractions LLMs -// emit ("couldn't") match the same as straight ones. -// NOTE: the inspection verbs are deliberately INSPECTION-specific (inspect/read/open/access/examine/ -// load/verify/confirm). The perception verbs see/saw/view are EXCLUDED — "i did not see any issues" / -// "we never saw a crash" are no-finding PRAISE, not review-process blocks. A genuine perception block -// names the artifact ("i never saw the source"), which still matches via the "the source/file/..." -// object alternative below. -// Split into a CUE pattern (i/we + a no-inspection cue) and a TARGET pattern (inspection verb or a -// source object within the next ~48 chars) so each regex stays well under the complexity cap while -// preserving the positional "i/we ... cue ... target, one sentence" semantics. -// Cue + target are each split in two (kept under the complexity cap). Contractions are expanded to -// full forms by the normalizer below, so the cue patterns need no apostrophe-variant branches. -const REVIEWER_BLOCK_CUE_A = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:could not|cannot|did not|never|only)\b/i; -const REVIEWER_BLOCK_CUE_B = /\b(?:i|we)\b[^.\n]{0,30}?\b(?:was not able|were not able|unable to|lacked? 
access|no access)\b/i; -const REVIEWER_BLOCK_TARGET_VERB = /\b(?:inspect|read|open|access|examine|verif|confirm|load)(?:ed|ing|y|ies|ied)?\b/i; -const REVIEWER_BLOCK_TARGET_OBJ = /\bthe (?:diff|source|file|selected|module|contents)\b/i; -function reviewerFirstPersonBlock(lower) { - for (const cue of [REVIEWER_BLOCK_CUE_A, REVIEWER_BLOCK_CUE_B]) { - const m = cue.exec(lower); - if (!m) continue; - const tail = lower.slice(m.index + m[0].length, m.index + m[0].length + 48); - if (REVIEWER_BLOCK_TARGET_VERB.test(tail) || REVIEWER_BLOCK_TARGET_OBJ.test(tail)) return true; - } - return false; -} -function reviewProcessBlockedSignal(lowerRaw) { - // Normalize curly apostrophes, then expand contractions to full forms ("couldn't" -> "could not") - // so the block patterns and the literal list below match a single canonical spelling. - const lower = String(lowerRaw ?? "").replace(/[‘’ʼ′]/g, "'").replace(/\bcan't\b/g, "cannot").replace(/n't\b/g, " not"); - if (reviewerFirstPersonBlock(lower)) return true; - return includesAny(lower, [ - "no inspection was possible", - "could not be inspected", - "were not inspected", - "was not inspected", - "not inspected", - "could not inspect", - "cannot inspect", - "can't inspect", - "unable to inspect", - "did not inspect", - "could not access", - "cannot access", - "can't access", - "unable to access", - "prevented me", - "prevented file access", - "prevented access", - "permission block prevented", - "permission blocks prevented", - "while reading", - "while inspecting", - "i could not", - "i was unable", - "i was blocked", - "review was blocked", - "blocked from reading", - "blocked from inspecting", - "i could not read", - "i cannot read", - "i can't read", - "i was unable to read", - // NOTE: non-first-person "could/cannot read the source/file/selected" were also REMOVED for the - // same reason as the bare nouns — they fire on third-person code praise ("it cannot read the - // source file"). 
A first-person read-denial is caught by the regex above or "i could not read". - // NOTE: bare artifact nouns ("source file"/"selected source"/"target file"/...) were REMOVED: - // alone they fired on code-handling PRAISE that merely names the reviewed artifact ("the loader - // correctly handles EACCES when it cannot read the source file"), a false positive. A genuine - // block names the artifact with one of the explicit denial verbs above or via the first-person - // regex; a bare noun is not, by itself, a review-process-block signal. - "only reviewed the diff", - "only read the diff", - "only saw the diff", - "only had the diff", - "based only on the diff", - "from the diff alone", - "diff summary", - "denied opening", - "denied reading", - "denied access to", - "review attempt hit", - "review attempt failed", - "review attempt was blocked", - "approving without reading", - "approve without reading", - "approved without reading", - "approving without inspecting", - "approved without inspecting", - "without actually reading", - "without actually inspecting", - "without ever reading", - "without ever inspecting", - ]); -} - function isPermissionLiteralListLine(lower) { return (permissionFailureCodeTokenCount(lower) >= 2 || isQuotedPermissionLiteralListLine(lower)) && !hasConcretePermissionActionPhrase(lower) diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index 428fa5fa..e9c1c2e0 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -3975,17 +3975,42 @@ const ROOT2_PAD = [ // coverage gate satisfied. targetBuildReviewAuditManifest is the per-module // entry point; `name` is appended to every assert message for per-copy attribution. 
-test("root2 detector1: handling-praise reviews do not flag permission_blocked", async () => { - const cases = [ +test("root2 detector1: code-handling praise with a permission literal conservatively flags permission_blocked (suppressor reverted -> #238)", async () => { + // The a2a7be1 isPermissionHandlingPraiseLine suppressor was REVERTED: its enumerated un-suppression + // guard leaked, letting a genuinely permission-blocked APPROVE pass CLEAN (5 reproductions: cross-line + // "read", third-person, "can not", U+FF07 glyph, "my open"). Post-revert any line carrying a permission + // LITERAL flags permission_blocked (== main behavior) — failing toward flagging. Reviews that merely + // PRAISE the code's EACCES handling are now over-flagged: an ACCEPTED false positive (SAFE direction); + // the precision goal (don't flag code-praise) is deferred to the Way-2 advisory disposition in #238. + const FLAGS = [ "Verdict: Approve\nThe new code correctly handles EACCES when it cannot read the optional config file: it catches the error, logs a warning, and falls back to documented defaults.", "Verdict: Approve\nThe writer correctly catches EPERM thrown by fs.writeSync and surfaces a typed error to the caller instead of crashing.", "Verdict: Approve\nThe diff returns a clear 'permission denied' message to the API client when the user lacks the scope, which is the right behavior.", - // GUARD REGRESSION: reviewed-process subject must stay unflagged after Edit 1b. "Verdict: Approve\nWhen the process cannot read the optional cache file it raises EACCES; the new code catches the error and falls back gracefully, which is the correct behavior.", ]; + // No permission LITERAL present -> NOT a permission_blocked signal: we do not over-flag every mention + // of error handling, only lines carrying EACCES/EPERM/"permission denied". 
+ const CLEAN = [ + "Verdict: Approve\nThe loader gracefully handles missing-file errors and falls back to defaults, which is the right behavior; I inspected the selected source and confirmed it is correct.", + ]; for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const result of cases) { + for (const result of FLAGS) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], + result: `${result}\n${ROOT2_PAD}`, + status: "completed", + errorCode: null, + }); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), + true, + `[${name}] permission_blocked should be present (conservative) for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true for: ${result}`); + } + for (const result of CLEAN) { const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", sourceFiles: [{ path: "src/example.js", text: "export const value = 1;\n" }], @@ -3996,9 +4021,8 @@ test("root2 detector1: handling-praise reviews do not flag permission_blocked", assert.equal( manifest.review_quality.semantic_failure_reasons.includes("permission_blocked"), false, - `[${name}] permission_blocked should be absent for: ${result}`, + `[${name}] no permission literal -> permission_blocked absent for: ${result}`, ); - assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for: ${result}`); } } }); @@ -4484,8 +4508,9 @@ const PERM_PAD = [ test("root2 detector1: reviewer-process blocks (contraction/paraphrase/unicode) still flag permission_blocked", async () => { // Reviewer-found false-negatives (Kimi/GPT). 
Each carries a permission literal AND a genuine - // first-person read-block; handling-praise must NOT suppress it. (No-literal blocks are a - // separate, pre-existing raising-detector gap tracked in #238, not asserted here.) + // first-person read-block. After the praise-suppressor revert (#238) any permission literal flags + // permission_blocked, so these flag a fortiori (the suppressor that previously hid them is gone). + // (No-literal blocks remain a pre-existing raising-detector gap tracked in #238, not asserted here.) const FLAG = [ "Verdict: APPROVE\nThe code correctly handles EACCES. I couldn't inspect the actual file due to the error.", "Verdict: APPROVE\nThe code correctly handles permission denied. I wasn't able to inspect the source.", @@ -4495,13 +4520,6 @@ test("root2 detector1: reviewer-process blocks (contraction/paraphrase/unicode) "Verdict: APPROVE\nPermission denied handling falls back well, yet I lacked access to verify against the real file.", "Verdict: APPROVE\nThe review attempt hit EACCES permission denied opening src/auth.js; the code correctly handles EACCES by falling back.", ]; - // Pure code-handling praise that names the artifact / observes no findings must STAY suppressed. 
- const CLEAN = [ - "Verdict: APPROVE\nThe code correctly handles EACCES by falling back to defaults, which is the right behavior.", - "Verdict: APPROVE\nThe loader correctly handles EACCES when it cannot read the source file, returning a clear typed error.", - "Verdict: APPROVE\nI inspected the selected source and confirmed the code correctly handles EACCES on the config read.", - "Verdict: APPROVE\nI did not see any issues; EACCES is handled correctly by the loader and falls back cleanly.", - ]; for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); for (const body of FLAG) { @@ -4509,10 +4527,6 @@ test("root2 detector1: reviewer-process blocks (contraction/paraphrase/unicode) assert.equal(m.review_quality.semantic_failure_reasons.includes("permission_blocked"), true, `[${name}] permission_blocked should be present for: ${body}`); assert.equal(m.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true for: ${body}`); } - for (const body of CLEAN) { - const m = target({ prompt: "p", sourceFiles: [{ path: "src/config-loader.js", text: "export const value = 1;\n" }], result: `${body}\n${PERM_PAD}`, status: "completed", errorCode: null }); - assert.equal(m.review_quality.failed_review_slot, false, `[${name}] code-handling praise must stay clean for: ${body}`); - } } }); @@ -4563,12 +4577,17 @@ test("root2 detector3: praise/confirmation reusing defect vocabulary flags shall // locus, so the dismissal regex is the load-bearing classifier. test("root2 detector3 F2: passive + every pre-split should-not dismissal flags shallow_output (split-identity)", async () => { const SHOULD_NOT_CANON = [ - // F2 passive reassurance (the dropped alternative + its impact-verb family) + // F2 passive reassurance — EVERY impact participle in IMPACT_REASSURANCE_NEG (all 9, was 5/10). 
"should not be affected", "should not be impacted", "should not be touched", "should not be altered", "should not be disturbed", + "should not be changed", + "should not be disrupted", + "should not be noticeable", + "should not be visible", + "should not be a factor", // pre-split copular reassurance "should not be a problem", "should not be an issue", @@ -4597,3 +4616,74 @@ test("root2 detector3 F2: passive + every pre-split should-not dismissal flags s } } }); + +// --- PR #237 review round 2 (GLM/GPT/Claude): fixes verified through buildReviewAuditManifest --- + +// B1 revert reproductions. The a2a7be1 permission-praise suppressor let a GENUINELY blocked APPROVE +// pass CLEAN whenever a line also praised the code's EACCES handling, because its enumerated +// un-suppression guard missed: a cross-line "read" block, a third-person block, two-word "can not", +// a fullwidth-apostrophe (U+FF07) glyph, and "my open". All five carry a permission literal, so after +// the revert they flag permission_blocked (== main). Fails if the suppressor is ever re-introduced. 
+test("root2 detector1: genuinely-blocked APPROVE with EACCES handling-praise flags permission_blocked (suppressor-revert regressions)", async () => { + const PAD = [ + "The structured review body has normal sections and enough neutral detail to clear the shallow length threshold.", + "Base and head metadata were considered while keeping blocking and non-blocking sections separate here.", + "This padding is neutral and carries no permission, inspection, or defect trigger wording of its own.", + ].join("\n"); + const FLAG = [ + ["The code correctly handles EACCES by falling back to a typed error on line 7.", + "I could not read the selected source file, so I am approving based on the diff alone."].join("\n"), + "The handler correctly handles EACCES, but the sandbox prevented inspection so the selected source was never opened during this review.", + "I can not read the selected source due to EACCES, yet the code correctly handles EACCES and degrades gracefully.", + "The code correctly handles EACCES by falling back, but I couldn't inspect the selected source.", + "The loader hit EACCES on my open of the selected source; since the code correctly handles EACCES I am approving anyway.", + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); + for (const body of FLAG) { + const result = `Verdict: APPROVE\nBlocking findings\n- None.\n${body}\n${PAD}`; + const m = target({ prompt: "p", sourceFiles: [{ path: "src/auth.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); + assert.equal(m.review_quality.semantic_failure_reasons.includes("permission_blocked"), true, `[${name}] permission_blocked must be present for: ${body}`); + assert.equal(m.review_quality.failed_review_slot, true, `[${name}] failed_review_slot must be true for: ${body}`); + } + } +}); + +// hasDefectCue four-way split oracle: each DEFECT_CUE_* alternative, used ALONE next to a code 
locus, +// must register a concrete finding (looks_shallow:false). If a future "behavior-identical" split drops +// any alternative, the corresponding case flips to shallow and this fails (the MAJOR test-gap closed). +test("root2 detector3: hasDefectCue split-identity — every defect-cue alternative escapes shallow alone", async () => { + const CUES = [ + "uses a global instead of the injected client", + "mutates the array rather than copying it", + "should return the count", + "fails to close the handle", + "does not free the slot", + "has an off-by-one", + "has a null deref", + "has a use-after-free", + "has a race condition", + "can overflow", + "can underflow", + "is incorrect", + "returns the wrong index", + "uses the wrong order", + "subtracts one too many", + "adds to the wrong bucket", + "drops the last element", + "leaks the buffer", + "swallows the error", + "throws on empty input", + "is never called", + "is never awaited", + "is never closed", + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); + for (const cue of CUES) { + const result = `Verdict: REQUEST CHANGES\nnextPage() ${cue}.`; + const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); + assert.equal(m.review_quality.looks_shallow, false, `[${name}] defect cue must register a concrete finding: ${cue}`); + } + } +}); From 32cd266b6e801bbc83ae268c7392a2b3e184e003 Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 25 Jun 2026 07:11:08 +0900 Subject: [PATCH 10/15] fix: ground concise review loci in selected source --- plugins/agy/scripts/lib/review-prompt.mjs | 123 +++++++++++++++- .../scripts/lib/review-prompt.mjs | 123 +++++++++++++++- plugins/claude/scripts/lib/review-prompt.mjs | 123 +++++++++++++++- plugins/gemini/scripts/lib/review-prompt.mjs | 123 +++++++++++++++- plugins/grok/scripts/lib/review-prompt.mjs | 123 
+++++++++++++++- plugins/kimi/scripts/lib/review-prompt.mjs | 123 +++++++++++++++- relay/relay-agy/scripts/lib/review-prompt.mjs | 123 +++++++++++++++- .../scripts/lib/review-prompt.mjs | 123 +++++++++++++++- .../relay-grok/scripts/lib/review-prompt.mjs | 123 +++++++++++++++- .../relay-kimi/scripts/lib/review-prompt.mjs | 123 +++++++++++++++- scripts/lib/review-prompt.mjs | 123 +++++++++++++++- tests/unit/review-prompt.test.mjs | 137 ++++++++++++++++-- 12 files changed, 1393 insertions(+), 97 deletions(-) diff --git a/plugins/agy/scripts/lib/review-prompt.mjs b/plugins/agy/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/plugins/agy/scripts/lib/review-prompt.mjs +++ b/plugins/agy/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-agy/scripts/lib/review-prompt.mjs b/relay/relay-agy/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/relay/relay-agy/scripts/lib/review-prompt.mjs +++ b/relay/relay-agy/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index 41540ce0..1156f038 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -100,6 +100,45 @@ function sourceManifest(sourceFiles = []) { }); } +const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; + +function sourcePathBasename(path) { + const value = String(path ?? ""); + const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); + return slash === -1 ? value : value.slice(slash + 1); +} + +function sourcePathStem(basename) { + const value = String(basename ?? ""); + const dot = value.lastIndexOf("."); + return dot > 0 ? value.slice(0, dot) : value; +} + +function sourceSymbolIndex(sourceFiles = []) { + const files = Array.isArray(sourceFiles) ? sourceFiles : []; + if (files.length === 0) return null; + const identifiers = new Set(); + const basenames = new Set(); + for (const file of files) { + const basename = sourcePathBasename(file?.path).toLowerCase(); + if (basename) { + basenames.add(basename); + const stem = sourcePathStem(basename); + if (stem) basenames.add(stem); + } + const content = contentBuffer(file); + const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); + SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; + for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { + identifiers.add(match[0].toLowerCase()); + } + } + return Object.freeze({ + identifiers: Object.freeze(identifiers), + basenames: Object.freeze(basenames), + }); +} + function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1299,10 +1338,13 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. +const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) + || (code >= 65 && code <= 90) + || (code >= 97 && code <= 122) + || char === "_" + || char === "$" + || char === "-"; +} + +function isInternalCodeTokenPeriod(text, index) { + return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); +} + +function splitReviewClauses(text) { + const clauses = []; + let start = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + const split = char === "\n" + || char === ";" + || char === "!" + || char === "?" + || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); + if (split) { + clauses.push(text.slice(start, index)); + start = index + 1; + } + } + clauses.push(text.slice(start)); + return clauses; +} + +function hasSourceGrounding(sourceSymbols) { + return sourceSymbols !== null && typeof sourceSymbols === "object"; +} + +function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { + const shouldGround = hasSourceGrounding(sourceSymbols); + for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { + if (!shouldGround) return true; + const basename = sourcePathBasename(match[1]).toLowerCase(); + if (sourceSymbols.basenames?.has(basename)) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { + if (!shouldGround) return true; + if (sourceSymbols.identifiers?.has(match[1].toLowerCase())) return true; + } + for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { + if (!shouldGround) return true; + // Member loci are grounded without retaining source text: both sides must be + // attested as identifiers somewhere in the selected source packet. + if ( + sourceSymbols.identifiers?.has(match[1].toLowerCase()) + && sourceSymbols.identifiers?.has(match[2].toLowerCase()) + ) { + return true; + } + } + return false; +} + +function hasConcreteFinding(text, sourceSymbols = null) { const value = String(text ?? 
""); - const clauses = value.split(/[\n.;!?]+/); + const clauses = splitReviewClauses(value); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; + if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1400,6 +1505,7 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, + sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1424,7 +1530,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1476,6 +1582,7 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); + const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1537,7 +1644,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index 004a72a3..c744c138 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -4279,6 +4279,113 @@ test("root2 detector3: terse-but-concrete reviews do not flag shallow_output", a } }); +test("root2 detector3: fabricated concise concrete loci still demote against source-bearing packets", async () => { + const spoofs = [ + "Verdict: APPROVE. init() leaks a buffer.", + "Verdict: APPROVE. foo() should return early.", + "Verdict: APPROVE. x() throws.", + "Verdict: APPROVE. parse() throws on bad input.", + "Verdict: APPROVE. Looks good, but foo() could throw.", + "Verdict: APPROVE. 
nonexistentFunction() should return early.", + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const result of spoofs) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/sample.js", text: "export const value = 1;\n" }], + result, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] fabricated locus should look shallow for: ${result}`); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + true, + `[${name}] shallow_output should be present for fabricated locus: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] fabricated locus should fail the review slot: ${result}`); + assert.equal(manifest.review_slot.verdict, "failed_slot", `[${name}] fabricated locus should be demoted: ${result}`); + assert.equal(manifest.review_slot.not_counted_reason, "source_sent_unusable", `[${name}] fabricated locus should not count: ${result}`); + } + const noSource = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [], + result: "Verdict: APPROVE. 
init() leaks a buffer.", + status: "completed", + errorCode: null, + }); + assert.equal(noSource.review_quality.looks_shallow, false, `[${name}] no-source fallback should preserve text-only concrete finding`); + assert.equal(noSource.review_quality.failed_review_slot, false, `[${name}] no-source fallback should still count the review`); + assert.equal(noSource.review_slot.verdict, "approved", `[${name}] no-source fallback approval should count`); + assert.equal(noSource.review_slot.not_counted_reason, "none", `[${name}] no-source fallback should have no not-counted reason`); + } +}); + +test("root2 detector3: attested concise source loci still count after grounding", async () => { + const sourceFiles = [{ + path: "src/cart.js", + text: [ + "export function total(items) {", + " return items.reduce((sum, item) => sum + item.price, 0);", + "}", + "export function validateInput(value) {", + " return value;", + "}", + "export const cart = { total };", + "cart.total = total;", + ].join("\n"), + }]; + const cases = [ + "Verdict: REQUEST_CHANGES. src/cart.js:42 returns the wrong total instead of adding tax.", + "Verdict: REQUEST CHANGES. total() subtracts discounts instead of adding them.", + "Verdict: REQUEST CHANGES. cart.total drops tax instead of adding it.", + "Verdict: REQUEST_CHANGES. 
The optional path around validateInput() should not return early for empty arrays.", + ]; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + for (const result of cases) { + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles, + result, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] attested locus should not look shallow for: ${result}`); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + false, + `[${name}] shallow_output should be absent for attested locus: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] attested locus should count: ${result}`); + assert.equal(manifest.review_slot.verdict, "request_changes", `[${name}] request-changes review should count: ${result}`); + assert.equal(manifest.review_slot.not_counted_reason, "none", `[${name}] attested locus should have no not-counted reason: ${result}`); + } + } +}); + +test("root2 detector3: source symbol grounding remains linear on large source packets", async () => { + const largeSource = `${"const attested = 1;\n".repeat(10000)}export function realHandler() { return attested; }\n`; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const start = process.hrtime.bigint(); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/large.js", text: largeSource }], + result: "Verdict: APPROVE. 
fabricatedMissing() throws on empty input.", + status: "completed", + errorCode: null, + }); + const elapsedMs = Number(process.hrtime.bigint() - start) / 1e6; + assert.ok( + elapsedMs < 2000, + `[${name}] source symbol grounding took ${elapsedMs.toFixed(1)}ms on a large source packet; extraction must stay linear.`, + ); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] fabricated locus should still demote after large source extraction`); + } +}); + test("root2 detector3: bare-LGTM with no verdict still flags shallow_output", async () => { // Also yields missing_verdict (Root-3-owned); assert only shallow_output here. for (const [name, file] of REVIEW_PROMPT_MODULES) { @@ -4418,23 +4525,26 @@ test("root2 detector3: long-but-realistic call loci still escape the shallow fla const cases = [ { selected: "src/services/auth/scheduler.js", + text: "export function validateAndRefreshAuthToken() { return Date.now(); }\n", result: "Verdict: REQUEST CHANGES. The function validateAndRefreshAuthToken() returns the wrong expiry instead of the computed deadline", }, { selected: "lib/persistence/pool.js", + text: "export function acquireConnectionWithRetry() { return openSocket(); }\n", result: "Request changes: acquireConnectionWithRetry() leaks the socket and the cleanup path swallows the close error", }, { selected: "webhooks.js", + text: "export function processIncomingWebhookPayload() { return true; }\n", result: "Verdict: REQUEST CHANGES. 
processIncomingWebhookPayload() drops the signature header instead of validating it first", }, ]; for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, result } of cases) { + for (const { selected, text, result } of cases) { const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + sourceFiles: [{ path: selected, text }], result, status: "completed", errorCode: null, @@ -4455,9 +4565,9 @@ test("root2 detector3: negation-bearing defect cues do not mis-flag concrete rev // "does not free") must not trip CONCRETE_FINDING_NEGATION when they carry a real call locus. // Guard: a clause that strips to a GENUINE negation/absence must STAY flagged (no over-rescue). const NOT_SHALLOW = [ - { selected: "socket.js", result: "Verdict: REQUEST CHANGES. The socket close() is never called on the error path" }, - { selected: "validator.js", result: "Verdict: REQUEST CHANGES. validateInput() should not return early on empty arrays" }, - { selected: "pool.js", result: "Verdict: REQUEST CHANGES. acquire() does not free the slot when the request times out" }, + { selected: "socket.js", text: "export function close() { return true; }\n", result: "Verdict: REQUEST CHANGES. The socket close() is never called on the error path" }, + { selected: "validator.js", text: "export function validateInput() { return true; }\n", result: "Verdict: REQUEST CHANGES. validateInput() should not return early on empty arrays" }, + { selected: "pool.js", text: "export function acquire() { return true; }\n", result: "Verdict: REQUEST CHANGES. acquire() does not free the slot when the request times out" }, ]; const STILL_SHALLOW = [ { selected: "socket.js", result: "Verdict: APPROVE. 
close() is never called but that is no real problem here" }, @@ -4465,10 +4575,10 @@ test("root2 detector3: negation-bearing defect cues do not mis-flag concrete rev ]; for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, result } of NOT_SHALLOW) { + for (const { selected, text, result } of NOT_SHALLOW) { const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], + sourceFiles: [{ path: selected, text }], result, status: "completed", errorCode: null, @@ -4556,6 +4666,15 @@ test("root2 detector3: praise/confirmation reusing defect vocabulary flags shall "Verdict: REQUEST_CHANGES\nlookup() returns the wrong index when none of the keys match, instead of throwing.", "Verdict: REQUEST_CHANGES\nthe happy path of encode() works as expected, but the empty-input branch throws and crashes.", ]; + const cleanSource = [ + "export function close() { return true; }", + "export function acquire() { return true; }", + "export function validateInput() { return true; }", + "export function parseInt() { return 1; }", + "export function indexInto() { return 1; }", + "export function lookup() { return 1; }", + "export function encode() { return true; }", + ].join("\n"); for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); for (const result of FLAG) { @@ -4563,7 +4682,7 @@ test("root2 detector3: praise/confirmation reusing defect vocabulary flags shall assert.equal(m.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present for: ${result}`); } for (const result of CLEAN) { - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); + const m = 
target({ prompt: "p", sourceFiles: [{ path: "x.js", text: cleanSource }], result, status: "completed", errorCode: null }); assert.equal(m.review_quality.looks_shallow, false, `[${name}] real finding must stay clean for: ${result}`); } } @@ -4682,7 +4801,7 @@ test("root2 detector3: hasDefectCue split-identity — every defect-cue alternat const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); for (const cue of CUES) { const result = `Verdict: REQUEST CHANGES\nnextPage() ${cue}.`; - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); + const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export function nextPage() { return 1; }\n" }], result, status: "completed", errorCode: null }); assert.equal(m.review_quality.looks_shallow, false, `[${name}] defect cue must register a concrete finding: ${cue}`); } } From 83aabfaadcea0272ff3c9f37fc7c2bd1134065cc Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 25 Jun 2026 07:11:31 +0900 Subject: [PATCH 11/15] test: cover agy review prompt copy --- tests/unit/review-prompt.test.mjs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index c744c138..b4f87ccd 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -16,6 +16,7 @@ import { const REVIEW_PROMPT_MODULES = Object.freeze([ ["shared", "scripts/lib/review-prompt.mjs"], + ["agy", "plugins/agy/scripts/lib/review-prompt.mjs"], ["api-reviewers", "plugins/api-reviewers/scripts/lib/review-prompt.mjs"], ["claude", "plugins/claude/scripts/lib/review-prompt.mjs"], ["gemini", "plugins/gemini/scripts/lib/review-prompt.mjs"], From 0168a25ccbe0fe741dd1c1f9c2152ca5220ee0bf Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 25 Jun 2026 07:56:47 +0900 Subject: [PATCH 12/15] fix: require source grounding for concise concrete reviews --- 
plugins/agy/scripts/lib/review-prompt.mjs | 23 +--- .../scripts/lib/review-prompt.mjs | 23 +--- plugins/claude/scripts/lib/review-prompt.mjs | 23 +--- plugins/gemini/scripts/lib/review-prompt.mjs | 23 +--- plugins/grok/scripts/lib/review-prompt.mjs | 23 +--- plugins/kimi/scripts/lib/review-prompt.mjs | 23 +--- relay/relay-agy/scripts/lib/review-prompt.mjs | 23 +--- .../scripts/lib/review-prompt.mjs | 23 +--- .../relay-grok/scripts/lib/review-prompt.mjs | 23 +--- .../relay-kimi/scripts/lib/review-prompt.mjs | 23 +--- scripts/lib/review-prompt.mjs | 23 +--- tests/unit/review-prompt.test.mjs | 102 ++++++++++++++++-- 12 files changed, 150 insertions(+), 205 deletions(-) diff --git a/plugins/agy/scripts/lib/review-prompt.mjs b/plugins/agy/scripts/lib/review-prompt.mjs index 1156f038..1df6620f 100644 --- a/plugins/agy/scripts/lib/review-prompt.mjs +++ b/plugins/agy/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) @@ -1341,11 +1341,6 @@ function hasDefectCue(clause) { const CONCRETE_FINDING_PATH_LOCUS = /(? 
{ + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", sourceFiles: [], - result: "Verdict: APPROVE. init() leaks a buffer.", + result, status: "completed", errorCode: null, }); - assert.equal(noSource.review_quality.looks_shallow, false, `[${name}] no-source fallback should preserve text-only concrete finding`); - assert.equal(noSource.review_quality.failed_review_slot, false, `[${name}] no-source fallback should still count the review`); - assert.equal(noSource.review_slot.verdict, "approved", `[${name}] no-source fallback approval should count`); - assert.equal(noSource.review_slot.not_counted_reason, "none", `[${name}] no-source fallback should have no not-counted reason`); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] no-source concise locus should look shallow for: ${result}`); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + true, + `[${name}] no-source concise locus should include shallow_output for: ${result}`, + ); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] no-source concise locus should fail the review slot: ${result}`); + assert.equal(manifest.review_slot.verdict, "failed_slot", `[${name}] no-source concise locus should be demoted: ${result}`); + assert.notEqual(manifest.review_slot.not_counted_reason, "none", `[${name}] no-source concise locus should not count: ${result}`); + } + }); +} + +test("root2 detector3: no-source substantive reviews still count outside the concise exemption", async () => { + const substantiveReview = [ + "Verdict: REQUEST CHANGES.", + "Blocking finding: realHandler() drops the retry delay instead of preserving the configured backoff.", + "The failure path retries immediately, which can create a tight loop when the upstream service is 
down.", + "The fix should carry the computed delay into the retry scheduler and preserve the cancellation check before enqueueing the next attempt.", + "I also checked the non-blocking notes and did not find any unrelated concerns that need to block this change.", + ROOT2_PAD, + ROOT2_PAD, + ].join("\n"); + assert.ok(substantiveReview.length >= 500, "test review must stay outside the concise-review threshold"); + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [], + result: substantiveReview, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] no-source substantive review must not look shallow`); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + false, + `[${name}] no-source substantive review must not include shallow_output`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] no-source substantive review should count`); + assert.equal(manifest.review_slot.verdict, "request_changes", `[${name}] no-source substantive request-changes review should count`); + assert.equal(manifest.review_slot.not_counted_reason, "none", `[${name}] no-source substantive review should have no not-counted reason`); } }); @@ -4366,6 +4417,42 @@ test("root2 detector3: attested concise source loci still count after grounding" } }); +test("root2 detector3: grounded member loci with past-tense defect cue still count", async () => { + const sourceFiles = [{ + path: "src/cart.js", + text: [ + "export class Cart {", + " constructor(items) { this.items = items; }", + " total() {", + " let sum = 0;", + " for (const item of this.items) { sum = sum - item.price; }", + " return sum;", + " }", + "}", + ].join("\n"), + }]; + const result = "Verdict: 
REQUEST_CHANGES. this.items is iterated but item.price is subtracted, returning the wrong total."; + for (const [name, file] of REVIEW_PROMPT_MODULES) { + const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles, + result, + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] grounded past-tense member finding should not look shallow`); + assert.equal( + manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), + false, + `[${name}] grounded past-tense member finding should not include shallow_output`, + ); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] grounded past-tense member finding should count`); + assert.equal(manifest.review_slot.verdict, "request_changes", `[${name}] grounded past-tense member finding should count as request_changes`); + assert.equal(manifest.review_slot.not_counted_reason, "none", `[${name}] grounded past-tense member finding should have no not-counted reason`); + } +}); + test("root2 detector3: source symbol grounding remains linear on large source packets", async () => { const largeSource = `${"const attested = 1;\n".repeat(10000)}export function realHandler() { return attested; }\n`; for (const [name, file] of REVIEW_PROMPT_MODULES) { @@ -4488,7 +4575,7 @@ test("root2 detector3: negated-finding variant does not count as concrete (still }); test("root2 detector3: bounded code-locus regexes stay linear-time on adversarial input (ReDoS S5852 guard)", async () => { - // CONCRETE_FINDING_CODE_LOCUS runs on adversarial external-review text. After bounding every + // The concrete-finding code-locus regexes run on adversarial external-review text. After bounding every // quantifier (the S5852 fix), all three locus regexes must stay linear-time. 
This input is a // defect-cue-bearing clause (so hasConcreteFinding evaluates every locus regex) followed by a // 200k-char pathological run with no terminating dot/paren — forcing each regex to scan to the @@ -4789,6 +4876,7 @@ test("root2 detector3: hasDefectCue split-identity — every defect-cue alternat "returns the wrong index", "uses the wrong order", "subtracts one too many", + "is subtracted from the total", "adds to the wrong bucket", "drops the last element", "leaks the buffer", From 5b676a18cc60f5803d4561c901e682de4a51213b Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 25 Jun 2026 08:20:47 +0900 Subject: [PATCH 13/15] revert: drop unrequested defect-cue lexicon expansion --- plugins/agy/scripts/lib/review-prompt.mjs | 2 +- .../scripts/lib/review-prompt.mjs | 2 +- plugins/claude/scripts/lib/review-prompt.mjs | 2 +- plugins/gemini/scripts/lib/review-prompt.mjs | 2 +- plugins/grok/scripts/lib/review-prompt.mjs | 2 +- plugins/kimi/scripts/lib/review-prompt.mjs | 2 +- relay/relay-agy/scripts/lib/review-prompt.mjs | 2 +- .../scripts/lib/review-prompt.mjs | 2 +- .../relay-grok/scripts/lib/review-prompt.mjs | 2 +- .../relay-kimi/scripts/lib/review-prompt.mjs | 2 +- scripts/lib/review-prompt.mjs | 2 +- tests/unit/review-prompt.test.mjs | 37 ------------------- 12 files changed, 11 insertions(+), 48 deletions(-) diff --git a/plugins/agy/scripts/lib/review-prompt.mjs b/plugins/agy/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/plugins/agy/scripts/lib/review-prompt.mjs +++ b/plugins/agy/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? 
the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. 
const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? 
to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? 
the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/relay/relay-agy/scripts/lib/review-prompt.mjs b/relay/relay-agy/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/relay/relay-agy/scripts/lib/review-prompt.mjs +++ b/relay/relay-agy/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. 
const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? 
to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index 1df6620f..9979630d 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -1324,7 +1324,7 @@ const TINY_SOURCE_MAX_LINES = 5; // set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtract(?:s|ed)?|adds? 
to|drops?|leaks?)\b/i; +const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; function hasDefectCue(clause) { return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index c766ec88..5f96a90a 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -4417,42 +4417,6 @@ test("root2 detector3: attested concise source loci still count after grounding" } }); -test("root2 detector3: grounded member loci with past-tense defect cue still count", async () => { - const sourceFiles = [{ - path: "src/cart.js", - text: [ - "export class Cart {", - " constructor(items) { this.items = items; }", - " total() {", - " let sum = 0;", - " for (const item of this.items) { sum = sum - item.price; }", - " return sum;", - " }", - "}", - ].join("\n"), - }]; - const result = "Verdict: REQUEST_CHANGES. 
this.items is iterated but item.price is subtracted, returning the wrong total."; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles, - result, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] grounded past-tense member finding should not look shallow`); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - false, - `[${name}] grounded past-tense member finding should not include shallow_output`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] grounded past-tense member finding should count`); - assert.equal(manifest.review_slot.verdict, "request_changes", `[${name}] grounded past-tense member finding should count as request_changes`); - assert.equal(manifest.review_slot.not_counted_reason, "none", `[${name}] grounded past-tense member finding should have no not-counted reason`); - } -}); - test("root2 detector3: source symbol grounding remains linear on large source packets", async () => { const largeSource = `${"const attested = 1;\n".repeat(10000)}export function realHandler() { return attested; }\n`; for (const [name, file] of REVIEW_PROMPT_MODULES) { @@ -4876,7 +4840,6 @@ test("root2 detector3: hasDefectCue split-identity — every defect-cue alternat "returns the wrong index", "uses the wrong order", "subtracts one too many", - "is subtracted from the total", "adds to the wrong bucket", "drops the last element", "leaks the buffer", From 1e4aab7b3680871ead2c4d94ecdaed7ed69ba71b Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 25 Jun 2026 13:50:37 +0900 Subject: [PATCH 14/15] revert: drop blocked concise-review source-grounding (#235) Restore the review-prompt module to the 1365a1e merge-ready baseline. 
The source-symbol grounding for concise concrete reviews (32cd266 / 0168a25c) was unanimously blocked by the external fix-delta panel: spoofable by a fixed common-identifier template (~88% of packets) and it over-suppresses genuine prescriptive reviews. The concise-review false-approval hole is deferred to a deterministic-floor redesign (contain + escalate), tracked separately. Retains the independent agy review-prompt parity-test line from 83aabfa (orthogonal to grounding). review-prompt.test: 480/0. Co-Authored-By: Claude Opus 4.8 --- plugins/agy/scripts/lib/review-prompt.mjs | 114 +---------- .../scripts/lib/review-prompt.mjs | 114 +---------- plugins/claude/scripts/lib/review-prompt.mjs | 114 +---------- plugins/gemini/scripts/lib/review-prompt.mjs | 114 +---------- plugins/grok/scripts/lib/review-prompt.mjs | 114 +---------- plugins/kimi/scripts/lib/review-prompt.mjs | 114 +---------- relay/relay-agy/scripts/lib/review-prompt.mjs | 114 +---------- .../scripts/lib/review-prompt.mjs | 114 +---------- .../relay-grok/scripts/lib/review-prompt.mjs | 114 +---------- .../relay-kimi/scripts/lib/review-prompt.mjs | 114 +---------- scripts/lib/review-prompt.mjs | 114 +---------- tests/unit/review-prompt.test.mjs | 190 +----------------- 12 files changed, 120 insertions(+), 1324 deletions(-) diff --git a/plugins/agy/scripts/lib/review-prompt.mjs b/plugins/agy/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/plugins/agy/scripts/lib/review-prompt.mjs +++ b/plugins/agy/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? 
""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? ""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? 
""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? 
""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. 
-const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." && !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? 
""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? ""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? 
""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? 
value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. 
-const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." && !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? 
""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? ""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? 
""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? 
value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. 
-const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." && !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? 
""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-agy/scripts/lib/review-prompt.mjs b/relay/relay-agy/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/relay/relay-agy/scripts/lib/review-prompt.mjs +++ b/relay/relay-agy/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? ""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? 
""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? 
""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. 
-const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." && !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? 
""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? ""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? 
""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? 
""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. 
-const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." && !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? 
""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? ""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? 
null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index 9979630d..41540ce0 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -100,45 +100,6 @@ function sourceManifest(sourceFiles = []) { }); } -const SOURCE_SYMBOL_IDENTIFIER = /[A-Za-z_$][\w$]{0,127}/g; - -function sourcePathBasename(path) { - const value = String(path ?? ""); - const slash = Math.max(value.lastIndexOf("/"), value.lastIndexOf("\\")); - return slash === -1 ? value : value.slice(slash + 1); -} - -function sourcePathStem(basename) { - const value = String(basename ?? ""); - const dot = value.lastIndexOf("."); - return dot > 0 ? value.slice(0, dot) : value; -} - -function sourceSymbolIndex(sourceFiles = []) { - const files = Array.isArray(sourceFiles) ? sourceFiles : []; - if (files.length === 0) return null; - const identifiers = new Set(); - const basenames = new Set(); - for (const file of files) { - const basename = sourcePathBasename(file?.path).toLowerCase(); - if (basename) { - basenames.add(basename); - const stem = sourcePathStem(basename); - if (stem) basenames.add(stem); - } - const content = contentBuffer(file); - const text = typeof file?.text === "string" ? 
file.text : content.toString("utf8"); - SOURCE_SYMBOL_IDENTIFIER.lastIndex = 0; - for (let match = SOURCE_SYMBOL_IDENTIFIER.exec(text); match; match = SOURCE_SYMBOL_IDENTIFIER.exec(text)) { - identifiers.add(match[0].toLowerCase()); - } - } - return Object.freeze({ - identifiers: Object.freeze(identifiers), - basenames: Object.freeze(basenames), - }); -} - function isWordBoundary(char) { if (!char) return true; const code = char.charCodeAt(0); @@ -1338,9 +1299,11 @@ function hasDefectCue(clause) { // text (verified: 0 divergences on the realistic corpus) and the bounded language is a // strict SUBSET of the unbounded one, so the detector still only narrows (fails toward // flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_PATH_LOCUS = /(?= 48 && code <= 57) - || (code >= 65 && code <= 90) - || (code >= 97 && code <= 122) - || char === "_" - || char === "$" - || char === "-"; -} - -function isInternalCodeTokenPeriod(text, index) { - return isCodeTokenChar(text[index - 1]) && isCodeTokenChar(text[index + 1]); -} - -function splitReviewClauses(text) { - const clauses = []; - let start = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - const split = char === "\n" - || char === ";" - || char === "!" - || char === "?" - || (char === "." 
&& !isInternalCodeTokenPeriod(text, index)); - if (split) { - clauses.push(text.slice(start, index)); - start = index + 1; - } - } - clauses.push(text.slice(start)); - return clauses; -} - -function hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols = null) { - for (const match of clause.matchAll(CONCRETE_FINDING_PATH_LOCUS)) { - const basename = sourcePathBasename(match[1]).toLowerCase(); - if (sourceSymbols?.basenames?.has(basename)) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_CALL_LOCUS)) { - if (sourceSymbols?.identifiers?.has(match[1].toLowerCase())) return true; - } - for (const match of clause.matchAll(CONCRETE_FINDING_MEMBER_LOCUS)) { - // Member loci are grounded without retaining source text: both sides must be - // attested as identifiers somewhere in the selected source packet. - if ( - sourceSymbols?.identifiers?.has(match[1].toLowerCase()) - && sourceSymbols?.identifiers?.has(match[2].toLowerCase()) - ) { - return true; - } - } - return false; -} - -function hasConcreteFinding(text, sourceSymbols = null) { +function hasConcreteFinding(text) { const value = String(text ?? ""); - const clauses = splitReviewClauses(value); + const clauses = value.split(/[\n.;!?]+/); return clauses.some((clause) => { if (!hasDefectCue(clause)) return false; - if (!hasGroundedConcreteFindingCodeLocus(clause, sourceSymbols)) return false; + if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; // Contrast override: when the clause's praise/dismissal head is followed by an adversative // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), @@ -1492,7 +1400,6 @@ function qualityFlags({ status = null, errorCode = null, selectedSource = null, - sourceSymbols = null, } = {}) { const text = String(result ?? 
""); const lowerText = normalizeReviewSearchText(text).toLowerCase(); @@ -1517,7 +1424,7 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text, sourceSymbols); + const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 && !conciseTinyReview @@ -1569,7 +1476,6 @@ export function buildReviewAuditManifest({ errorCode = null, } = {}) { const selectedSource = sourceManifest(sourceFiles); - const sourceSymbols = sourceSymbolIndex(sourceFiles); const renderedPromptHash = hashObject(prompt); const routeStep = route.routeStep ?? null; const routeSteps = Array.isArray(route.routeSteps) @@ -1631,7 +1537,7 @@ export function buildReviewAuditManifest({ "Do not launch another same-packet review until the packet is split, the provider is switched, the slot is waived, or an explicit override artifact is recorded.", }) : sourcePacketPolicy; - const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource, sourceSymbols }); + const reviewQuality = qualityFlags({ result, status, errorCode, selectedSource }); const effectiveErrorCode = errorCode ?? reviewQualityPacketRecoveryErrorCode(reviewQuality); const sourceContentTransmission = effectiveSourcePacketPolicy.source_send_allowed === false diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index 5f96a90a..e7c27088 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -4280,164 +4280,6 @@ test("root2 detector3: terse-but-concrete reviews do not flag shallow_output", a } }); -test("root2 detector3: fabricated concise concrete loci still demote against source-bearing packets", async () => { - const spoofs = [ - "Verdict: APPROVE. init() leaks a buffer.", - "Verdict: APPROVE. 
foo() should return early.", - "Verdict: APPROVE. x() throws.", - "Verdict: APPROVE. parse() throws on bad input.", - "Verdict: APPROVE. Looks good, but foo() could throw.", - "Verdict: APPROVE. nonexistentFunction() should return early.", - ]; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const result of spoofs) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "src/sample.js", text: "export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] fabricated locus should look shallow for: ${result}`); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - true, - `[${name}] shallow_output should be present for fabricated locus: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] fabricated locus should fail the review slot: ${result}`); - assert.equal(manifest.review_slot.verdict, "failed_slot", `[${name}] fabricated locus should be demoted: ${result}`); - assert.equal(manifest.review_slot.not_counted_reason, "source_sent_unusable", `[${name}] fabricated locus should not count: ${result}`); - } - } -}); - -const NO_SOURCE_CONCISE_CASES = [ - ["spoof init", "Verdict: APPROVE. init() leaks a buffer."], - ["spoof foo", "Verdict: APPROVE. foo() should return early."], - ["genuine terse", "Verdict: REQUEST CHANGES. 
realHandler() drops the retry delay instead of preserving backoff."], -]; - -for (const [caseName, result] of NO_SOURCE_CONCISE_CASES) { - test(`root2 detector3: no-source concise concrete-looking review demotes without grounding (${caseName})`, async () => { - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [], - result, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] no-source concise locus should look shallow for: ${result}`); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - true, - `[${name}] no-source concise locus should include shallow_output for: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] no-source concise locus should fail the review slot: ${result}`); - assert.equal(manifest.review_slot.verdict, "failed_slot", `[${name}] no-source concise locus should be demoted: ${result}`); - assert.notEqual(manifest.review_slot.not_counted_reason, "none", `[${name}] no-source concise locus should not count: ${result}`); - } - }); -} - -test("root2 detector3: no-source substantive reviews still count outside the concise exemption", async () => { - const substantiveReview = [ - "Verdict: REQUEST CHANGES.", - "Blocking finding: realHandler() drops the retry delay instead of preserving the configured backoff.", - "The failure path retries immediately, which can create a tight loop when the upstream service is down.", - "The fix should carry the computed delay into the retry scheduler and preserve the cancellation check before enqueueing the next attempt.", - "I also checked the non-blocking notes and did not find any unrelated concerns that need to block this change.", - ROOT2_PAD, - ROOT2_PAD, - 
].join("\n"); - assert.ok(substantiveReview.length >= 500, "test review must stay outside the concise-review threshold"); - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [], - result: substantiveReview, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] no-source substantive review must not look shallow`); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - false, - `[${name}] no-source substantive review must not include shallow_output`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] no-source substantive review should count`); - assert.equal(manifest.review_slot.verdict, "request_changes", `[${name}] no-source substantive request-changes review should count`); - assert.equal(manifest.review_slot.not_counted_reason, "none", `[${name}] no-source substantive review should have no not-counted reason`); - } -}); - -test("root2 detector3: attested concise source loci still count after grounding", async () => { - const sourceFiles = [{ - path: "src/cart.js", - text: [ - "export function total(items) {", - " return items.reduce((sum, item) => sum + item.price, 0);", - "}", - "export function validateInput(value) {", - " return value;", - "}", - "export const cart = { total };", - "cart.total = total;", - ].join("\n"), - }]; - const cases = [ - "Verdict: REQUEST_CHANGES. src/cart.js:42 returns the wrong total instead of adding tax.", - "Verdict: REQUEST CHANGES. total() subtracts discounts instead of adding them.", - "Verdict: REQUEST CHANGES. cart.total drops tax instead of adding it.", - "Verdict: REQUEST_CHANGES. 
The optional path around validateInput() should not return early for empty arrays.", - ]; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const result of cases) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles, - result, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] attested locus should not look shallow for: ${result}`); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - false, - `[${name}] shallow_output should be absent for attested locus: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] attested locus should count: ${result}`); - assert.equal(manifest.review_slot.verdict, "request_changes", `[${name}] request-changes review should count: ${result}`); - assert.equal(manifest.review_slot.not_counted_reason, "none", `[${name}] attested locus should have no not-counted reason: ${result}`); - } - } -}); - -test("root2 detector3: source symbol grounding remains linear on large source packets", async () => { - const largeSource = `${"const attested = 1;\n".repeat(10000)}export function realHandler() { return attested; }\n`; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - const start = process.hrtime.bigint(); - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: "src/large.js", text: largeSource }], - result: "Verdict: APPROVE. 
fabricatedMissing() throws on empty input.", - status: "completed", - errorCode: null, - }); - const elapsedMs = Number(process.hrtime.bigint() - start) / 1e6; - assert.ok( - elapsedMs < 2000, - `[${name}] source symbol grounding took ${elapsedMs.toFixed(1)}ms on a large source packet; extraction must stay linear.`, - ); - assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] fabricated locus should still demote after large source extraction`); - } -}); - test("root2 detector3: bare-LGTM with no verdict still flags shallow_output", async () => { // Also yields missing_verdict (Root-3-owned); assert only shallow_output here. for (const [name, file] of REVIEW_PROMPT_MODULES) { @@ -4539,7 +4381,7 @@ test("root2 detector3: negated-finding variant does not count as concrete (still }); test("root2 detector3: bounded code-locus regexes stay linear-time on adversarial input (ReDoS S5852 guard)", async () => { - // The concrete-finding code-locus regexes run on adversarial external-review text. After bounding every + // CONCRETE_FINDING_CODE_LOCUS runs on adversarial external-review text. After bounding every // quantifier (the S5852 fix), all three locus regexes must stay linear-time. This input is a // defect-cue-bearing clause (so hasConcreteFinding evaluates every locus regex) followed by a // 200k-char pathological run with no terminating dot/paren — forcing each regex to scan to the @@ -4577,26 +4419,23 @@ test("root2 detector3: long-but-realistic call loci still escape the shallow fla const cases = [ { selected: "src/services/auth/scheduler.js", - text: "export function validateAndRefreshAuthToken() { return Date.now(); }\n", result: "Verdict: REQUEST CHANGES. 
The function validateAndRefreshAuthToken() returns the wrong expiry instead of the computed deadline", }, { selected: "lib/persistence/pool.js", - text: "export function acquireConnectionWithRetry() { return openSocket(); }\n", result: "Request changes: acquireConnectionWithRetry() leaks the socket and the cleanup path swallows the close error", }, { selected: "webhooks.js", - text: "export function processIncomingWebhookPayload() { return true; }\n", result: "Verdict: REQUEST CHANGES. processIncomingWebhookPayload() drops the signature header instead of validating it first", }, ]; for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, text, result } of cases) { + for (const { selected, result } of cases) { const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: selected, text }], + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], result, status: "completed", errorCode: null, @@ -4617,9 +4456,9 @@ test("root2 detector3: negation-bearing defect cues do not mis-flag concrete rev // "does not free") must not trip CONCRETE_FINDING_NEGATION when they carry a real call locus. // Guard: a clause that strips to a GENUINE negation/absence must STAY flagged (no over-rescue). const NOT_SHALLOW = [ - { selected: "socket.js", text: "export function close() { return true; }\n", result: "Verdict: REQUEST CHANGES. The socket close() is never called on the error path" }, - { selected: "validator.js", text: "export function validateInput() { return true; }\n", result: "Verdict: REQUEST CHANGES. validateInput() should not return early on empty arrays" }, - { selected: "pool.js", text: "export function acquire() { return true; }\n", result: "Verdict: REQUEST CHANGES. acquire() does not free the slot when the request times out" }, + { selected: "socket.js", result: "Verdict: REQUEST CHANGES. 
The socket close() is never called on the error path" }, + { selected: "validator.js", result: "Verdict: REQUEST CHANGES. validateInput() should not return early on empty arrays" }, + { selected: "pool.js", result: "Verdict: REQUEST CHANGES. acquire() does not free the slot when the request times out" }, ]; const STILL_SHALLOW = [ { selected: "socket.js", result: "Verdict: APPROVE. close() is never called but that is no real problem here" }, @@ -4627,10 +4466,10 @@ test("root2 detector3: negation-bearing defect cues do not mis-flag concrete rev ]; for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, text, result } of NOT_SHALLOW) { + for (const { selected, result } of NOT_SHALLOW) { const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: selected, text }], + sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], result, status: "completed", errorCode: null, @@ -4718,15 +4557,6 @@ test("root2 detector3: praise/confirmation reusing defect vocabulary flags shall "Verdict: REQUEST_CHANGES\nlookup() returns the wrong index when none of the keys match, instead of throwing.", "Verdict: REQUEST_CHANGES\nthe happy path of encode() works as expected, but the empty-input branch throws and crashes.", ]; - const cleanSource = [ - "export function close() { return true; }", - "export function acquire() { return true; }", - "export function validateInput() { return true; }", - "export function parseInt() { return 1; }", - "export function indexInto() { return 1; }", - "export function lookup() { return 1; }", - "export function encode() { return true; }", - ].join("\n"); for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); for (const result of FLAG) { @@ -4734,7 +4564,7 @@ test("root2 detector3: praise/confirmation 
reusing defect vocabulary flags shall assert.equal(m.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present for: ${result}`); } for (const result of CLEAN) { - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: cleanSource }], result, status: "completed", errorCode: null }); + const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); assert.equal(m.review_quality.looks_shallow, false, `[${name}] real finding must stay clean for: ${result}`); } } @@ -4853,7 +4683,7 @@ test("root2 detector3: hasDefectCue split-identity — every defect-cue alternat const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); for (const cue of CUES) { const result = `Verdict: REQUEST CHANGES\nnextPage() ${cue}.`; - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export function nextPage() { return 1; }\n" }], result, status: "completed", errorCode: null }); + const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); assert.equal(m.review_quality.looks_shallow, false, `[${name}] defect cue must register a concrete finding: ${cue}`); } } From b6f2524e35a7748343eea2d6e2ee4d6a5c8a4f60 Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 25 Jun 2026 16:50:53 +0900 Subject: [PATCH 15/15] fix(review): remove spoofable conciseConcreteReview exemption (#235) A <500-char review can no longer satisfy a slot via a concrete-finding-shaped clause (panel-proven ~88% spoofable + over-suppresses real reviews). Short non-tiny reviews now ride the existing looks_shallow -> failed_review_slot -> failed_slot path. Objective conciseTinyReview (tiny-source) exemption kept. Stage 2 (model judge) will restore safe short-review recognition. 
--- plugins/agy/scripts/lib/review-prompt.mjs | 114 +------ .../scripts/lib/review-prompt.mjs | 114 +------ plugins/claude/scripts/lib/review-prompt.mjs | 114 +------ plugins/gemini/scripts/lib/review-prompt.mjs | 114 +------ plugins/grok/scripts/lib/review-prompt.mjs | 114 +------ plugins/kimi/scripts/lib/review-prompt.mjs | 114 +------ relay/relay-agy/scripts/lib/review-prompt.mjs | 114 +------ .../scripts/lib/review-prompt.mjs | 114 +------ .../relay-grok/scripts/lib/review-prompt.mjs | 114 +------ .../relay-kimi/scripts/lib/review-prompt.mjs | 114 +------ scripts/lib/review-prompt.mjs | 114 +------ tests/unit/companion-common.test.mjs | 4 + tests/unit/review-prompt.test.mjs | 305 +++--------------- 13 files changed, 53 insertions(+), 1510 deletions(-) diff --git a/plugins/agy/scripts/lib/review-prompt.mjs b/plugins/agy/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/plugins/agy/scripts/lib/review-prompt.mjs +++ b/plugins/agy/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). -// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. 
-const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/api-reviewers/scripts/lib/review-prompt.mjs b/plugins/api-reviewers/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/plugins/api-reviewers/scripts/lib/review-prompt.mjs +++ b/plugins/api-reviewers/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/claude/scripts/lib/review-prompt.mjs b/plugins/claude/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/plugins/claude/scripts/lib/review-prompt.mjs +++ b/plugins/claude/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/gemini/scripts/lib/review-prompt.mjs b/plugins/gemini/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/plugins/gemini/scripts/lib/review-prompt.mjs +++ b/plugins/gemini/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/grok/scripts/lib/review-prompt.mjs b/plugins/grok/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/plugins/grok/scripts/lib/review-prompt.mjs +++ b/plugins/grok/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/plugins/kimi/scripts/lib/review-prompt.mjs b/plugins/kimi/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/plugins/kimi/scripts/lib/review-prompt.mjs +++ b/plugins/kimi/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/relay/relay-agy/scripts/lib/review-prompt.mjs b/relay/relay-agy/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/relay/relay-agy/scripts/lib/review-prompt.mjs +++ b/relay/relay-agy/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/relay/relay-gemini/scripts/lib/review-prompt.mjs b/relay/relay-gemini/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/relay/relay-gemini/scripts/lib/review-prompt.mjs +++ b/relay/relay-gemini/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/relay/relay-grok/scripts/lib/review-prompt.mjs b/relay/relay-grok/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/relay/relay-grok/scripts/lib/review-prompt.mjs +++ b/relay/relay-grok/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/relay/relay-kimi/scripts/lib/review-prompt.mjs b/relay/relay-kimi/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/relay/relay-kimi/scripts/lib/review-prompt.mjs +++ b/relay/relay-kimi/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/scripts/lib/review-prompt.mjs b/scripts/lib/review-prompt.mjs index 41540ce0..683816fe 100644 --- a/scripts/lib/review-prompt.mjs +++ b/scripts/lib/review-prompt.mjs @@ -1274,116 +1274,6 @@ const TINY_SOURCE_MAX_FILES = 1; const TINY_SOURCE_MAX_BYTES = 512; const TINY_SOURCE_MAX_LINES = 5; -// A short review is substantive (not shallow) when SOME single clause names a -// concrete code locus AND describes a specific defect/change at it, and that -// clause is not a negation/absence/praise assertion. Requires co-location so a -// bare verdict ("Verdict: APPROVE", "Looks fine"), a vague claim ("something -// seems incorrect"), or a praise/absence LGTM ("correctly throws ... missing -// nothing") never qualifies. Defect-cue oriented: a terse APPROVE that only -// asserts correctness stays flagged (conservative — fail toward flagging). 
-// Split into three small sub-patterns (each under the regex-complexity cap); their union is the cue -// set. hasDefectCue() is the single entry point used everywhere CONCRETE_FINDING_DEFECT_CUE was. -const DEFECT_CUE_PHRASE = /\b(?:instead of|rather than|should (?:be|use|return|call|not)|fails to|does not (?:handle|close|await|free|release))\b/i; -const DEFECT_CUE_TERM = /\b(?:off-by-one|null deref|use-after-free|race condition|overflow|underflow|incorrect|returns? the wrong|wrong (?:order|sign|value|index))\b/i; -const DEFECT_CUE_VERB_A = /\b(?:subtracts?|adds? to|drops?|leaks?)\b/i; -const DEFECT_CUE_VERB_B = /\b(?:swallows?|throws?|never (?:called|awaited|closed))\b/i; -function hasDefectCue(clause) { - return DEFECT_CUE_PHRASE.test(clause) || DEFECT_CUE_TERM.test(clause) - || DEFECT_CUE_VERB_A.test(clause) || DEFECT_CUE_VERB_B.test(clause); -} -// Every quantifier here is UPPER-BOUNDED (no unbounded *,+ on a character class): -// these run on adversarial external-review text, so each must be provably linear-time -// (S5852 / ReDoS hardening). The bounds (path-prefix 255, filename 128, line# 9 digits, -// identifier 128, inter-token whitespace 16) sit far above any real path/identifier, so -// bounding only clips pathological >bound runs — it never changes a match on real review -// text (verified: 0 divergences on the realistic corpus) and the bounded language is a -// strict SUBSET of the unbounded one, so the detector still only narrows (fails toward -// flagging). Do NOT relax these back to *,+ without restoring the linear-time guarantee. -const CONCRETE_FINDING_CODE_LOCUS = [ - /(?" suffices to mark -// the cue as describing CORRECT behavior. DISMISSAL is split into small sub-patterns (each well under -// the regex-complexity cap) plus an includesAny LGTM list; a negation only dismisses when BOUND to a -// defect noun within two words ("no off-by-one"), so a bare negation in the finding ("never called", -// "none of the keys") does not suppress it. 
-const CONCRETE_FINDING_PRAISE = /\bas (?:expected|intended|designed|documented|planned|specified|promised|required|appropriate|advertised|warranted)\b/i; -// Each kept under the regex-complexity cap by splitting wide alternations across paired patterns -// (the union is identical). The bare "n['o]?t" negation was dropped: the leading \b makes it -// unmatchable inside contractions ("isn't"), so it never fired. -const DISMISSAL_NEGATED_DEFECT_A = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:issues?|problems?|bugs?|concerns?)\b/i; -const DISMISSAL_NEGATED_DEFECT_B = /\b(?:no|not|never|none|without)\b(?: \w+){0,2} (?:defects?|regressions?|blockers?|off-by-one)\b/i; -const DISMISSAL_SHOULD_NOT = /\bshould not (?:be (?:an? )?(?:problem|issue|concern|blocker|big deal)|cause|regress|matter|break|hurt|harm|affect)\b/i; -// PASSIVE/copular reassurance: "X should not be affected/impacted/touched/...". The active branch -// above handles "should not "; the past-participle impact words are a distinct surface that the -// a5c2868 regex split dropped ("be affected"), letting a hand-wave APPROVE pass as a concrete finding. -// Kept as a separate small pattern so the split-identity property test below pins every alternative. -const DISMISSAL_SHOULD_NOT_PASSIVE = /\bshould not be (?:affected|impacted|touched|altered|changed|disturbed|disrupted|noticeable|visible|a factor)\b/i; -const DISMISSAL_NOTHING = /\bnothing (?:wrong|concerning|of concern|problematic|improper|improperly|amiss|untoward|alarming|broken|to (?:flag|note|fix|report))\b/i; -const DISMISSAL_ABSENCE = /\bmissing nothing\b|\bdoes not appear\b/i; -const DISMISSAL_NO_X = /\bno (?:concerns?|issues?|problems?|blockers?|objections?)\b/i; -const DISMISSAL_CORRECTLY_A = /\bcorrectly (?:handles?|handled|throws?|works?|working|closes?|closed)\b/i; -const DISMISSAL_CORRECTLY_B = /\bcorrectly (?:returns?|catches?|caught|falls? 
back)\b/i; -const DISMISSAL_LOOKS = /\b(?:looks?|seems?|is|are|all) (?:fine|clean|good|correct|solid|right|reasonable|ok|okay|sensible|acceptable)\b/i; -const DISMISSAL_LGTM_PHRASES = ["lgtm", "ship it", "nicely done", "well done", "good work", "solid work", "looks solid", "that is acceptable", "that's acceptable"]; -const CONTRAST_WORDS = [" but ", " yet ", " however", " whereas ", " though ", " although ", " instead ", " nevertheless", " nonetheless"]; - -function clauseIsDismissal(clause) { - return DISMISSAL_NEGATED_DEFECT_A.test(clause) - || DISMISSAL_NEGATED_DEFECT_B.test(clause) - || DISMISSAL_SHOULD_NOT.test(clause) - || DISMISSAL_SHOULD_NOT_PASSIVE.test(clause) - || DISMISSAL_NOTHING.test(clause) - || DISMISSAL_ABSENCE.test(clause) - || DISMISSAL_NO_X.test(clause) - || DISMISSAL_CORRECTLY_A.test(clause) - || DISMISSAL_CORRECTLY_B.test(clause) - || DISMISSAL_LOOKS.test(clause) - || includesAny(clause.toLowerCase(), DISMISSAL_LGTM_PHRASES); -} -function clauseIsPraiseOrDismissal(clause) { - return CONCRETE_FINDING_PRAISE.test(clause) || clauseIsDismissal(clause); -} -function firstContrastIndex(lowerClause) { - let best = -1; - for (const word of CONTRAST_WORDS) { - const at = lowerClause.indexOf(word); - if (at !== -1 && (best === -1 || at < best)) best = at; - } - return best; -} - -function hasConcreteFinding(text) { - const value = String(text ?? ""); - const clauses = value.split(/[\n.;!?]+/); - return clauses.some((clause) => { - if (!hasDefectCue(clause)) return false; - if (!CONCRETE_FINDING_CODE_LOCUS.some((pattern) => pattern.test(clause))) return false; - // Contrast override: when the clause's praise/dismissal head is followed by an adversative - // whose TAIL carries its own independent defect cue and is not itself a dismissal/praise, the - // tail is a real finding ("works as expected, but the empty-input branch throws and crashes"), - // so the head marker must not suppress it. 
- const advIdx = firstContrastIndex(clause.toLowerCase()); - if (advIdx !== -1) { - const tail = clause.slice(advIdx); - if (hasDefectCue(tail) && !clauseIsPraiseOrDismissal(tail)) return true; - } - return !clauseIsPraiseOrDismissal(clause); - }); -} - function isTinySelectedSource(selectedSource) { const totals = selectedSource?.totals; return Number.isInteger(totals?.files) @@ -1424,11 +1314,9 @@ function qualityFlags({ && hasBlockingSection && hasNonBlockingSection && mentionsSelectedSourceInspection(lowerText, selectedSource); - const conciseConcreteReview = hasVerdictFlag && hasConcreteFinding(text); const looksShallow = text.trim().length > 0 && text.trim().length < 500 - && !conciseTinyReview - && !conciseConcreteReview; + && !conciseTinyReview; const isFinalReviewAttempt = !["approval_request", "preflight_failed", "queued", "running"].includes(status); const failureReasons = [...semanticFailureReasons(text, looksShallow, selectedSource)]; if (isFinalReviewAttempt && status === "completed" && !hasVerdictFlag) { diff --git a/tests/unit/companion-common.test.mjs b/tests/unit/companion-common.test.mjs index c135276f..e8bfd9b8 100644 --- a/tests/unit/companion-common.test.mjs +++ b/tests/unit/companion-common.test.mjs @@ -862,6 +862,8 @@ async function assertCopyHelperBranches(mod, plugin) { error_code: "scope_empty", error_message: "scope failed", error_summary: "copy scope failed", + http_status: 400, + suggested_action: "retry with explicit copy scope", external_review: { marker: "EXTERNAL REVIEW", provider: plugin, @@ -881,6 +883,8 @@ async function assertCopyHelperBranches(mod, plugin) { assert.match(lifecycleMarkdown, /\| Error \| scope_empty \|/); assert.match(lifecycleMarkdown, /\| Message \| scope failed \|/); assert.match(lifecycleMarkdown, /\| Summary \| copy scope failed \|/); + assert.match(lifecycleMarkdown, /\| HTTP \| 400 \|/); + assert.match(lifecycleMarkdown, /\| Action \| retry with explicit copy scope \|/); 
assert.equal(mod.parseLifecycleEventsMode(undefined), null); assert.equal(mod.parseLifecycleEventsMode(false), null); assert.equal(mod.parseLifecycleEventsMode("jsonl"), "jsonl"); diff --git a/tests/unit/review-prompt.test.mjs b/tests/unit/review-prompt.test.mjs index e7c27088..69ca87fd 100644 --- a/tests/unit/review-prompt.test.mjs +++ b/tests/unit/review-prompt.test.mjs @@ -4240,46 +4240,6 @@ test("root2 detector2: foreign-path gap WITHOUT proven selected-source inspectio // Detector 3 — shallow_output ------------------------------------------------ -test("root2 detector3: terse-but-concrete reviews do not flag shallow_output", async () => { - const cases = [ - { - selected: "src/cart.js", - result: "Verdict: REQUEST CHANGES. src/cart.js total() subtracts item.price instead of adding; the reduce should use sum + item.price. That is the only blocker.", - }, - { - selected: "parser.mjs", - result: "Request changes: parser.mjs scanDigits() has an off-by-one; the loop should use index <= len, not index < len.", - }, - { - selected: "utils.js", - result: "Verdict: REQUEST CHANGES. 
utils.js:42 slice() drops the last element; it should be slice(0, len) not slice(0, len-1).", - }, - ]; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, result } of cases) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal( - manifest.review_quality.looks_shallow, - false, - `[${name}] looks_shallow should be false for: ${result}`, - ); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - false, - `[${name}] shallow_output should be absent for: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for: ${result}`); - } - } -}); - test("root2 detector3: bare-LGTM with no verdict still flags shallow_output", async () => { // Also yields missing_verdict (Root-3-owned); assert only shallow_output here. for (const [name, file] of REVIEW_PROMPT_MODULES) { @@ -4346,8 +4306,7 @@ test("root2 detector3: defect-flavored words with no code locus still flag shall }); test("root2 detector3: praise/absence clauses do not count as concrete findings (still flags)", async () => { - // ADVERSARIAL GUARD REGRESSION: every cue sits in a negated/praise clause -> - // hasConcreteFinding false -> looks_shallow true. + // Short non-tiny reviews stay shallow regardless of praise/absence wording. 
for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); const manifest = targetBuildReviewAuditManifest({ @@ -4364,7 +4323,7 @@ test("root2 detector3: praise/absence clauses do not count as concrete findings }); test("root2 detector3: negated-finding variant does not count as concrete (still flags)", async () => { - // ADVERSARIAL GUARD REGRESSION: negated-finding clauses -> stays flagged. + // Short non-tiny reviews stay shallow regardless of negated-finding wording. for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); const manifest = targetBuildReviewAuditManifest({ @@ -4380,121 +4339,59 @@ test("root2 detector3: negated-finding variant does not count as concrete (still } }); -test("root2 detector3: bounded code-locus regexes stay linear-time on adversarial input (ReDoS S5852 guard)", async () => { - // CONCRETE_FINDING_CODE_LOCUS runs on adversarial external-review text. After bounding every - // quantifier (the S5852 fix), all three locus regexes must stay linear-time. This input is a - // defect-cue-bearing clause (so hasConcreteFinding evaluates every locus regex) followed by a - // 200k-char pathological run with no terminating dot/paren — forcing each regex to scan to the - // end without matching. A backtracking regression (re-introducing an unbounded *,+) would blow - // this from ~10ms to seconds+; the generous 2000ms budget catches that without CI flake. 
- const adversarialReview = "Verdict: REQUEST_CHANGES\nthe handler throws " + "a".repeat(200000); +test("root2 detector3: short concrete non-tiny review still flags shallow_output", async () => { for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - const start = process.hrtime.bigint(); const manifest = targetBuildReviewAuditManifest({ prompt: "rendered prompt", - sourceFiles: [{ path: "sample.js", text: "export const value = 1;\n" }], - result: adversarialReview, + sourceFiles: [ + { path: "src/cart.js", text: "export function total(items) {\n return items.length;\n}\n" }, + { path: "src/tax.js", text: "export const tax = 0;\n" }, + ], + result: "Verdict: REQUEST CHANGES. src/cart.js total() has an off-by-one and should use the item price.", status: "completed", errorCode: null, }); - const elapsedMs = Number(process.hrtime.bigint() - start) / 1e6; - assert.ok( - elapsedMs < 2000, - `[${name}] buildReviewAuditManifest took ${elapsedMs.toFixed(1)}ms on a 200k-char adversarial review; ` + - "the bounded locus regexes must be linear (<2000ms). A super-linear regression likely re-introduced an unbounded quantifier.", - ); - // A 200k-char review is long, so it is never "shallow" regardless of locus matching. 
- assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] a 200k-char review must not be flagged shallow`); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] short non-tiny concrete review should be shallow`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); } }); -test("root2 detector3: long-but-realistic call loci still escape the shallow flag after bounding", async () => { - // EQUIVALENCE ANCHOR: the identifier bound ({0,128}) sits far above any real identifier, so a - // concise review whose only concrete finding cites a long-but-realistic function call must still - // be recognized (looks_shallow=false). Guards against tightening the bound far enough to clip - // real loci and re-introduce the Root-2 false positive. (Only the call locus is asserted here: - // hasConcreteFinding splits clauses on ".", so path/member loci are evaluated on dot-free clauses - // — that pre-existing reachability gap is tracked separately, not relied on by this fix.) - const cases = [ - { - selected: "src/services/auth/scheduler.js", - result: "Verdict: REQUEST CHANGES. The function validateAndRefreshAuthToken() returns the wrong expiry instead of the computed deadline", - }, - { - selected: "lib/persistence/pool.js", - result: "Request changes: acquireConnectionWithRetry() leaks the socket and the cleanup path swallows the close error", - }, - { - selected: "webhooks.js", - result: "Verdict: REQUEST CHANGES. 
processIncomingWebhookPayload() drops the signature header instead of validating it first", - }, - ]; +test("root2 detector3: structured tiny-source short review still counts", async () => { for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, result } of cases) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] looks_shallow should be false for: ${result}`); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - false, - `[${name}] shallow_output should be absent for: ${result}`, - ); - assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false for: ${result}`); - } + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "README.md", text: "# E2E\n" }], + result: [ + "Verdict: APPROVE.", + "Blocking findings: No blocking findings apply to README.md.", + "Non-blocking concerns: None for README.md.", + "Inspection statement: I inspected README.md.", + ].join("\n"), + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] structured tiny-source review should not be shallow`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), false, `[${name}] shallow_output should be absent`); + assert.equal(manifest.review_quality.failed_review_slot, false, `[${name}] failed_review_slot should be false`); } }); -test("root2 detector3: negation-bearing defect cues do not mis-flag concrete reviews as shallow (PR #237 comment 2)", async () => { - // Regression: valid defect cues that 
contain negation words ("never called", "should not", - // "does not free") must not trip CONCRETE_FINDING_NEGATION when they carry a real call locus. - // Guard: a clause that strips to a GENUINE negation/absence must STAY flagged (no over-rescue). - const NOT_SHALLOW = [ - { selected: "socket.js", result: "Verdict: REQUEST CHANGES. The socket close() is never called on the error path" }, - { selected: "validator.js", result: "Verdict: REQUEST CHANGES. validateInput() should not return early on empty arrays" }, - { selected: "pool.js", result: "Verdict: REQUEST CHANGES. acquire() does not free the slot when the request times out" }, - ]; - const STILL_SHALLOW = [ - { selected: "socket.js", result: "Verdict: APPROVE. close() is never called but that is no real problem here" }, - { selected: "parser.js", result: "Verdict: APPROVE\nThe parseConfig() function correctly throws on bad input and the schema is missing nothing important." }, - ]; +test("root2 detector3: tiny concrete unstructured review still flags shallow_output", async () => { for (const [name, file] of REVIEW_PROMPT_MODULES) { const { buildReviewAuditManifest: targetBuildReviewAuditManifest } = await loadReviewPromptModule(file); - for (const { selected, result } of NOT_SHALLOW) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, - status: "completed", - errorCode: null, - }); - assert.equal(manifest.review_quality.looks_shallow, false, `[${name}] looks_shallow should be false for: ${result}`); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - false, - `[${name}] shallow_output should be absent for: ${result}`, - ); - } - for (const { selected, result } of STILL_SHALLOW) { - const manifest = targetBuildReviewAuditManifest({ - prompt: "rendered prompt", - sourceFiles: [{ path: selected, text: "export const value = 1;\n" }], - result, - status: 
"completed", - errorCode: null, - }); - assert.equal( - manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), - true, - `[${name}] shallow_output must STAY present (genuine negation/absence) for: ${result}`, - ); - } + const manifest = targetBuildReviewAuditManifest({ + prompt: "rendered prompt", + sourceFiles: [{ path: "src/tiny.js", text: "export const value = 1;\n" }], + result: "Verdict: REQUEST CHANGES. src/tiny.js value() has an off-by-one and should return the next value.", + status: "completed", + errorCode: null, + }); + assert.equal(manifest.review_quality.looks_shallow, true, `[${name}] unstructured tiny concrete review should be shallow`); + assert.equal(manifest.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present`); + assert.equal(manifest.review_quality.failed_review_slot, true, `[${name}] failed_review_slot should be true`); } }); @@ -4531,93 +4428,6 @@ test("root2 detector1: reviewer-process blocks (contraction/paraphrase/unicode) } }); -test("root2 detector3: praise/confirmation reusing defect vocabulary flags shallow_output; real findings stay clean", async () => { - // Reviewer/sweep-found false-negatives (decidable subset): "should not ", - // " as expected/promised", LGTM, and the passive "should not be affected" family (F2, below). - // The positive-sentiment praise subclass ("throws sensibly", "drops cleanly", "throws a helpful - // error") is SURFACE-UNDECIDABLE — it is token-identical to a real finding ("cleanly drops the - // final page"), so a keyword classifier cannot separate them without flagging correct reviews; - // a 76-case adversarial sweep disproved the lexicon approach (14 FPs + unbounded synonym leaks). - // That subclass, and the off-lexicon dismissal tail ("harmless"/"benign"/"no real risk"), are - // tracked in #236/#238 for the Way-2 advisory-disposition redesign, not patched by enumeration. 
- const FLAG = [ - "Verdict: APPROVE\nfoo() should not be a problem", - "Verdict: APPROVE\nparseConfig() should not cause issues", - "Verdict: APPROVE\ncache.get() should not regress", - "Verdict: APPROVE. parseConfig() throws on bad input as expected.", - "Verdict: APPROVE\nThe close() handler throws as promised on bad input.", - ]; - // Genuine concise findings — including the negation-strip-leftover FP cases — must STAY clean. - const CLEAN = [ - "Verdict: REQUEST CHANGES. socket close() is never called", - "Verdict: REQUEST CHANGES. acquire() does not free the slot", - "Verdict: REQUEST CHANGES. validateInput() should not return early", - "Verdict: REQUEST CHANGES. parseInt() returns the wrong index", - "Verdict: REQUEST_CHANGES\nindexInto() returns the wrong value because no bounds check guards the array access.", - "Verdict: REQUEST_CHANGES\nlookup() returns the wrong index when none of the keys match, instead of throwing.", - "Verdict: REQUEST_CHANGES\nthe happy path of encode() works as expected, but the empty-input branch throws and crashes.", - ]; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); - for (const result of FLAG) { - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); - assert.equal(m.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] shallow_output should be present for: ${result}`); - } - for (const result of CLEAN) { - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); - assert.equal(m.review_quality.looks_shallow, false, `[${name}] real finding must stay clean for: ${result}`); - } - } -}); - -// F2 + split-identity. 
The a5c2868 "behavior-identical" regex split silently dropped the passive -// "should not be affected" dismissal alternative, so a hand-wave APPROVE passed as a concrete -// finding (UNSAFE). This test pins the passive family AND enumerates a canonical phrase for every -// pre-split DISMISSAL_SHOULD_NOT alternative, so a future split cannot silently narrow a dismissal -// again (the root cause that let F2 ship). Each phrase co-locates a "should not" cue with a code -// locus, so the dismissal regex is the load-bearing classifier. -test("root2 detector3 F2: passive + every pre-split should-not dismissal flags shallow_output (split-identity)", async () => { - const SHOULD_NOT_CANON = [ - // F2 passive reassurance — EVERY impact participle in IMPACT_REASSURANCE_NEG (all 9, was 5/10). - "should not be affected", - "should not be impacted", - "should not be touched", - "should not be altered", - "should not be disturbed", - "should not be changed", - "should not be disrupted", - "should not be noticeable", - "should not be visible", - "should not be a factor", - // pre-split copular reassurance - "should not be a problem", - "should not be an issue", - "should not be a concern", - "should not be a blocker", - "should not be a big deal", - // pre-split active branches - "should not cause problems", - "should not cause trouble", - "should not create problems", - "should not introduce a regression", - "should not regress", - "should not matter", - "should not break", - "should not hurt", - "should not harm", - "should not affect anything", - "should not affect the output", - ]; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); - for (const tail of SHOULD_NOT_CANON) { - const result = `Verdict: APPROVE\nfoo() ${tail}`; - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); - 
assert.equal(m.review_quality.semantic_failure_reasons.includes("shallow_output"), true, `[${name}] should-not dismissal must flag shallow_output: ${tail}`); - } - } -}); - // --- PR #237 review round 2 (GLM/GPT/Claude): fixes verified through buildReviewAuditManifest --- // B1 revert reproductions. The a2a7be1 permission-praise suppressor let a GENUINELY blocked APPROVE @@ -4650,45 +4460,6 @@ test("root2 detector1: genuinely-blocked APPROVE with EACCES handling-praise fla } }); -// hasDefectCue four-way split oracle: each DEFECT_CUE_* alternative, used ALONE next to a code locus, -// must register a concrete finding (looks_shallow:false). If a future "behavior-identical" split drops -// any alternative, the corresponding case flips to shallow and this fails (the MAJOR test-gap closed). -test("root2 detector3: hasDefectCue split-identity — every defect-cue alternative escapes shallow alone", async () => { - const CUES = [ - "uses a global instead of the injected client", - "mutates the array rather than copying it", - "should return the count", - "fails to close the handle", - "does not free the slot", - "has an off-by-one", - "has a null deref", - "has a use-after-free", - "has a race condition", - "can overflow", - "can underflow", - "is incorrect", - "returns the wrong index", - "uses the wrong order", - "subtracts one too many", - "adds to the wrong bucket", - "drops the last element", - "leaks the buffer", - "swallows the error", - "throws on empty input", - "is never called", - "is never awaited", - "is never closed", - ]; - for (const [name, file] of REVIEW_PROMPT_MODULES) { - const { buildReviewAuditManifest: target } = await loadReviewPromptModule(file); - for (const cue of CUES) { - const result = `Verdict: REQUEST CHANGES\nnextPage() ${cue}.`; - const m = target({ prompt: "p", sourceFiles: [{ path: "x.js", text: "export const value = 1;\n" }], result, status: "completed", errorCode: null }); - assert.equal(m.review_quality.looks_shallow, false, 
`[${name}] defect cue must register a concrete finding: ${cue}`); - } - } -}); - // #238: ground-truth disposition guard -- a clean, substantive approval reached on a SOURCE-BEARING // review whose source send was BLOCKED (source_send_allowed=false) must not count as a satisfied // slot; it is demoted to failed_slot / source_not_sent. Legit diff-only (not source-bearing) and