From 153ca6156c93e027592710b0b7c7f60029070cc5 Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Fri, 29 May 2026 14:59:54 +0200 Subject: [PATCH 1/7] =?UTF-8?q?fix(audit-sweep):=20batch=20B=20=E2=80=94?= =?UTF-8?q?=20sync/consensus=20process.exit,=20fee-sum=20assertion,=20PROD?= =?UTF-8?q?=20gas-balance=20guard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three audit-sweep follow-ups from the 2026-05-28 bug-hunt that were deferred from batch A because they touch the consensus path. All are small, targeted fixes; the larger design-required items (assignNonce stub, vote race, GCREdit stubs) remain deferred. Sync.ts: - Genesis-hash mismatch (line 282) now throws instead of exit(1); the outer fastSync() has try/finally that resets state, and main().catch routes to gracefulShutdown. - Exhausted-peers branch (line 305) returns false so the existing boolean caller path applies, instead of taking the chain down. - fastSyncRoutine "Last block not coherent" check (line 867) now throws with a descriptive message instead of exit(1). - verifyLastBlockIntegrity now has an explicit Promise return type (was implicit). secretaryManager.ts: - receiveGreenLight unexpected-greenlight path (line 682) now logs at error level and returns false (function already returns boolean everywhere else), instead of killing the node mid-consensus round. - endConsensusRoutine hanging-greenlight cleanup (line 887) now logs the failure and continues with pre-held cleanup instead of exit(1); the catch was already isolating the wait so cleanup can proceed. - simulateSecretaryGoingOffline / simulateNormalNodeGoingOffline keep their process.exit(0) because that IS their documented purpose (chaos-test helpers), but now carry a WARNING block explaining the deliberate bypass of graceful shutdown so a future reader does not wire them into production without a gating flag. applyGasFeeSeparation.ts: - Add post-calculation assertion that network_fee + rpc_fee + additional_fee === total. Any rounding bug or config drift in calculateFeeBreakdown is now caught at the RPC boundary as a validation failure instead of surfacing later as a validator-side consensus disagreement. validateTransaction.ts: - Drop the `&& getSharedState.PROD` guard from the sender gas-balance check (line 304). The previous gate let non-prod nodes accept zero-balance transactions, which made devnet/staging diverge from PROD validation semantics. Devnet now uses a funded-genesis fixture, so unfunded broadcasts are no longer needed for local testing. --- src/libs/blockchain/routines/Sync.ts | 12 ++++-- .../routines/applyGasFeeSeparation.ts | 12 ++++++ .../routines/validateTransaction.ts | 9 ++++- .../consensus/v2/types/secretaryManager.ts | 40 +++++++++++++------ 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/src/libs/blockchain/routines/Sync.ts b/src/libs/blockchain/routines/Sync.ts index d0daa183..9b70abaf 100644 --- a/src/libs/blockchain/routines/Sync.ts +++ b/src/libs/blockchain/routines/Sync.ts @@ -256,7 +256,7 @@ async function verifyLastBlockIntegrity( peer: Peer, ourLastBlockNumber: number, ourLastBlockHash: string, -) { +): Promise { const ourGenesisHash = await Chain.getGenesisBlockHash() const seenPeers = new Set() let currentPeer: Peer | null = peer @@ -279,7 +279,9 @@ async function verifyLastBlockIntegrity( log.error("[fastSync] Genesis hash is not coherent") log.error(`[fastSync] Our hash: ${ourGenesisHash}`) log.error(`[fastSync] Peer hash: ${genesisBlock.hash}`) - process.exit(1) + throw new Error( + `[fastSync] Genesis hash mismatch with peer ${currentPeer.identity}: ours=${ourGenesisHash} peer=${genesisBlock.hash}`, + ) } // Verify if the last block hash is coherent @@ -302,7 +304,7 @@ async function verifyLastBlockIntegrity( log.error( "[fastSync] Exhausted all peers, could not verify last block integrity", ) - process.exit(1) + return false } /** @@ -864,7 +866,9 @@ async function fastSyncRoutine(peers: Peer[] = []) { if (!verified) { log.error("[fastSync] Last block is not coherent") - process.exit(1) + throw new Error( + "[fastSync] Last block integrity check failed — node refusing to sync against incoherent chain", + ) } } diff --git a/src/libs/blockchain/routines/applyGasFeeSeparation.ts b/src/libs/blockchain/routines/applyGasFeeSeparation.ts index 27a410b7..9bb417c1 100644 --- a/src/libs/blockchain/routines/applyGasFeeSeparation.ts +++ b/src/libs/blockchain/routines/applyGasFeeSeparation.ts @@ -117,6 +117,18 @@ export async function applyGasFeeSeparation( } } + // Audit-sweep batch B: assert components sum to total so any rounding + // bug or config drift in calculateFeeBreakdown is caught here instead + // of as a validator-side consensus disagreement. + const componentSum = + breakdown.network_fee + breakdown.rpc_fee + breakdown.additional_fee + if (componentSum !== breakdown.total) { + return { + ok: false, + message: `calculateFeeBreakdown components do not sum to total: network_fee=${breakdown.network_fee} + rpc_fee=${breakdown.rpc_fee} + additional_fee=${breakdown.additional_fee} = ${componentSum}, expected ${breakdown.total}`, + } + } + // Stamp the transaction with the per-component values + this // node's pubkey as the rpc_address. Peers receiving the signed // ValidityData rely on these fields being present. diff --git a/src/libs/blockchain/routines/validateTransaction.ts b/src/libs/blockchain/routines/validateTransaction.ts index 2506d2b6..dd143af0 100644 --- a/src/libs/blockchain/routines/validateTransaction.ts +++ b/src/libs/blockchain/routines/validateTransaction.ts @@ -300,8 +300,13 @@ async function defineGas( "non-negative integers in the active denomination.", ) } - // FIXME Overriding for testing - if (fromBalance < BigInt(compositeFeeAmount) && getSharedState.PROD) { + // Audit-sweep batch B: dropped the `&& getSharedState.PROD` guard so the + // balance check is enforced in every environment. The previous PROD-only + // gate let non-prod nodes accept zero-balance transactions, which made + // devnet/staging diverge from PROD validation semantics. Devnet now uses + // a funded-genesis fixture, so unfunded broadcasts are no longer needed + // for local testing. + if (fromBalance < BigInt(compositeFeeAmount)) { log.error( "TX", "[Native Tx Validation] [BALANCE ERROR] Insufficient balance for gas; required: " + diff --git a/src/libs/consensus/v2/types/secretaryManager.ts b/src/libs/consensus/v2/types/secretaryManager.ts index db9e0e84..f3e08d45 100644 --- a/src/libs/consensus/v2/types/secretaryManager.ts +++ b/src/libs/consensus/v2/types/secretaryManager.ts @@ -381,8 +381,14 @@ export default class SecretaryManager { } /** - * Simulates the secretary going offline. - * If we're forging block x = 5, kill the node if it's the secretary + * Chaos-test helper. Currently unused in production paths. + * + * WARNING: process.exit(0) below is intentional — this method exists to + * simulate an abrupt secretary crash for resilience testing and deliberately + * bypasses graceful shutdown. Do not wire into production routines without + * an explicit gating flag. + * + * If we're forging block x = 10, kill the node if it is the secretary. */ public async simulateSecretaryGoingOffline() { const weAreForgingBlock = this.shard.blockRef === 10 @@ -396,9 +402,14 @@ export default class SecretaryManager { } /** - * Simulates a normal node going offline. + * Chaos-test helper. Currently unused in production paths. + * + * WARNING: process.exit(0) below is intentional — this method exists to + * simulate an abrupt normal-node crash for resilience testing and + * deliberately bypasses graceful shutdown. Do not wire into production + * routines without an explicit gating flag. * - * If we're forging block x = 5, kill normal this node if it's not the secretary + * If we're forging block x = 5, kill the node if it is not the secretary. */ public async simulateNormalNodeGoingOffline() { const weAreForgingBlock10 = this.shard.blockRef === 5 @@ -674,12 +685,18 @@ export default class SecretaryManager { return true } - log.debug("We don't know what to do with this green light") - log.debug(`Validator phase: ${validatorPhase}`) - log.debug(`Our phase: ${this.ourValidatorPhase.currentPhase}`) - log.debug(`Secretary block timestamp: ${secretaryBlockTimestamp}`) - log.debug(`Block timestamp: ${this.blockTimestamp}`) - process.exit(1) + log.error( + "[SECRETARY] Received an unexpected green light — ignoring to preserve consensus round", + ) + log.error(`[SECRETARY] Validator phase: ${validatorPhase}`) + log.error( + `[SECRETARY] Our phase: ${this.ourValidatorPhase.currentPhase}`, + ) + log.error( + `[SECRETARY] Secretary block timestamp: ${secretaryBlockTimestamp}`, + ) + log.error(`[SECRETARY] Block timestamp: ${this.blockTimestamp}`) + return false } /** @@ -882,9 +899,8 @@ export default class SecretaryManager { await Promise.all(waiters) } catch (error) { log.error( - `[SECRETARY] Error waiting for hanging greenlights: ${error}`, + `[SECRETARY] Error waiting for hanging greenlights: ${error instanceof Error ? error.message : String(error)} — continuing with pre-held cleanup`, ) - process.exit(1) } Waiter.preHeld From fec8c69ff630d0ed3af6388887bd1e6736c06321 Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Fri, 29 May 2026 15:22:50 +0200 Subject: [PATCH 2/7] fix(audit-sweep): drop PROD-only balance guard in applyGasFeeSeparation (greploop iter 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile review of PR #883 flagged a validation split: this PR dropped the `&& getSharedState.PROD` guard from `validateTransaction.defineGas` so the gas-balance check is enforced in every environment, but the parallel guard in `applyGasFeeSeparation` at line 146 was left intact with a stale comment that explicitly claimed it matched the legacy defineGas behaviour — which is no longer true after the defineGas change. Drops the `if (getSharedState.PROD)` wrapper around the balance check in applyGasFeeSeparation as well, so both gas-balance paths enforce the same semantics in every environment. Comment updated to match. `getSharedState` import retained — still consumed by `signingAlgorithm` lookup on line 136. --- .../routines/applyGasFeeSeparation.ts | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/libs/blockchain/routines/applyGasFeeSeparation.ts b/src/libs/blockchain/routines/applyGasFeeSeparation.ts index 9bb417c1..288f00ff 100644 --- a/src/libs/blockchain/routines/applyGasFeeSeparation.ts +++ b/src/libs/blockchain/routines/applyGasFeeSeparation.ts @@ -141,25 +141,28 @@ export async function applyGasFeeSeparation( tx.content.transaction_fee.additional_fee = breakdown.additional_fee tx.content.transaction_fee.rpc_address = rpcAddressHex - // Sender balance check — only enforced in PROD (matches the legacy - // defineGas behavior so non-prod testing can submit unfunded txs). - if (getSharedState.PROD) { - let senderBalance: bigint - try { - senderBalance = await GCR.getAccountBalance(senderAddress) - } catch (e) { - return { - ok: false, - message: `failed to read sender balance: ${ - e instanceof Error ? e.message : stringifyNonError(e) - }`, - } + // Audit-sweep batch B: balance check is now enforced in every + // environment. The previous PROD-only gate (paired with the same + // gate in validateTransaction.defineGas, also dropped in this + // batch) let non-prod nodes accept unfunded transactions, which + // made devnet/staging diverge from PROD validation semantics. + // Devnet uses a funded-genesis fixture, so unfunded broadcasts + // are no longer needed for local testing. + let senderBalance: bigint + try { + senderBalance = await GCR.getAccountBalance(senderAddress) + } catch (e) { + return { + ok: false, + message: `failed to read sender balance: ${ + e instanceof Error ? e.message : stringifyNonError(e) + }`, } - if (senderBalance < BigInt(breakdown.total)) { - return { - ok: false, - message: `sender balance ${senderBalance.toString()} < total fee ${breakdown.total}`, - } + } + if (senderBalance < BigInt(breakdown.total)) { + return { + ok: false, + message: `sender balance ${senderBalance.toString()} < total fee ${breakdown.total}`, } } From f02f12313127f8ee44d6228b1bb4f96aa03762bb Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Fri, 29 May 2026 15:29:15 +0200 Subject: [PATCH 3/7] fix(audit-sweep): genesis-mismatch retries next peer, refresh stale PROD docstring (greploop iter 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile review of PR #883 caught a real correctness regression in iter 1 plus a stale comment: P1 — Sync.ts:278-285 (genesis-mismatch was breaking the peer-iteration loop) verifyLastBlockIntegrity is designed as a multi-peer iteration loop: every other failure branch (unavailable genesis block, unavailable last block) logs + advances `currentPeer = findNextAvailablePeer( seenPeers)` and `continue`s. The previous batch-B fix replaced `process.exit(1)` with `throw`, which abandoned all remaining candidates after a single bad peer. In a network where one peer is misconfigured or forked but the rest are healthy, the node would trigger graceful shutdown instead of syncing from a good peer. Fix: align with the unavailable-block branches — log the mismatch, advance to the next peer via the same helper. If every peer has a mismatched genesis, the loop exhausts and falls through to the existing `return false` at the bottom of the function, which triggers the descriptive throw at the fastSyncRoutine call site added in the original batch B. Stale docstring — applyGasFeeSeparation.ts (file header, step 3) The "(PROD only) Reads the sender's GCR balance ..." line in the file-level JSDoc was already updated for the runtime change in the iter-1 commit but the header docstring still claimed PROD-only. Refreshed to describe the new "enforced in every environment" semantic and reference batch B as the change point. --- src/libs/blockchain/routines/Sync.ts | 9 ++++----- src/libs/blockchain/routines/applyGasFeeSeparation.ts | 6 ++++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/libs/blockchain/routines/Sync.ts b/src/libs/blockchain/routines/Sync.ts index 9b70abaf..b8c17b36 100644 --- a/src/libs/blockchain/routines/Sync.ts +++ b/src/libs/blockchain/routines/Sync.ts @@ -276,12 +276,11 @@ async function verifyLastBlockIntegrity( } if (genesisBlock.hash !== ourGenesisHash) { - log.error("[fastSync] Genesis hash is not coherent") - log.error(`[fastSync] Our hash: ${ourGenesisHash}`) - log.error(`[fastSync] Peer hash: ${genesisBlock.hash}`) - throw new Error( - `[fastSync] Genesis hash mismatch with peer ${currentPeer.identity}: ours=${ourGenesisHash} peer=${genesisBlock.hash}`, + log.error( + `[fastSync] Genesis hash mismatch with peer ${currentPeer.identity}: ours=${ourGenesisHash} peer=${genesisBlock.hash}, trying next peer`, ) + currentPeer = findNextAvailablePeer(seenPeers) + continue } // Verify if the last block hash is coherent diff --git a/src/libs/blockchain/routines/applyGasFeeSeparation.ts b/src/libs/blockchain/routines/applyGasFeeSeparation.ts index 288f00ff..55f2bebc 100644 --- a/src/libs/blockchain/routines/applyGasFeeSeparation.ts +++ b/src/libs/blockchain/routines/applyGasFeeSeparation.ts @@ -22,8 +22,10 @@ KyneSys Labs: https://www.kynesys.xyz/ * additional_fee, rpc_address}` with the breakdown values + this * node's signing pubkey. Peers verifying the signed ValidityData * rely on those fields being present. - * 3. (PROD only) Reads the sender's GCR balance and rejects if it is - * below the total fee. + * 3. Reads the sender's GCR balance and rejects if it is below the + * total fee. Enforced in every environment as of audit-sweep + * batch B (the previous PROD-only gate let non-prod nodes accept + * unfunded transactions, which made devnet diverge from PROD). * 4. Generates the fee-distribution GCREdits via * {@link generateFeeDistributionEdits} and prepends them onto * `tx.content.gcr_edits` so the fee deductions apply before any From 6d5f4ed9ea2f7d45fa7dfdb76edff3f734aabe45 Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Fri, 29 May 2026 15:35:17 +0200 Subject: [PATCH 4/7] fix(audit-sweep): replace tautological fee-sum assertion with per-component validation (greploop iter 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile review of PR #883 caught that the fee-sum assertion added in the original batch B commit was tautological. calculateFeeBreakdown computes: total: network_fee + rpc_fee + additional_fee from the same three local variables it returns as components. Asserting that components-sum === total recomputes the same arithmetic from the same returned values and can never fail — it offers zero protection against the failure modes the comment claimed it would catch ("rounding bug or config drift"). The actual failure surface is each component going non-numeric or out-of-range via the `scalar * surge` multiplication in calculateFeeBreakdown: const network_fee = getSharedState.networkFee * surge const rpc_fee = getSharedState.rpcFee * surge const additional_fee = getSharedState.additionalFee * surge A misconfigured scalar (negative governance proposal, accidental float coefficient, NaN from a broken dynamicSurgeMultiplier) produces one or more bad components which propagate into tx.content.transaction_fee and the fee-distribution edits, and finally surface as a validator-side consensus disagreement. Replace the tautology with per-component finite/integer/non-negative validation across all four fields (network_fee, rpc_fee, additional_fee, total). The first invalid component triggers an actionable error message naming the bad field and including the full breakdown for diagnosis, instead of the tx being silently accepted at the RPC boundary and rejected later by other validators. --- .../routines/applyGasFeeSeparation.ts | 54 ++++++++++++------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/src/libs/blockchain/routines/applyGasFeeSeparation.ts b/src/libs/blockchain/routines/applyGasFeeSeparation.ts index 55f2bebc..8d618c88 100644 --- a/src/libs/blockchain/routines/applyGasFeeSeparation.ts +++ b/src/libs/blockchain/routines/applyGasFeeSeparation.ts @@ -46,7 +46,10 @@ KyneSys Labs: https://www.kynesys.xyz/ import { GCREdit } from "@kynesyslabs/demosdk/types" import type { Transaction as ITransaction } from "@kynesyslabs/demosdk/types" import { ucrypto, uint8ArrayToHex } from "@kynesyslabs/demosdk/encryption" -import { calculateFeeBreakdown } from "@/libs/blockchain/routines/calculateCurrentGas" +import { + calculateFeeBreakdown, + type FeeBreakdown, +} from "@/libs/blockchain/routines/calculateCurrentGas" import { generateFeeDistributionEdits } from "@/libs/blockchain/gcr/gcr_routines/feeDistribution" import GCR from "@/libs/blockchain/gcr/gcr" import { forgeToHex } from "@/libs/crypto/forgeUtils" @@ -108,26 +111,37 @@ export async function applyGasFeeSeparation( // Compute per-component breakdown. const breakdown = await calculateFeeBreakdown(tx) - if ( - !Number.isFinite(breakdown.total) || - !Number.isInteger(breakdown.total) || - breakdown.total < 0 - ) { - return { - ok: false, - message: `calculateFeeBreakdown returned non-integer total: ${breakdown.total}`, - } - } - // Audit-sweep batch B: assert components sum to total so any rounding - // bug or config drift in calculateFeeBreakdown is caught here instead - // of as a validator-side consensus disagreement. - const componentSum = - breakdown.network_fee + breakdown.rpc_fee + breakdown.additional_fee - if (componentSum !== breakdown.total) { - return { - ok: false, - message: `calculateFeeBreakdown components do not sum to total: network_fee=${breakdown.network_fee} + rpc_fee=${breakdown.rpc_fee} + additional_fee=${breakdown.additional_fee} = ${componentSum}, expected ${breakdown.total}`, + // Audit-sweep batch B: validate every fee component independently. + // calculateFeeBreakdown derives `total` as the direct sum of the + // three component locals, so asserting `total` alone, or asserting + // components-sum === total, is tautological with the current + // implementation. The real failure surface is each component + // becoming NaN / Infinity / negative / fractional via the + // `scalar * surge` multiplication: a misconfigured scalar + // (negative governance proposal, accidental float coefficient) or + // a broken `dynamicSurgeMultiplier` will produce one or more bad + // components, which then propagate into `tx.content.transaction_fee` + // and the fee-distribution edits and finally surface as + // validator-side consensus disagreement. Validate each component + // here so the tx is rejected at the RPC boundary with an + // actionable per-component message instead. + const components: Array<[keyof FeeBreakdown, number]> = [ + ["network_fee", breakdown.network_fee], + ["rpc_fee", breakdown.rpc_fee], + ["additional_fee", breakdown.additional_fee], + ["total", breakdown.total], + ] + for (const [name, value] of components) { + if ( + !Number.isFinite(value) || + !Number.isInteger(value) || + value < 0 + ) { + return { + ok: false, + message: `calculateFeeBreakdown produced an invalid ${name}: ${value} (must be a non-negative integer; full breakdown: network_fee=${breakdown.network_fee}, rpc_fee=${breakdown.rpc_fee}, additional_fee=${breakdown.additional_fee}, total=${breakdown.total})`, + } } } From 51f30d2486e4ecebba0440807d98bd1fbe1cf0e1 Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Fri, 29 May 2026 15:40:31 +0200 Subject: [PATCH 5/7] fix(audit-sweep): remove process.exit in sendGreenlightToValidators (greploop iter 4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile review of PR #883 caught a third process.exit(1) in this same file that the original batch B commit missed: line 618 inside releaseWaitingMembers's send loop (sendGreenlightToValidators). A single misbehaving validator returning any non-200/400 HTTP status would kill the secretary mid-consensus round with partial state. Same class of issue this PR was scoped to fix. Drop the process.exit and `continue` to the next validator in the result-iteration loop. The validator stays in `waitingMembers` for the next greenlight pass, so secretaryRoutine retries via its own timeout-driven loop. If every validator fails, the round eventually times out at the existing `_greenlight_timeout` boundary instead of taking the node down with partial round state. After this commit secretaryManager.ts has zero remaining real process.exit calls — only the two chaos-test helpers (simulateSecretaryGoingOffline, simulateNormalNodeGoingOffline) which are documented with WARNING blocks explaining the intentional bypass of graceful shutdown. --- src/libs/consensus/v2/types/secretaryManager.ts | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/libs/consensus/v2/types/secretaryManager.ts b/src/libs/consensus/v2/types/secretaryManager.ts index f3e08d45..31ad281f 100644 --- a/src/libs/consensus/v2/types/secretaryManager.ts +++ b/src/libs/consensus/v2/types/secretaryManager.ts @@ -612,10 +612,19 @@ export default class SecretaryManager { } log.error( - `[SECRETARY ROUTINE] Error sending greenlight to ${pubKey}`, + `[SECRETARY ROUTINE] Error sending greenlight to ${pubKey} (result=${result?.result}) — skipping this validator and continuing with the round`, ) log.error(`Response: ${JSON.stringify(result)}`) - process.exit(1) + // Audit-sweep batch B: was process.exit(1). A single + // misbehaving validator returning an unexpected HTTP + // status no longer kills the secretary mid-consensus + // round. The validator stays in `waitingMembers` for the + // next greenlight pass; secretaryRoutine will retry via + // its own timeout-driven loop. If every validator fails, + // the consensus round eventually times out at the + // existing `_greenlight_timeout` boundary instead of + // taking the node down with partial round state. + continue } return true From 56ce0793cbcb5bd3cd267663ba866daf64f43abe Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Fri, 29 May 2026 15:46:23 +0200 Subject: [PATCH 6/7] fix(audit-sweep): await receiveGreenLight at RPC call site (greploop iter 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile review of PR #883 caught a pre-existing async-call-site bug that the batch B receiveGreenLight change exposed. manageConsensusRoutines.ts:377 calls `manager.receiveGreenLight(...)` without `await`. The return value is a Promise, which is always truthy, so the response.result on the next line was always 200 regardless of receiveGreenLight's actual return value. Before this batch, that did not matter much because the unreachable-state path in receiveGreenLight was a process.exit(1) that killed the node before the response was sent. The iter-1 batch B commit converted that exit to `return false`, which the unawaited call site silently masked as a successful 200 — the very behaviour the conversion was meant to expose. Add the missing `await`. The enclosing manageConsensusRoutines function is already async, so this is a single-keyword change with no caller-chain implications. After this commit the receiveGreenLight `return false` path correctly surfaces as an HTTP 400 to the secretary, matching the original intent of the conversion. --- src/libs/network/manageConsensusRoutines.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/libs/network/manageConsensusRoutines.ts b/src/libs/network/manageConsensusRoutines.ts index c9269b58..9f3be459 100644 --- a/src/libs/network/manageConsensusRoutines.ts +++ b/src/libs/network/manageConsensusRoutines.ts @@ -374,7 +374,16 @@ export default async function manageConsensusRoutines( } // INFO: Act on the greenlight - const greenLightReceived = manager.receiveGreenLight( + // Audit-sweep batch B (greploop iter 5): added missing + // `await`. receiveGreenLight is async, so the previous + // call site assigned a Promise to + // `greenLightReceived` — which is always truthy, so the + // response.result was always 200 regardless of the + // actual return value. After the batch B change that + // converted an unreachable-state process.exit(1) inside + // receiveGreenLight to `return false`, the unawaited call + // was silently masking that 400-class failure as a 200. + const greenLightReceived = await manager.receiveGreenLight( timestamp, validatorPhase, ) From 369e5a33886758ad75daba1f7eeb27f761c2c376 Mon Sep 17 00:00:00 2001 From: tcsenpai Date: Fri, 29 May 2026 15:52:28 +0200 Subject: [PATCH 7/7] docs(audit-sweep): correct retry-guarantee overstatement in greenlight-error comment (greploop iter 6 - comment only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile review of PR #883 flagged the comment added in the iter-4 commit overstating recovery semantics. The original comment claimed the validator "stays in waitingMembers for the next greenlight pass" and that secretaryRoutine would "retry via its own timeout-driven loop" — but waitStatus has already been cleared by the time the releaseWaitingMembers result-iteration loop runs, so the validator does NOT automatically re-enter the wait queue. The actual fallback is the `_greenlight_timeout` boundary firing, exactly as the second half of the original comment correctly described. Comment-only fix. No code behaviour change. --- src/libs/consensus/v2/types/secretaryManager.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/libs/consensus/v2/types/secretaryManager.ts b/src/libs/consensus/v2/types/secretaryManager.ts index 31ad281f..9b6159c6 100644 --- a/src/libs/consensus/v2/types/secretaryManager.ts +++ b/src/libs/consensus/v2/types/secretaryManager.ts @@ -618,12 +618,13 @@ export default class SecretaryManager { // Audit-sweep batch B: was process.exit(1). A single // misbehaving validator returning an unexpected HTTP // status no longer kills the secretary mid-consensus - // round. The validator stays in `waitingMembers` for the - // next greenlight pass; secretaryRoutine will retry via - // its own timeout-driven loop. If every validator fails, - // the consensus round eventually times out at the - // existing `_greenlight_timeout` boundary instead of - // taking the node down with partial round state. + // round. The validator's waitStatus has already been + // cleared by the time this loop runs, so it does NOT + // automatically re-enter the wait queue — the round's + // fallback is the existing `_greenlight_timeout` + // boundary, which will fire and trigger the standard + // timeout-recovery path instead of taking the node down + // with partial round state. continue }