diff --git a/src/libs/blockchain/routines/Sync.ts b/src/libs/blockchain/routines/Sync.ts index d0daa183..b8c17b36 100644 --- a/src/libs/blockchain/routines/Sync.ts +++ b/src/libs/blockchain/routines/Sync.ts @@ -256,7 +256,7 @@ async function verifyLastBlockIntegrity( peer: Peer, ourLastBlockNumber: number, ourLastBlockHash: string, -) { +): Promise { const ourGenesisHash = await Chain.getGenesisBlockHash() const seenPeers = new Set() let currentPeer: Peer | null = peer @@ -276,10 +276,11 @@ async function verifyLastBlockIntegrity( } if (genesisBlock.hash !== ourGenesisHash) { - log.error("[fastSync] Genesis hash is not coherent") - log.error(`[fastSync] Our hash: ${ourGenesisHash}`) - log.error(`[fastSync] Peer hash: ${genesisBlock.hash}`) - process.exit(1) + log.error( + `[fastSync] Genesis hash mismatch with peer ${currentPeer.identity}: ours=${ourGenesisHash} peer=${genesisBlock.hash}, trying next peer`, + ) + currentPeer = findNextAvailablePeer(seenPeers) + continue } // Verify if the last block hash is coherent @@ -302,7 +303,7 @@ async function verifyLastBlockIntegrity( log.error( "[fastSync] Exhausted all peers, could not verify last block integrity", ) - process.exit(1) + return false } /** @@ -864,7 +865,9 @@ async function fastSyncRoutine(peers: Peer[] = []) { if (!verified) { log.error("[fastSync] Last block is not coherent") - process.exit(1) + throw new Error( + "[fastSync] Last block integrity check failed — node refusing to sync against incoherent chain", + ) } } diff --git a/src/libs/blockchain/routines/applyGasFeeSeparation.ts b/src/libs/blockchain/routines/applyGasFeeSeparation.ts index 27a410b7..8d618c88 100644 --- a/src/libs/blockchain/routines/applyGasFeeSeparation.ts +++ b/src/libs/blockchain/routines/applyGasFeeSeparation.ts @@ -22,8 +22,10 @@ KyneSys Labs: https://www.kynesys.xyz/ * additional_fee, rpc_address}` with the breakdown values + this * node's signing pubkey. Peers verifying the signed ValidityData * rely on those fields being present. - * 3. (PROD only) Reads the sender's GCR balance and rejects if it is - * below the total fee. + * 3. Reads the sender's GCR balance and rejects if it is below the + * total fee. Enforced in every environment as of audit-sweep + * batch B (the previous PROD-only gate let non-prod nodes accept + * unfunded transactions, which made devnet diverge from PROD). * 4. Generates the fee-distribution GCREdits via * {@link generateFeeDistributionEdits} and prepends them onto * `tx.content.gcr_edits` so the fee deductions apply before any @@ -44,7 +46,10 @@ KyneSys Labs: https://www.kynesys.xyz/ import { GCREdit } from "@kynesyslabs/demosdk/types" import type { Transaction as ITransaction } from "@kynesyslabs/demosdk/types" import { ucrypto, uint8ArrayToHex } from "@kynesyslabs/demosdk/encryption" -import { calculateFeeBreakdown } from "@/libs/blockchain/routines/calculateCurrentGas" +import { + calculateFeeBreakdown, + type FeeBreakdown, +} from "@/libs/blockchain/routines/calculateCurrentGas" import { generateFeeDistributionEdits } from "@/libs/blockchain/gcr/gcr_routines/feeDistribution" import GCR from "@/libs/blockchain/gcr/gcr" import { forgeToHex } from "@/libs/crypto/forgeUtils" @@ -106,14 +111,37 @@ export async function applyGasFeeSeparation( // Compute per-component breakdown. const breakdown = await calculateFeeBreakdown(tx) - if ( - !Number.isFinite(breakdown.total) || - !Number.isInteger(breakdown.total) || - breakdown.total < 0 - ) { - return { - ok: false, - message: `calculateFeeBreakdown returned non-integer total: ${breakdown.total}`, + + // Audit-sweep batch B: validate every fee component independently. + // calculateFeeBreakdown derives `total` as the direct sum of the + // three component locals, so asserting `total` alone, or asserting + // components-sum === total, is tautological with the current + // implementation. The real failure surface is each component + // becoming NaN / Infinity / negative / fractional via the + // `scalar * surge` multiplication: a misconfigured scalar + // (negative governance proposal, accidental float coefficient) or + // a broken `dynamicSurgeMultiplier` will produce one or more bad + // components, which then propagate into `tx.content.transaction_fee` + // and the fee-distribution edits and finally surface as + // validator-side consensus disagreement. Validate each component + // here so the tx is rejected at the RPC boundary with an + // actionable per-component message instead. + const components: Array<[keyof FeeBreakdown, number]> = [ + ["network_fee", breakdown.network_fee], + ["rpc_fee", breakdown.rpc_fee], + ["additional_fee", breakdown.additional_fee], + ["total", breakdown.total], + ] + for (const [name, value] of components) { + if ( + !Number.isFinite(value) || + !Number.isInteger(value) || + value < 0 + ) { + return { + ok: false, + message: `calculateFeeBreakdown produced an invalid ${name}: ${value} (must be a non-negative integer; full breakdown: network_fee=${breakdown.network_fee}, rpc_fee=${breakdown.rpc_fee}, additional_fee=${breakdown.additional_fee}, total=${breakdown.total})`, + } } } @@ -129,25 +157,28 @@ export async function applyGasFeeSeparation( tx.content.transaction_fee.additional_fee = breakdown.additional_fee tx.content.transaction_fee.rpc_address = rpcAddressHex - // Sender balance check — only enforced in PROD (matches the legacy - // defineGas behavior so non-prod testing can submit unfunded txs). - if (getSharedState.PROD) { - let senderBalance: bigint - try { - senderBalance = await GCR.getAccountBalance(senderAddress) - } catch (e) { - return { - ok: false, - message: `failed to read sender balance: ${ - e instanceof Error ? e.message : stringifyNonError(e) - }`, - } + // Audit-sweep batch B: balance check is now enforced in every + // environment. The previous PROD-only gate (paired with the same + // gate in validateTransaction.defineGas, also dropped in this + // batch) let non-prod nodes accept unfunded transactions, which + // made devnet/staging diverge from PROD validation semantics. + // Devnet uses a funded-genesis fixture, so unfunded broadcasts + // are no longer needed for local testing. + let senderBalance: bigint + try { + senderBalance = await GCR.getAccountBalance(senderAddress) + } catch (e) { + return { + ok: false, + message: `failed to read sender balance: ${ + e instanceof Error ? e.message : stringifyNonError(e) + }`, } - if (senderBalance < BigInt(breakdown.total)) { - return { - ok: false, - message: `sender balance ${senderBalance.toString()} < total fee ${breakdown.total}`, - } + } + if (senderBalance < BigInt(breakdown.total)) { + return { + ok: false, + message: `sender balance ${senderBalance.toString()} < total fee ${breakdown.total}`, } } diff --git a/src/libs/blockchain/routines/validateTransaction.ts b/src/libs/blockchain/routines/validateTransaction.ts index 2506d2b6..dd143af0 100644 --- a/src/libs/blockchain/routines/validateTransaction.ts +++ b/src/libs/blockchain/routines/validateTransaction.ts @@ -300,8 +300,13 @@ async function defineGas( "non-negative integers in the active denomination.", ) } - // FIXME Overriding for testing - if (fromBalance < BigInt(compositeFeeAmount) && getSharedState.PROD) { + // Audit-sweep batch B: dropped the `&& getSharedState.PROD` guard so the + // balance check is enforced in every environment. The previous PROD-only + // gate let non-prod nodes accept zero-balance transactions, which made + // devnet/staging diverge from PROD validation semantics. Devnet now uses + // a funded-genesis fixture, so unfunded broadcasts are no longer needed + // for local testing. + if (fromBalance < BigInt(compositeFeeAmount)) { log.error( "TX", "[Native Tx Validation] [BALANCE ERROR] Insufficient balance for gas; required: " + diff --git a/src/libs/consensus/v2/types/secretaryManager.ts b/src/libs/consensus/v2/types/secretaryManager.ts index db9e0e84..9b6159c6 100644 --- a/src/libs/consensus/v2/types/secretaryManager.ts +++ b/src/libs/consensus/v2/types/secretaryManager.ts @@ -381,8 +381,14 @@ export default class SecretaryManager { } /** - * Simulates the secretary going offline. - * If we're forging block x = 5, kill the node if it's the secretary + * Chaos-test helper. Currently unused in production paths. + * + * WARNING: process.exit(0) below is intentional — this method exists to + * simulate an abrupt secretary crash for resilience testing and deliberately + * bypasses graceful shutdown. Do not wire into production routines without + * an explicit gating flag. + * + * If we're forging block x = 10, kill the node if it is the secretary. */ public async simulateSecretaryGoingOffline() { const weAreForgingBlock = this.shard.blockRef === 10 @@ -396,9 +402,14 @@ export default class SecretaryManager { } /** - * Simulates a normal node going offline. + * Chaos-test helper. Currently unused in production paths. + * + * WARNING: process.exit(0) below is intentional — this method exists to + * simulate an abrupt normal-node crash for resilience testing and + * deliberately bypasses graceful shutdown. Do not wire into production + * routines without an explicit gating flag. * - * If we're forging block x = 5, kill normal this node if it's not the secretary + * If we're forging block x = 5, kill the node if it is not the secretary. */ public async simulateNormalNodeGoingOffline() { const weAreForgingBlock10 = this.shard.blockRef === 5 @@ -601,10 +612,20 @@ export default class SecretaryManager { } log.error( - `[SECRETARY ROUTINE] Error sending greenlight to ${pubKey}`, + `[SECRETARY ROUTINE] Error sending greenlight to ${pubKey} (result=${result?.result}) — skipping this validator and continuing with the round`, ) log.error(`Response: ${JSON.stringify(result)}`) - process.exit(1) + // Audit-sweep batch B: was process.exit(1). A single + // misbehaving validator returning an unexpected HTTP + // status no longer kills the secretary mid-consensus + // round. The validator's waitStatus has already been + // cleared by the time this loop runs, so it does NOT + // automatically re-enter the wait queue — the round's + // fallback is the existing `_greenlight_timeout` + // boundary, which will fire and trigger the standard + // timeout-recovery path instead of taking the node down + // with partial round state. + continue } return true @@ -674,12 +695,18 @@ export default class SecretaryManager { return true } - log.debug("We don't know what to do with this green light") - log.debug(`Validator phase: ${validatorPhase}`) - log.debug(`Our phase: ${this.ourValidatorPhase.currentPhase}`) - log.debug(`Secretary block timestamp: ${secretaryBlockTimestamp}`) - log.debug(`Block timestamp: ${this.blockTimestamp}`) - process.exit(1) + log.error( + "[SECRETARY] Received an unexpected green light — ignoring to preserve consensus round", + ) + log.error(`[SECRETARY] Validator phase: ${validatorPhase}`) + log.error( + `[SECRETARY] Our phase: ${this.ourValidatorPhase.currentPhase}`, + ) + log.error( + `[SECRETARY] Secretary block timestamp: ${secretaryBlockTimestamp}`, + ) + log.error(`[SECRETARY] Block timestamp: ${this.blockTimestamp}`) + return false } /** @@ -882,9 +909,8 @@ export default class SecretaryManager { await Promise.all(waiters) } catch (error) { log.error( - `[SECRETARY] Error waiting for hanging greenlights: ${error}`, + `[SECRETARY] Error waiting for hanging greenlights: ${error instanceof Error ? error.message : String(error)} — continuing with pre-held cleanup`, ) - process.exit(1) } Waiter.preHeld diff --git a/src/libs/network/manageConsensusRoutines.ts b/src/libs/network/manageConsensusRoutines.ts index c9269b58..9f3be459 100644 --- a/src/libs/network/manageConsensusRoutines.ts +++ b/src/libs/network/manageConsensusRoutines.ts @@ -374,7 +374,16 @@ export default async function manageConsensusRoutines( } // INFO: Act on the greenlight - const greenLightReceived = manager.receiveGreenLight( + // Audit-sweep batch B (greploop iter 5): added missing + // `await`. receiveGreenLight is async, so the previous + // call site assigned a Promise to + // `greenLightReceived` — which is always truthy, so the + // response.result was always 200 regardless of the + // actual return value. After the batch B change that + // converted an unreachable-state process.exit(1) inside + // receiveGreenLight to `return false`, the unawaited call + // was silently masking that 400-class failure as a 200. + const greenLightReceived = await manager.receiveGreenLight( timestamp, validatorPhase, )