From bc7498b89d66c325e177ebea4897c6a6dcec365e Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Fri, 29 May 2026 21:16:47 -0700 Subject: [PATCH 1/2] UpgradeTxClusters: deploy cluster-downgrade upgrade entry before load The mission deployed the `ledgerMaxDependentTxClusters` 8 -> 4 config-upgrade entry *after* starting the long-running soroban_invoke load. Deploying the entry takes a single Soroban transaction, but the load saturates the (instruction-limited) Soroban lane, so that single transaction gets starved and never makes it into a ledger. The upgrade-set entry is therefore never written, the upgrade can never be armed/applied, and `WaitForMaxDependentTxClusters 4` spins until the mission times out. This only reproduces under enough sustained load to fill the lane, which is why it passed locally but timed out on the cluster. Split DeployUpgradeEntriesAndArm into DeployUpgradeEntries (writes the upgrade-set entry, returns its key) and ArmUpgradeEntries (arms a key on each peer); the existing combined helpers are rebuilt on top so all other callers are unchanged. The mission now deploys the downgrade entry while the network is quiet (after setup, before the flood) and arms it during the load. --- src/FSLibrary/MissionUpgradeTxClusters.fs | 32 ++++++++++++------ src/FSLibrary/StellarStatefulSets.fs | 41 ++++++++++++----------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/src/FSLibrary/MissionUpgradeTxClusters.fs b/src/FSLibrary/MissionUpgradeTxClusters.fs index 21b08b99..e46acb30 100644 --- a/src/FSLibrary/MissionUpgradeTxClusters.fs +++ b/src/FSLibrary/MissionUpgradeTxClusters.fs @@ -65,21 +65,33 @@ let upgradeTxClusters (context: MissionContext) = let peer0 = formation.NetworkCfg.GetPeer coreSet 0 peer0.WaitForMaxDependentTxClusters 8 - // Don't use peer 0. This will let us run soroban_invoke on peer 1 - // and then do the settings upgrade on peer 0. + // Don't use peer 0. This will let us run soroban_invoke on peer 1 and + // then arm the settings upgrade on peer 0 while the load is running. let peer1 = formation.NetworkCfg.GetPeer coreSet 1 LogInfo "Loadgen: %s" (peer1.GenerateLoad context.SetupSorobanInvoke) peer1.WaitForLoadGenComplete context.SetupSorobanInvoke - // 1500 txs at a rate of 15 txs/sec should take ~100 seconds, so we'll have enough time to do both settings upgrades + + // Deploy (but do not yet arm) the entry that lowers + // ledgerMaxDependentTxClusters to 4, while the network is still quiet -- + // i.e. before starting the long-running soroban load. Deploying the + // upgrade-set entry takes a single Soroban transaction, and once loadgen + // saturates the (instruction-limited) Soroban lane that transaction gets + // starved and never makes it into a ledger, so the entry would never be + // written and the upgrade could never be armed or applied. + let clusterDowngradeKey = + formation.DeployUpgradeEntries + [ coreSet ] + { LoadGen.GetDefault() with + mode = CreateSorobanUpgrade + ledgerMaxDependentTxClusters = Some(4) + minSorobanPercentSuccess = Some 100 } + + // 1500 txs at a rate of 15 txs/sec should take ~100 seconds, giving us + // time to arm the cluster downgrade while the load is running. LogInfo "Loadgen: %s" (peer1.GenerateLoad context.GenerateSorobanInvokeLoad) - formation.DeployUpgradeEntriesAndArm - [ coreSet ] - { LoadGen.GetDefault() with - mode = CreateSorobanUpgrade - ledgerMaxDependentTxClusters = Some(4) - minSorobanPercentSuccess = Some 100 } - (System.DateTime.UtcNow.AddSeconds(20.0)) + // Arm the already-deployed downgrade while the load is in flight. + formation.ArmUpgradeEntries [ coreSet ] clusterDowngradeKey (System.DateTime.UtcNow.AddSeconds(20.0)) peer0.WaitForMaxDependentTxClusters 4 diff --git a/src/FSLibrary/StellarStatefulSets.fs b/src/FSLibrary/StellarStatefulSets.fs index c7a573a3..7983d056 100644 --- a/src/FSLibrary/StellarStatefulSets.fs +++ b/src/FSLibrary/StellarStatefulSets.fs @@ -362,11 +362,14 @@ type StellarFormation with self.RunLoadgen coreSet loadgen - member self.DeployUpgradeEntriesAndArm - (coreSetList: CoreSet list) - (loadGen: LoadGen) - (upgradeTime: System.DateTime) - = + // Deploy upgrade-set entries by running a CreateSorobanUpgrade loadgen on node 0 + // of the first core set, and return the resulting ConfigUpgradeSetKey so the + // caller can arm it later. Keeping deploy separate from arm lets a caller create + // the upgrade-set entry while the network is quiet: deploying it takes a single + // Soroban transaction, and once concurrent load saturates the + // (instruction-limited) Soroban lane that transaction can be starved and never + // make it into a ledger, leaving the entry unwritten. + member self.DeployUpgradeEntries (coreSetList: CoreSet list) (loadGen: LoadGen) : string = let peer = self.NetworkCfg.GetPeer coreSetList.[0] 0 let resStr = peer.GenerateLoad loadGen @@ -374,31 +377,31 @@ type StellarFormation with LogInfo "Loadgen: %s" resStr peer.WaitForLoadGenComplete loadGen + contractKey - // Arm upgrades on each peer in the core set + // Arm a previously-deployed upgrade-set (identified by contractKey) on each peer + // in the given core sets, to take effect at upgradeTime. + member self.ArmUpgradeEntries (coreSetList: CoreSet list) (contractKey: string) (upgradeTime: System.DateTime) = self.NetworkCfg.EachPeerInSets (List.toArray coreSetList) (fun peer -> peer.UpgradeNetworkSetting contractKey upgradeTime) + member self.DeployUpgradeEntriesAndArm + (coreSetList: CoreSet list) + (loadGen: LoadGen) + (upgradeTime: System.DateTime) + = + let contractKey = self.DeployUpgradeEntries coreSetList loadGen + self.ArmUpgradeEntries coreSetList contractKey upgradeTime + member self.DeployUpgradeEntriesAndArmAfter (coreSetList: CoreSet list) (loadGen: LoadGen) (delay: System.TimeSpan) = - let peer = self.NetworkCfg.GetPeer coreSetList.[0] 0 - let resStr = peer.GenerateLoad loadGen - - let contractKey = Loadgen.Parse(resStr).ConfigUpgradeSetKey - - LogInfo "Loadgen: %s" resStr - peer.WaitForLoadGenComplete loadGen - + let contractKey = self.DeployUpgradeEntries coreSetList loadGen let upgradeTime = System.DateTime.UtcNow.Add(delay) - - // Arm upgrades on each peer in the core set - self.NetworkCfg.EachPeerInSets - (List.toArray coreSetList) - (fun peer -> peer.UpgradeNetworkSetting contractKey upgradeTime) + self.ArmUpgradeEntries coreSetList contractKey upgradeTime member self.clearMetrics(coreSets: CoreSet list) = self.NetworkCfg.EachPeerInSets(coreSets |> List.toArray) (fun peer -> peer.ClearMetrics()) From 17c8820472a5079d2e459988be690c6e18e3a60a Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Mon, 1 Jun 2026 10:12:56 -0700 Subject: [PATCH 2/2] Rename contractKey -> configUpgradeSetKey for clarity It holds Loadgen.Parse(...).ConfigUpgradeSetKey, not a contract key; align with UpgradeNetworkSetting's parameter name. (PR review nit.) --- src/FSLibrary/StellarStatefulSets.fs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/FSLibrary/StellarStatefulSets.fs b/src/FSLibrary/StellarStatefulSets.fs index 7983d056..673a2b75 100644 --- a/src/FSLibrary/StellarStatefulSets.fs +++ b/src/FSLibrary/StellarStatefulSets.fs @@ -373,35 +373,39 @@ type StellarFormation with let peer = self.NetworkCfg.GetPeer coreSetList.[0] 0 let resStr = peer.GenerateLoad loadGen - let contractKey = Loadgen.Parse(resStr).ConfigUpgradeSetKey + let configUpgradeSetKey = Loadgen.Parse(resStr).ConfigUpgradeSetKey LogInfo "Loadgen: %s" resStr peer.WaitForLoadGenComplete loadGen - contractKey + configUpgradeSetKey - // Arm a previously-deployed upgrade-set (identified by contractKey) on each peer + // Arm a previously-deployed upgrade-set (identified by configUpgradeSetKey) on each peer // in the given core sets, to take effect at upgradeTime. - member self.ArmUpgradeEntries (coreSetList: CoreSet list) (contractKey: string) (upgradeTime: System.DateTime) = + member self.ArmUpgradeEntries + (coreSetList: CoreSet list) + (configUpgradeSetKey: string) + (upgradeTime: System.DateTime) + = self.NetworkCfg.EachPeerInSets (List.toArray coreSetList) - (fun peer -> peer.UpgradeNetworkSetting contractKey upgradeTime) + (fun peer -> peer.UpgradeNetworkSetting configUpgradeSetKey upgradeTime) member self.DeployUpgradeEntriesAndArm (coreSetList: CoreSet list) (loadGen: LoadGen) (upgradeTime: System.DateTime) = - let contractKey = self.DeployUpgradeEntries coreSetList loadGen - self.ArmUpgradeEntries coreSetList contractKey upgradeTime + let configUpgradeSetKey = self.DeployUpgradeEntries coreSetList loadGen + self.ArmUpgradeEntries coreSetList configUpgradeSetKey upgradeTime member self.DeployUpgradeEntriesAndArmAfter (coreSetList: CoreSet list) (loadGen: LoadGen) (delay: System.TimeSpan) = - let contractKey = self.DeployUpgradeEntries coreSetList loadGen + let configUpgradeSetKey = self.DeployUpgradeEntries coreSetList loadGen let upgradeTime = System.DateTime.UtcNow.Add(delay) - self.ArmUpgradeEntries coreSetList contractKey upgradeTime + self.ArmUpgradeEntries coreSetList configUpgradeSetKey upgradeTime member self.clearMetrics(coreSets: CoreSet list) = self.NetworkCfg.EachPeerInSets(coreSets |> List.toArray) (fun peer -> peer.ClearMetrics())