Skip to content

Commit 38cac99

Browse files
Rework SimpleSetupCluster in cluster tests, and longer timeouts for select tests (#1762)
* rework SimpleSetupCluster so it's 1) async 2) checks for success throughout; async's a few more tests; adds some null checks * bump timeouts on longer running cluster vector set tests; plumb [CancelAfter] so ClusterTestContext knows of it * formatting * address feedback * this CancelAfter was not being enforced, as is too aggressive - remove it * remove some debugging changes that slipped in
1 parent c3e049c commit 38cac99

9 files changed

Lines changed: 345 additions & 135 deletions

File tree

libs/cluster/Server/Replication/ReplicationManager.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ or FailoverStatus.FAILOVER_IN_PROGRESS
251251
return;
252252
}
253253

254-
logger.LogInformation("Beginning resync to {primaryId} after replication session failed", primaryId);
254+
logger?.LogInformation("Beginning resync to {primaryId} after replication session failed", primaryId);
255255

256256
// At this point we need to hold the lock until this upcoming task completes
257257
suppressUnlock = true;
@@ -270,16 +270,16 @@ or FailoverStatus.FAILOVER_IN_PROGRESS
270270

271271
if (success)
272272
{
273-
logger.LogInformation("Resync to {primaryId} successfully started", primaryId);
273+
logger?.LogInformation("Resync to {primaryId} successfully started", primaryId);
274274
}
275275
else
276276
{
277-
logger.LogWarning("Failed to resync to {primaryId} after replication session failed: {errorMessage}", primaryId, Encoding.UTF8.GetString(errorMessage.Span));
277+
logger?.LogWarning("Failed to resync to {primaryId} after replication session failed: {errorMessage}", primaryId, Encoding.UTF8.GetString(errorMessage.Span));
278278
}
279279
}
280280
catch (Exception ex)
281281
{
282-
logger.LogError(ex, "Error encountered on replication recovery background task");
282+
logger?.LogError(ex, "Error encountered on replication recovery background task");
283283
}
284284
finally
285285
{

libs/cluster/Session/RespClusterBasicCommands.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,9 @@ private bool NetworkClusterGossip(out bool invalidParameters)
374374

375375
Debug.Assert(withMeetSpan.EqualsUpperCaseSpanIgnoringCase(CmdStrings.WITHMEET));
376376
if (withMeetSpan.EqualsUpperCaseSpanIgnoringCase(CmdStrings.WITHMEET))
377+
{
377378
gossipWithMeet = true;
379+
}
378380
}
379381

380382
var gossipMessage = parseState.GetArgSliceByRef(currTokenIdx).SpanByte.ToByteArray();
@@ -406,7 +408,9 @@ private bool NetworkClusterGossip(out bool invalidParameters)
406408
RemoteNodeId = other.LocalNodeId;
407409
}
408410
else
411+
{
409412
logger?.LogWarning("Received gossip from unknown node: {node-id}", other.LocalNodeId);
413+
}
410414
}
411415

412416
// Respond if configuration has changed or gossipWithMeet option is specified

test/Garnet.test.cluster/ClusterConfigTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public void ClusterConfigInitializesUnassignedWorkerTest()
6363
}
6464

6565
[Test, Order(2)]
66-
[Category("CLUSTER-CONFIG"), CancelAfter(1000)]
66+
[Category("CLUSTER-CONFIG")]
6767
public void ClusterForgetAfterNodeRestartTest()
6868
{
6969
int nbInstances = 4;

test/Garnet.test.cluster/ClusterMigrateTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,7 +1404,7 @@ public void ClusterSimpleMigrateWithAuth(List<int> migrateRange)
14041404

14051405
[Test, Order(15)]
14061406
[Category("CLUSTER")]
1407-
public void ClusterAllowWritesDuringMigrateTest()
1407+
public async Task ClusterAllowWritesDuringMigrateTestAsync()
14081408
{
14091409
context.logger.LogDebug("0. ClusterSimpleMigrateTestWithReadWrite started");
14101410
var Shards = defaultShards;
@@ -1428,7 +1428,7 @@ public void ClusterAllowWritesDuringMigrateTest()
14281428

14291429
// Get slot mapping
14301430
var slot = context.clusterTestUtils.ClusterKeySlot(srcNode.EndPoint.ToIPEndPoint(), Encoding.ASCII.GetString(keyExists));
1431-
var tgtNode = context.clusterTestUtils.GetAnyOtherNode(srcNode.EndPoint.ToIPEndPoint(), logger: context.logger);
1431+
var tgtNode = await context.clusterTestUtils.GetAnyOtherNodeAsync(srcNode.EndPoint.ToIPEndPoint(), logger: context.logger).ConfigureAwait(false);
14321432

14331433
// Set slot to IMPORTING state
14341434
var resp = context.clusterTestUtils.SetSlot(tgtNode.EndPoint.ToIPEndPoint(), slot, "IMPORTING", srcNode.NodeId);

test/Garnet.test.cluster/ClusterNegativeTests.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ public void ClusterReplicateFails()
628628
}
629629
[Test, Order(14), CancelAfter(testTimeout)]
630630
[Category("REPLICATION")]
631-
public void ClusterFailoverSucceedsDuringEnsureReplication(CancellationToken cancellationToken)
631+
public async Task ClusterFailoverSucceedsDuringEnsureReplicationAsync(CancellationToken cancellationToken)
632632
{
633633
// Verify that EnsureReplication does not block an in-flight failover.
634634
// EnsureReplication polls for dropped replication sessions and attempts auto-resync.
@@ -653,7 +653,7 @@ public void ClusterFailoverSucceedsDuringEnsureReplication(CancellationToken can
653653
var resp = context.clusterTestUtils.ClusterReplicate(replicaNodeIndex: replicaIndex, primaryNodeIndex: primaryIndex, logger: context.logger);
654654
ClassicAssert.AreEqual("OK", resp);
655655
context.clusterTestUtils.WaitForReplicaRecovery(replicaIndex, context.logger);
656-
context.clusterTestUtils.WaitForConnectedReplicaCount(primaryIndex, 1, context.logger);
656+
await context.clusterTestUtils.WaitForConnectedReplicaCountAsync(primaryIndex, 1, context.logger).ConfigureAwait(false);
657657

658658
// Populate primary and wait for sync
659659
context.kvPairs = [];
@@ -686,7 +686,7 @@ public void ClusterFailoverSucceedsDuringEnsureReplication(CancellationToken can
686686

687687
[Test, Order(15), CancelAfter(testTimeout)]
688688
[Category("REPLICATION")]
689-
public void ClusterEnsureReplicationWorksAfterFailover(CancellationToken cancellationToken)
689+
public async Task ClusterEnsureReplicationWorksAfterFailoverAsync(CancellationToken cancellationToken)
690690
{
691691
// Verify that EnsureReplication still functions after a failover completes.
692692
// The failover guard should only suppress auto-resync during active failover states,
@@ -710,7 +710,7 @@ public void ClusterEnsureReplicationWorksAfterFailover(CancellationToken cancell
710710
var resp = context.clusterTestUtils.ClusterReplicate(replicaNodeIndex: replicaIndex, primaryNodeIndex: primaryIndex, logger: context.logger);
711711
ClassicAssert.AreEqual("OK", resp);
712712
context.clusterTestUtils.WaitForReplicaRecovery(replicaIndex, context.logger);
713-
context.clusterTestUtils.WaitForConnectedReplicaCount(primaryIndex, 1, context.logger);
713+
await context.clusterTestUtils.WaitForConnectedReplicaCountAsync(primaryIndex, 1, context.logger).ConfigureAwait(false);
714714

715715
// Populate primary data and sync
716716
context.kvPairs = [];
@@ -746,7 +746,7 @@ public void ClusterEnsureReplicationWorksAfterFailover(CancellationToken cancell
746746
#if DEBUG
747747
[Test, Order(16), CancelAfter(testTimeout)]
748748
[Category("REPLICATION")]
749-
public void ClusterFailoverResetsPrimaryOnTakeOverFailure(CancellationToken cancellationToken)
749+
public async Task ClusterFailoverResetsPrimaryOnTakeOverFailureAsync(CancellationToken cancellationToken)
750750
{
751751
// Verify that when TakeOverAsPrimary fails after PauseWritesAndWaitForSync
752752
// has already sent failstopwrites to the primary, the primary is reset back
@@ -772,7 +772,7 @@ public void ClusterFailoverResetsPrimaryOnTakeOverFailure(CancellationToken canc
772772
var resp = context.clusterTestUtils.ClusterReplicate(replicaNodeIndex: replicaIndex, primaryNodeIndex: primaryIndex, logger: context.logger);
773773
ClassicAssert.AreEqual("OK", resp);
774774
context.clusterTestUtils.WaitForReplicaRecovery(replicaIndex, context.logger);
775-
context.clusterTestUtils.WaitForConnectedReplicaCount(primaryIndex, 1, context.logger);
775+
await context.clusterTestUtils.WaitForConnectedReplicaCountAsync(primaryIndex, 1, context.logger).ConfigureAwait(false);
776776

777777
// Populate data and sync
778778
context.kvPairs = [];

test/Garnet.test.cluster/ClusterTestContext.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ public class ClusterTestContext
4646

4747
public void Setup(Dictionary<string, LogLevel> monitorTests, int testTimeoutSeconds = 60)
4848
{
49+
// Pull timeout off [CancelAfter] if its specified, otherwise use default
50+
if (TestContext.CurrentContext.Test.Properties.ContainsKey("Timeout"))
51+
{
52+
testTimeoutSeconds = ((int)TestContext.CurrentContext.Test.Properties["Timeout"].First()) / 1_000;
53+
}
54+
4955
cts = new CancellationTokenSource(TimeSpan.FromSeconds(testTimeoutSeconds));
5056

5157
TestFolder = TestUtils.UnitTestWorkingDir() + "\\";
@@ -779,7 +785,7 @@ public void ClusterFailoverSpinWait(int replicaNodeIndex, ILogger logger)
779785
}
780786
}
781787

782-
public void AttachAndWaitForSync(int primary_count, int replica_count, bool disableObjects)
788+
public async Task AttachAndWaitForSyncAsync(int primary_count, int replica_count, bool disableObjects)
783789
{
784790
var primaryId = clusterTestUtils.GetNodeIdFromNode(0, logger);
785791

@@ -800,7 +806,7 @@ public void AttachAndWaitForSync(int primary_count, int replica_count, bool disa
800806
clusterTestUtils.WaitForReplicaAofSync(0, i, logger);
801807
}
802808

803-
clusterTestUtils.WaitForConnectedReplicaCount(0, replica_count, logger: logger);
809+
await clusterTestUtils.WaitForConnectedReplicaCountAsync(0, replica_count, logger: logger).ConfigureAwait(false);
804810

805811
// Validate data on replicas
806812
for (var i = primary_count; i < replica_count; i++)

0 commit comments

Comments
 (0)