From b50f4fabb67e1c6393f09af01decfee41a4c062a Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Wed, 27 May 2026 13:02:12 -0300 Subject: [PATCH] Delete stale CloudFront redirect KVS entries Operand order on `Except` in the redirect KVS sync was flipped, so the delete batch resolved to "keys we are about to PUT" instead of "keys already in the KVS that no longer appear in the new redirects file". Result: nothing has ever been deleted from the prod/staging/preview redirect KVS since the regression landed, and stale redirects stick forever (e.g. elastic/docs-content#6716 could not undo the broken azure-native-isv-service redirect via redirects.yml). Fix the operand order, lift the diff into a pure helper so we can unit-test it without mocking the AWS CLI, and add a wipe guard that refuses to deploy an empty redirects.json against a populated KVS. --- .../AwsCloudFrontKeyValueStoreProxy.cs | 18 +- .../Deploying/Redirects/RedirectKvsDiff.cs | 50 +++++ .../RedirectKvsDiffTests.cs | 196 ++++++++++++++++++ 3 files changed, 257 insertions(+), 7 deletions(-) create mode 100644 src/services/Elastic.Documentation.Assembler/Deploying/Redirects/RedirectKvsDiff.cs create mode 100644 tests/Elastic.Documentation.Build.Tests/RedirectKvsDiffTests.cs diff --git a/src/services/Elastic.Documentation.Assembler/Deploying/Redirects/AwsCloudFrontKeyValueStoreProxy.cs b/src/services/Elastic.Documentation.Assembler/Deploying/Redirects/AwsCloudFrontKeyValueStoreProxy.cs index 480d6503ab..c9875a8799 100644 --- a/src/services/Elastic.Documentation.Assembler/Deploying/Redirects/AwsCloudFrontKeyValueStoreProxy.cs +++ b/src/services/Elastic.Documentation.Assembler/Deploying/Redirects/AwsCloudFrontKeyValueStoreProxy.cs @@ -37,13 +37,17 @@ public void UpdateRedirects(string kvsName, IReadOnlyDictionary if (!listingSuccessful) return; - var toPut = sourcedRedirects - .Select(kvp => new PutKeyRequestListItem { Key = kvp.Key, Value = kvp.Value }) - .ToArray(); - var toDelete = sourcedRedirects.Keys - .Except(existingRedirects) - .Select(k => new DeleteKeyRequestListItem { Key = k }) - .ToArray(); + if (RedirectKvsDiff.WouldWipeAllExisting(sourcedRedirects, existingRedirects)) + { + Collector.EmitError("", $"Refusing to update redirects: sourced redirects are empty but the KVS contains {existingRedirects.Count} entries. " + + "This would wipe every redirect. Verify the assembler produced a non-empty redirects.json before retrying."); + return; + } + + var (toPut, toDelete) = RedirectKvsDiff.ComputeBatchUpdates(sourcedRedirects, existingRedirects); + + Logger.LogInformation("Computed redirect KVS diff: {ToPut} to put, {ToDelete} to delete (from {Existing} existing, {Sourced} sourced)", + toPut.Length, toDelete.Length, existingRedirects.Count, sourcedRedirects.Count); eTag = ProcessBatchUpdates(kvsArn, eTag, toDelete, KvsOperation.Deletes); _ = ProcessBatchUpdates(kvsArn, eTag, toPut, KvsOperation.Puts); diff --git a/src/services/Elastic.Documentation.Assembler/Deploying/Redirects/RedirectKvsDiff.cs b/src/services/Elastic.Documentation.Assembler/Deploying/Redirects/RedirectKvsDiff.cs new file mode 100644 index 0000000000..eedb1727e6 --- /dev/null +++ b/src/services/Elastic.Documentation.Assembler/Deploying/Redirects/RedirectKvsDiff.cs @@ -0,0 +1,50 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +namespace Elastic.Documentation.Assembler.Deploying.Redirects; + +/// +/// Pure set-arithmetic helpers for diffing the locally-built redirects file against +/// the live CloudFront KeyValueStore so we can decide which keys to put and which to +/// delete. Kept independent of the AWS CLI wrapper so the logic is unit-testable. +/// +internal static class RedirectKvsDiff +{ + /// + /// Produces the put/delete batches needed to reconcile the live KVS with the + /// freshly-built redirects file. + /// + /// Key/value pairs from the build's redirects.json (desired state). + /// Keys currently present in the live KVS. + public static (PutKeyRequestListItem[] ToPut, DeleteKeyRequestListItem[] ToDelete) ComputeBatchUpdates( + IReadOnlyDictionary sourcedRedirects, + IReadOnlyCollection existingRedirects) + { + var toPut = sourcedRedirects + .Select(kvp => new PutKeyRequestListItem { Key = kvp.Key, Value = kvp.Value }) + .ToArray(); + + // Stale entries = keys in KVS that no longer appear in the new sourced file. + // Operand order matters: it must be `existingRedirects.Except(sourcedRedirects.Keys)`. + // The reverse (sourcedRedirects.Keys.Except(existingRedirects)) computes the + // brand-new keys we are about to PUT, which makes the DELETE batch a no-op and + // causes stale redirects to live in the KVS forever. + var toDelete = existingRedirects + .Except(sourcedRedirects.Keys) + .Select(k => new DeleteKeyRequestListItem { Key = k }) + .ToArray(); + + return (toPut, toDelete); + } + + /// + /// Returns true when applying the sourced redirects would wipe every existing entry + /// from the KVS. Used as a sanity guard: an empty redirects.json almost certainly + /// signals a broken build rather than an intentional reset. + /// + public static bool WouldWipeAllExisting( + IReadOnlyDictionary sourcedRedirects, + IReadOnlyCollection existingRedirects) => + sourcedRedirects.Count == 0 && existingRedirects.Count > 0; +} diff --git a/tests/Elastic.Documentation.Build.Tests/RedirectKvsDiffTests.cs b/tests/Elastic.Documentation.Build.Tests/RedirectKvsDiffTests.cs new file mode 100644 index 0000000000..ce8c1efbd6 --- /dev/null +++ b/tests/Elastic.Documentation.Build.Tests/RedirectKvsDiffTests.cs @@ -0,0 +1,196 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using AwesomeAssertions; +using Elastic.Documentation.Assembler.Deploying.Redirects; + +namespace Elastic.Documentation.Build.Tests; + +/* + * RedirectKvsDiff tests + * ===================== + * + * These tests pin the diff semantics that decide which keys are written to and + * deleted from the CloudFront KeyValueStore that backs documentation redirects. + * + * The regression that motivated this suite: + * ----------------------------------------- + * A writer split `azure-native-isv-service.md` into three pages in elastic/docs-content#5818 + * but kept the parent page alive. The redirect for the parent page was kept in + * redirects.yml without a top-level `to:`, which led the assembler to emit + * /docs/.../azure-native-isv-service -> /docs/.../azure-native-isv-service-troubleshooting + * into the live KVS, breaking the still-valid parent page. + * + * Two follow-up PRs (#6667, #6716) tried to remove the stale redirect from + * redirects.yml. Both failed because the diff helper had the operands of `Except` + * reversed: it computed "keys in the new file but not in the live KVS" (which is + * just the brand-new entries we are about to PUT) instead of "keys in the live + * KVS that no longer appear in the new file" (the stale entries to DELETE). + * Result: nothing was ever deleted from the KVS and the bad redirect "stuck". + * + * The `KeyRemovedFromSourced_AppearsInToDelete` test reproduces that exact + * scenario. Don't loosen it. + */ + +public class RedirectKvsDiffTests +{ + [Fact] + public void ComputeBatchUpdates_KeyRemovedFromSourced_AppearsInToDelete() + { + // Reproduces elastic/docs-content#6716: a redirect that was previously pushed + // to the KVS is dropped from redirects.json. The diff MUST flag it for deletion. + const string staleKey = "/docs/deploy-manage/deploy/elastic-cloud/azure-native-isv-service"; + + var sourcedRedirects = new Dictionary + { + ["/docs/some-other-page"] = "/docs/some-other-page-new" + }; + var existingRedirects = new HashSet + { + staleKey, + "/docs/some-other-page" + }; + + var (toPut, toDelete) = RedirectKvsDiff.ComputeBatchUpdates(sourcedRedirects, existingRedirects); + + toDelete.Select(d => d.Key).Should().ContainSingle().Which.Should().Be(staleKey); + toPut.Should().ContainSingle().Which.Key.Should().Be("/docs/some-other-page"); + } + + [Fact] + public void ComputeBatchUpdates_KeyOnlyInSourced_IsPutAndNotDeleted() + { + var sourcedRedirects = new Dictionary + { + ["/docs/new-page"] = "/docs/new-page-target" + }; + var existingRedirects = new HashSet(); + + var (toPut, toDelete) = RedirectKvsDiff.ComputeBatchUpdates(sourcedRedirects, existingRedirects); + + toPut.Should().ContainSingle().Which.Should().BeEquivalentTo(new + { + Key = "/docs/new-page", + Value = "/docs/new-page-target" + }); + toDelete.Should().BeEmpty(); + } + + [Fact] + public void ComputeBatchUpdates_KeyInBoth_IsPutAndNotDeleted() + { + // The current implementation re-puts every sourced entry (even unchanged values). + // This pins that behaviour explicitly so a future optimisation that skips + // no-op puts has to update both the code and this test. + var sourcedRedirects = new Dictionary + { + ["/docs/shared-key"] = "/docs/target" + }; + var existingRedirects = new HashSet { "/docs/shared-key" }; + + var (toPut, toDelete) = RedirectKvsDiff.ComputeBatchUpdates(sourcedRedirects, existingRedirects); + + toPut.Select(p => p.Key).Should().Equal("/docs/shared-key"); + toDelete.Should().BeEmpty(); + } + + [Fact] + public void ComputeBatchUpdates_BothEmpty_ProducesEmptyBatches() + { + var (toPut, toDelete) = RedirectKvsDiff.ComputeBatchUpdates( + new Dictionary(), + new HashSet()); + + toPut.Should().BeEmpty(); + toDelete.Should().BeEmpty(); + } + + [Fact] + public void ComputeBatchUpdates_OnlyExisting_AllAreDeleted() + { + // Mirrors a "remove every redirect" intent. The helper itself does not refuse + // to compute this; the wipe guard (WouldWipeAllExisting) lives one layer up. + var sourcedRedirects = new Dictionary(); + var existingRedirects = new HashSet + { + "/docs/a", + "/docs/b", + "/docs/c" + }; + + var (toPut, toDelete) = RedirectKvsDiff.ComputeBatchUpdates(sourcedRedirects, existingRedirects); + + toPut.Should().BeEmpty(); + toDelete.Select(d => d.Key).Should().BeEquivalentTo(existingRedirects); + } + + [Fact] + public void ComputeBatchUpdates_AzureIsvRegression_ScenarioEndToEnd() + { + // Simulates the post-#6716 state: redirects.yml no longer mentions the parent + // page, but the KVS still has the stale entry from the original push. + const string stalePath = "/docs/deploy-manage/deploy/elastic-cloud/azure-native-isv-service"; + + var sourcedRedirects = new Dictionary + { + // Many unrelated valid redirects exist in the same deploy. + ["/docs/some/other/legitimate-old"] = "/docs/some/other/legitimate-new", + ["/docs/another/page"] = "/docs/another/page-renamed" + }; + var existingRedirects = new HashSet + { + stalePath, + "/docs/some/other/legitimate-old", + "/docs/historical-entry-still-valid" + }; + + var (toPut, toDelete) = RedirectKvsDiff.ComputeBatchUpdates(sourcedRedirects, existingRedirects); + + toDelete.Select(d => d.Key).Should().Contain(stalePath, + because: "the stale Azure ISV redirect must be removed from the KVS once it is dropped from redirects.yml"); + toDelete.Select(d => d.Key).Should().Contain("/docs/historical-entry-still-valid", + because: "any KVS key absent from the sourced redirects is stale by definition"); + toDelete.Select(d => d.Key).Should().NotContain("/docs/another/page", + because: "brand-new sourced entries belong in the PUT batch, not the DELETE batch"); + + toPut.Select(p => p.Key).Should().Contain("/docs/another/page"); + toPut.Should().NotContain(p => p.Key == stalePath, + because: "we never PUT a value for a key that the sourced file dropped"); + + // Sanity: each toDelete key must come from the live KVS, never from the sourced set. + foreach (var del in toDelete) + { + existingRedirects.Should().Contain(del.Key); + sourcedRedirects.Keys.Should().NotContain(del.Key); + } + } + + [Fact] + public void WouldWipeAllExisting_EmptySourcedAndPopulatedExisting_ReturnsTrue() + { + var sourced = new Dictionary(); + var existing = new HashSet { "/docs/a", "/docs/b" }; + + RedirectKvsDiff.WouldWipeAllExisting(sourced, existing).Should().BeTrue(); + } + + [Fact] + public void WouldWipeAllExisting_SourcedHasEntries_ReturnsFalse() + { + var sourced = new Dictionary { ["/docs/a"] = "/docs/b" }; + var existing = new HashSet { "/docs/a" }; + + RedirectKvsDiff.WouldWipeAllExisting(sourced, existing).Should().BeFalse(); + } + + [Fact] + public void WouldWipeAllExisting_BothEmpty_ReturnsFalse() + { + // Empty in, empty out is not a wipe — it's a no-op on a clean KVS. + var sourced = new Dictionary(); + var existing = new HashSet(); + + RedirectKvsDiff.WouldWipeAllExisting(sourced, existing).Should().BeFalse(); + } +}