diff --git a/.changelog/namespace-id-pagination.txt b/.changelog/namespace-id-pagination.txt new file mode 100644 index 00000000000..b8173f61204 --- /dev/null +++ b/.changelog/namespace-id-pagination.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a bug where pagination of namespaced objects (jobs, variables, volumes, service registrations, and others using the Namespace+ID cursor) could duplicate or skip results when one namespace name was a prefix of another that differed by a dash, such as "team" and "team-a" +``` diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index aeafcbd8c5f..02ad1f1c93c 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -5352,6 +5352,92 @@ func TestJobEndpoint_ListJobs_PaginationFiltering(t *testing.T) { } } +// TestJobEndpoint_ListJobs_NamespacePagination is a regression test for a +// pagination bug affecting namespaces that share a prefix but differ by a '-', +// for example "team" and "team-a". The NamespaceID pagination token is +// ".". Comparing those tokens as whole strings ordered +// "team-a.*" before "team.*" (because '.' (0x2E) > '-' (0x2D)), which disagrees +// with the state store's (Namespace, ID) iteration order, where "team" sorts +// before "team-a". That mismatch made jobs in "team" duplicate across pages +// while jobs in "team-a" became unreachable, and the cursor could repeat a page +// forever. Comparing the token field-by-field (namespace, then id) fixes it. +func TestJobEndpoint_ListJobs_NamespacePagination(t *testing.T) { + ci.Parallel(t) + s1, _, cleanupS1 := TestACLServer(t, nil) + defer cleanupS1() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + state := s1.fsm.State() + must.NoError(t, state.UpsertNamespaces(999, []*structs.Namespace{ + {Name: "team"}, {Name: "team-a"}, + })) + + // Three jobs in each namespace. The state store iterates the (Namespace, ID) + // index in order, so all "team" jobs precede all "team-a" jobs. + type nsID struct{ namespace, id string } + want := []nsID{} + index := uint64(1000) + for _, ns := range []string{"team", "team-a"} { + for _, id := range []string{"j1", "j2", "j3"} { + job := mock.Job() + job.ID = id + job.Name = id + job.Namespace = ns + job.CreateIndex = index + must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, index, nil, job)) + want = append(want, nsID{ns, id}) + index++ + } + } + + aclToken := mock.CreatePolicyAndToken(t, state, 1100, "test-valid-read", + mock.NamespacePolicy("*", "read", nil)).SecretID + + // Walk every page with a small page size, following NextToken, collecting + // each (namespace, id). A correct walk visits all six jobs exactly once and + // terminates; the pre-fix bug duplicated "team" jobs, never returned + // "team-a" jobs, and repeated a token forever (caught by the page guard). + seen := map[nsID]int{} + total := 0 + nextToken := "" + for page := 0; ; page++ { + if page > 10 { + t.Fatalf("pagination did not terminate after %d pages (stall); seen=%v", page, seen) + } + + req := &structs.JobListRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + Namespace: "*", + PerPage: 2, + NextToken: nextToken, + }, + } + req.AuthToken = aclToken + var resp structs.JobListResponse + must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp)) + + for _, job := range resp.Jobs { + seen[nsID{job.Namespace, job.ID}]++ + total++ + } + + nextToken = resp.QueryMeta.NextToken + if nextToken == "" { + break + } + } + + // Every job returned exactly once: none missing, none duplicated. + must.Eq(t, len(want), total, + must.Sprint("expected each job exactly once across all pages (no duplicates, no missing)")) + for _, w := range want { + must.Eq(t, 1, seen[w], + must.Sprintf("job %s/%s should appear exactly once, saw %d", w.namespace, w.id, seen[w])) + } +} + func TestJobEndpoint_Allocations(t *testing.T) { ci.Parallel(t) diff --git a/nomad/state/paginator/tokenizer.go b/nomad/state/paginator/tokenizer.go index 4aa3440aeb4..584c08fa2c8 100644 --- a/nomad/state/paginator/tokenizer.go +++ b/nomad/state/paginator/tokenizer.go @@ -5,7 +5,6 @@ package paginator import ( "cmp" - "fmt" "strconv" "strings" ) @@ -16,62 +15,115 @@ import ( // seeking. Implementations should close over the token we're seeking. type Tokenizer[T any] func(item T) (string, int) -// NamespaceIDTokenizer returns a tokenizer by Namespace and ID. +// tokenField is one component of a pagination token. Fields are compared in +// order to produce the token's total ordering, which must match the order the +// underlying memdb index iterates in. A numeric field is compared numerically +// (so 12 sorts before 102, not lexicographically); a string field is compared +// lexicographically. +type tokenField struct { + str string + num uint64 + isNumeric bool +} + +// stringField builds a lexicographically-compared token component. +func stringField(s string) tokenField { return tokenField{str: s} } + +// numericField builds a numerically-compared token component. +func numericField(n uint64) tokenField { return tokenField{num: n, isNumeric: true} } + +// tokenAndCompare serializes fields into a '.'-joined token and compares that +// token against target field-by-field, in order, returning the token and the +// comparison result (-1, 0, +1). +// +// Comparing field-by-field — rather than comparing the joined strings directly — +// is what makes the token's order match memdb's tuple order. memdb's compound +// indexes separate fields with a NUL byte, which sorts before every printable +// character, so the index orders by each field in turn. A direct string compare +// of the joined token uses the '.' separator instead, whose value relative to +// the field characters can invert the order (e.g. namespaces "team" and +// "team-a": memdb sorts "team" first, but "team.id" > "team-a.id" because +// '.' > '-'). Field-by-field comparison avoids that entirely. +// +// Only the final field may contain the '.' separator (e.g. an ID); every +// earlier field (namespace, an index) must not, so the target is split into at +// most len(fields) segments and the last segment absorbs any remaining '.'. +// +// A target with fewer segments than fields — a legacy or truncated token, e.g. +// a bare integer minted before a tiebreaker was added, seen during a rolling +// upgrade — compares only the segments it has and treats the rest as equal, +// degrading to the older, narrower ordering rather than erroring. +func tokenAndCompare(fields []tokenField, target string) (string, int) { + parts := make([]string, len(fields)) + for i, f := range fields { + if f.isNumeric { + parts[i] = strconv.FormatUint(f.num, 10) + } else { + parts[i] = f.str + } + } + token := strings.Join(parts, ".") + + segments := strings.SplitN(target, ".", len(fields)) + + for i, f := range fields { + // The target ran out of segments; everything so far matched, so treat + // the remaining fields as equal (legacy/truncated token). + if i >= len(segments) { + return token, 0 + } + + var c int + if f.isNumeric { + targetNum, err := strconv.ParseUint(segments[i], 10, 64) + if err != nil { + // A numeric field whose target segment isn't an integer is a + // malformed token; fall back to a direct string comparison. + return token, cmp.Compare(token, target) + } + c = cmp.Compare(f.num, targetNum) + } else { + c = cmp.Compare(f.str, segments[i]) + } + if c != 0 { + return token, c + } + } + + return token, 0 +} + +// NamespaceIDTokenizer returns a tokenizer by Namespace and ID. The token is +// `namespace.id`; comparison is field-by-field so it matches the memdb +// (Namespace, ID) iteration order (Namespace cannot contain '.', so it is +// always the first segment). func NamespaceIDTokenizer[T namespaceIDGetter](target string) Tokenizer[T] { return func(item T) (string, int) { - ns := item.GetNamespace() - id := item.GetID() - - // Use a character that is not part of validNamespaceName as separator to - // avoid accidentally generating collisions. - // For example, namespace `a` and job `b-c` would collide with namespace - // `a-b` and job `c` into the same token `a-b-c`, since `-` is an allowed - // character in namespace. - token := fmt.Sprintf("%s.%s", ns, id) - return token, cmp.Compare(token, target) + return tokenAndCompare([]tokenField{ + stringField(item.GetNamespace()), + stringField(item.GetID()), + }, target) } } // IDTokenizer returns a tokenizer by ID. func IDTokenizer[T idGetter](target string) Tokenizer[T] { return func(item T) (string, int) { - id := item.GetID() - return id, cmp.Compare(id, target) + return tokenAndCompare([]tokenField{ + stringField(item.GetID()), + }, target) } } -// CreateIndexAndIDTokenizer returns a tokenizer by CreateIndex and ID. +// CreateIndexAndIDTokenizer returns a tokenizer by CreateIndex and ID. The +// token is `createIndex.id`; the index is compared numerically and the ID +// lexicographically. func CreateIndexAndIDTokenizer[T idAndCreateIndexGetter](target string) Tokenizer[T] { return func(item T) (string, int) { - index := item.GetCreateIndex() - id := item.GetID() - token := fmt.Sprintf("%d.%s", index, id) - - // Split the target to extract the create index and the ID values. - targetParts := strings.SplitN(target, ".", 2) - // If the target wasn't composed of both parts, directly compare. - if len(targetParts) < 2 { - return token, cmp.Compare(token, target) - } - - // Convert the create index to an integer for comparison. This - // prevents a lexigraphical comparison of the create index which - // can cause unexpected results when comparing index values like - // '12' and '102'. If the index cannot be converted to an integer, - // fall back to direct comparison. - targetIndex, err := strconv.Atoi(targetParts[0]) - if err != nil { - return token, cmp.Compare(token, target) - } - - indexCmp := cmp.Compare(index, uint64(targetIndex)) - if indexCmp != 0 { - return token, indexCmp - } - - // If the index values are equivalent use the ID values - // as the comparison. - return token, cmp.Compare(id, targetParts[1]) + return tokenAndCompare([]tokenField{ + numericField(item.GetCreateIndex()), + stringField(item.GetID()), + }, target) } } @@ -84,40 +136,11 @@ func CreateIndexAndIDTokenizer[T idAndCreateIndexGetter](target string) Tokenize // primary key. func ModifyIndexAndNamespaceIDTokenizer[T modifyIndexAndNamespaceIDGetter](target string) Tokenizer[T] { return func(item T) (string, int) { - index := item.GetModifyIndex() - ns := item.GetNamespace() - id := item.GetID() - token := fmt.Sprintf("%d.%s.%s", index, ns, id) - - // Namespace cannot contain '.', so the index and namespace are the first - // two segments; the ID (which may contain '.') is the remainder. - parts := strings.SplitN(target, ".", 3) - - // Parse the index numerically so values like 12 and 102 compare - // correctly rather than lexicographically. A target that doesn't begin - // with an integer is malformed; fall back to a direct string compare. - targetIndex, err := strconv.ParseUint(parts[0], 10, 64) - if err != nil { - return token, cmp.Compare(token, target) - } - if c := cmp.Compare(index, targetIndex); c != 0 { - return token, c - } - - // Indexes are equal; break the tie by namespace then ID. A target with - // fewer segments (e.g. a legacy bare-integer token from before this - // tiebreaker existed, seen during a rolling upgrade) compares only the - // segments it has, degrading to the previous index-only behavior. - if len(parts) < 2 { - return token, 0 - } - if c := cmp.Compare(ns, parts[1]); c != 0 { - return token, c - } - if len(parts) < 3 { - return token, 0 - } - return token, cmp.Compare(id, parts[2]) + return tokenAndCompare([]tokenField{ + numericField(item.GetModifyIndex()), + stringField(item.GetNamespace()), + stringField(item.GetID()), + }, target) } } diff --git a/nomad/state/paginator/tokenizer_test.go b/nomad/state/paginator/tokenizer_test.go index 9fbb217f558..373a6f25a11 100644 --- a/nomad/state/paginator/tokenizer_test.go +++ b/nomad/state/paginator/tokenizer_test.go @@ -249,6 +249,111 @@ func TestModifyIndexAndNamespaceIDTokenizer(t *testing.T) { } } +func TestNamespaceIDTokenizer(t *testing.T) { + ci.Parallel(t) + + cases := []struct { + name string + obj *mockNamespaceIDObject + target string + expectedToken string + expectedCmp int + }{ + { + name: "less namespace", + obj: newMockNamespaceIDObject("aaa", "x"), + target: "bbb.x", + expectedToken: "aaa.x", + expectedCmp: -1, + }, + { + name: "greater namespace", + obj: newMockNamespaceIDObject("bbb", "x"), + target: "aaa.x", + expectedToken: "bbb.x", + expectedCmp: 1, + }, + { + name: "equal namespace, less id", + obj: newMockNamespaceIDObject("team", "aaa"), + target: "team.bbb", + expectedToken: "team.aaa", + expectedCmp: -1, + }, + { + name: "equal namespace, greater id", + obj: newMockNamespaceIDObject("team", "ccc"), + target: "team.bbb", + expectedToken: "team.ccc", + expectedCmp: 1, + }, + { + name: "equal namespace and id", + obj: newMockNamespaceIDObject("team", "aaa"), + target: "team.aaa", + expectedToken: "team.aaa", + expectedCmp: 0, + }, + { + // regression: namespaces "team" and "team-a" must order + // field-by-field so "team" sorts before "team-a" (matching memdb's + // NUL-separated (Namespace, ID) key order). A whole-string compare + // of the joined token would reverse them, because '.' > '-', which + // is the bug this fixes (a "team-a" namespace could shadow "team" + // and break pagination). + name: "dash namespace ordering (less)", + obj: newMockNamespaceIDObject("team", "z"), + target: "team-a.a", + expectedToken: "team.z", + expectedCmp: -1, + }, + { + name: "dash namespace ordering (greater)", + obj: newMockNamespaceIDObject("team-a", "a"), + target: "team.z", + expectedToken: "team-a.a", + expectedCmp: 1, + }, + { + // id may contain '.'; it's the remainder after the first split. + name: "id with dots", + obj: newMockNamespaceIDObject("default", "a.b.c"), + target: "default.a.b.c", + expectedToken: "default.a.b.c", + expectedCmp: 0, + }, + { + // truncated single-segment target: compare namespace only. + name: "namespace-only target (equal)", + obj: newMockNamespaceIDObject("team", "aaa"), + target: "team", + expectedToken: "team.aaa", + expectedCmp: 0, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + fn := NamespaceIDTokenizer[*mockNamespaceIDObject](tc.target) + actualToken, actualCmp := fn(tc.obj) + must.Eq(t, tc.expectedToken, actualToken) + must.Eq(t, tc.expectedCmp, actualCmp) + }) + } +} + +func newMockNamespaceIDObject(namespace, id string) *mockNamespaceIDObject { + return &mockNamespaceIDObject{namespace: namespace, id: id} +} + +type mockNamespaceIDObject struct { + namespace string + id string +} + +func (m *mockNamespaceIDObject) GetNamespace() string { return m.namespace } +func (m *mockNamespaceIDObject) GetID() string { return m.id } + func newMockModifyIndexObject(modifyIndex uint64, namespace, id string) *mockModifyIndexObject { return &mockModifyIndexObject{ modifyIndex: modifyIndex,