Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/namespace-id-pagination.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed a bug where pagination of namespaced objects (jobs, variables, volumes, service registrations, and others using the Namespace+ID cursor) could duplicate or skip results when one namespace name was a prefix of another that differed by a dash, such as "team" and "team-a"
```
86 changes: 86 additions & 0 deletions nomad/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5352,6 +5352,92 @@ func TestJobEndpoint_ListJobs_PaginationFiltering(t *testing.T) {
}
}

// TestJobEndpoint_ListJobs_NamespacePagination is a regression test for a
// pagination bug affecting namespaces that share a prefix but differ by a '-',
// for example "team" and "team-a". The NamespaceID pagination token is
// "<namespace>.<id>". Comparing those tokens as whole strings ordered
// "team-a.*" before "team.*" (because '.' (0x2E) > '-' (0x2D)), which disagrees
// with the state store's (Namespace, ID) iteration order, where "team" sorts
// before "team-a". That mismatch made jobs in "team" duplicate across pages
// while jobs in "team-a" became unreachable, and the cursor could repeat a page
// forever. Comparing the token field-by-field (namespace, then id) fixes it.
func TestJobEndpoint_ListJobs_NamespacePagination(t *testing.T) {
ci.Parallel(t)
s1, _, cleanupS1 := TestACLServer(t, nil)
defer cleanupS1()
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)

state := s1.fsm.State()
must.NoError(t, state.UpsertNamespaces(999, []*structs.Namespace{
{Name: "team"}, {Name: "team-a"},
}))

// Three jobs in each namespace. The state store iterates the (Namespace, ID)
// index in order, so all "team" jobs precede all "team-a" jobs.
type nsID struct{ namespace, id string }
want := []nsID{}
index := uint64(1000)
for _, ns := range []string{"team", "team-a"} {
for _, id := range []string{"j1", "j2", "j3"} {
job := mock.Job()
job.ID = id
job.Name = id
job.Namespace = ns
job.CreateIndex = index
must.NoError(t, state.UpsertJob(structs.MsgTypeTestSetup, index, nil, job))
want = append(want, nsID{ns, id})
index++
}
}

aclToken := mock.CreatePolicyAndToken(t, state, 1100, "test-valid-read",
mock.NamespacePolicy("*", "read", nil)).SecretID

// Walk every page with a small page size, following NextToken, collecting
// each (namespace, id). A correct walk visits all six jobs exactly once and
// terminates; the pre-fix bug duplicated "team" jobs, never returned
// "team-a" jobs, and repeated a token forever (caught by the page guard).
seen := map[nsID]int{}
total := 0
nextToken := ""
for page := 0; ; page++ {
if page > 10 {
t.Fatalf("pagination did not terminate after %d pages (stall); seen=%v", page, seen)
}

req := &structs.JobListRequest{
QueryOptions: structs.QueryOptions{
Region: "global",
Namespace: "*",
PerPage: 2,
NextToken: nextToken,
},
}
req.AuthToken = aclToken
var resp structs.JobListResponse
must.NoError(t, msgpackrpc.CallWithCodec(codec, "Job.List", req, &resp))

for _, job := range resp.Jobs {
seen[nsID{job.Namespace, job.ID}]++
total++
}

nextToken = resp.QueryMeta.NextToken
if nextToken == "" {
break
}
}

// Every job returned exactly once: none missing, none duplicated.
must.Eq(t, len(want), total,
must.Sprint("expected each job exactly once across all pages (no duplicates, no missing)"))
for _, w := range want {
must.Eq(t, 1, seen[w],
must.Sprintf("job %s/%s should appear exactly once, saw %d", w.namespace, w.id, seen[w]))
}
}

func TestJobEndpoint_Allocations(t *testing.T) {
ci.Parallel(t)

Expand Down
179 changes: 101 additions & 78 deletions nomad/state/paginator/tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package paginator

import (
"cmp"
"fmt"
"strconv"
"strings"
)
Expand All @@ -16,62 +15,115 @@ import (
// seeking. Implementations should close over the token we're seeking.
type Tokenizer[T any] func(item T) (string, int)

// NamespaceIDTokenizer returns a tokenizer by Namespace and ID.
// tokenField is one component of a pagination token. Fields are compared in
// order to produce the token's total ordering, which must match the order the
// underlying memdb index iterates in. A numeric field is compared numerically
// (so 12 sorts before 102, not lexicographically); a string field is compared
// lexicographically.
type tokenField struct {
str string
num uint64
isNumeric bool
}

// stringField builds a lexicographically-compared token component.
func stringField(s string) tokenField { return tokenField{str: s} }

// numericField builds a numerically-compared token component.
func numericField(n uint64) tokenField { return tokenField{num: n, isNumeric: true} }

// tokenAndCompare serializes fields into a '.'-joined token and compares that
// token against target field-by-field, in order, returning the token and the
// comparison result (-1, 0, +1).
//
// Comparing field-by-field — rather than comparing the joined strings directly —
// is what makes the token's order match memdb's tuple order. memdb's compound
// indexes separate fields with a NUL byte, which sorts before every printable
// character, so the index orders by each field in turn. A direct string compare
// of the joined token uses the '.' separator instead, whose value relative to
// the field characters can invert the order (e.g. namespaces "team" and
// "team-a": memdb sorts "team" first, but "team.id" > "team-a.id" because
// '.' > '-'). Field-by-field comparison avoids that entirely.
//
// Only the final field may contain the '.' separator (e.g. an ID); every
// earlier field (namespace, an index) must not, so the target is split into at
// most len(fields) segments and the last segment absorbs any remaining '.'.
//
// A target with fewer segments than fields — a legacy or truncated token, e.g.
// a bare integer minted before a tiebreaker was added, seen during a rolling
// upgrade — compares only the segments it has and treats the rest as equal,
// degrading to the older, narrower ordering rather than erroring.
func tokenAndCompare(fields []tokenField, target string) (string, int) {
parts := make([]string, len(fields))
for i, f := range fields {
if f.isNumeric {
parts[i] = strconv.FormatUint(f.num, 10)
} else {
parts[i] = f.str
}
}
token := strings.Join(parts, ".")

segments := strings.SplitN(target, ".", len(fields))

for i, f := range fields {
// The target ran out of segments; everything so far matched, so treat
// the remaining fields as equal (legacy/truncated token).
if i >= len(segments) {
return token, 0
}

var c int
if f.isNumeric {
targetNum, err := strconv.ParseUint(segments[i], 10, 64)
if err != nil {
// A numeric field whose target segment isn't an integer is a
// malformed token; fall back to a direct string comparison.
return token, cmp.Compare(token, target)
}
c = cmp.Compare(f.num, targetNum)
} else {
c = cmp.Compare(f.str, segments[i])
}
if c != 0 {
return token, c
}
}

return token, 0
}

// NamespaceIDTokenizer returns a tokenizer by Namespace and ID. The token is
// `namespace.id`; comparison is field-by-field so it matches the memdb
// (Namespace, ID) iteration order (Namespace cannot contain '.', so it is
// always the first segment).
func NamespaceIDTokenizer[T namespaceIDGetter](target string) Tokenizer[T] {
return func(item T) (string, int) {
ns := item.GetNamespace()
id := item.GetID()

// Use a character that is not part of validNamespaceName as separator to
// avoid accidentally generating collisions.
// For example, namespace `a` and job `b-c` would collide with namespace
// `a-b` and job `c` into the same token `a-b-c`, since `-` is an allowed
// character in namespace.
token := fmt.Sprintf("%s.%s", ns, id)
return token, cmp.Compare(token, target)
return tokenAndCompare([]tokenField{
stringField(item.GetNamespace()),
stringField(item.GetID()),
}, target)
}
}

// IDTokenizer returns a tokenizer by ID.
func IDTokenizer[T idGetter](target string) Tokenizer[T] {
return func(item T) (string, int) {
id := item.GetID()
return id, cmp.Compare(id, target)
return tokenAndCompare([]tokenField{
stringField(item.GetID()),
}, target)
}
}

// CreateIndexAndIDTokenizer returns a tokenizer by CreateIndex and ID.
// CreateIndexAndIDTokenizer returns a tokenizer by CreateIndex and ID. The
// token is `createIndex.id`; the index is compared numerically and the ID
// lexicographically.
func CreateIndexAndIDTokenizer[T idAndCreateIndexGetter](target string) Tokenizer[T] {
return func(item T) (string, int) {
index := item.GetCreateIndex()
id := item.GetID()
token := fmt.Sprintf("%d.%s", index, id)

// Split the target to extract the create index and the ID values.
targetParts := strings.SplitN(target, ".", 2)
// If the target wasn't composed of both parts, directly compare.
if len(targetParts) < 2 {
return token, cmp.Compare(token, target)
}

// Convert the create index to an integer for comparison. This
// prevents a lexigraphical comparison of the create index which
// can cause unexpected results when comparing index values like
// '12' and '102'. If the index cannot be converted to an integer,
// fall back to direct comparison.
targetIndex, err := strconv.Atoi(targetParts[0])
if err != nil {
return token, cmp.Compare(token, target)
}

indexCmp := cmp.Compare(index, uint64(targetIndex))
if indexCmp != 0 {
return token, indexCmp
}

// If the index values are equivalent use the ID values
// as the comparison.
return token, cmp.Compare(id, targetParts[1])
return tokenAndCompare([]tokenField{
numericField(item.GetCreateIndex()),
stringField(item.GetID()),
}, target)
}
}

Expand All @@ -84,40 +136,11 @@ func CreateIndexAndIDTokenizer[T idAndCreateIndexGetter](target string) Tokenize
// primary key.
func ModifyIndexAndNamespaceIDTokenizer[T modifyIndexAndNamespaceIDGetter](target string) Tokenizer[T] {
return func(item T) (string, int) {
index := item.GetModifyIndex()
ns := item.GetNamespace()
id := item.GetID()
token := fmt.Sprintf("%d.%s.%s", index, ns, id)

// Namespace cannot contain '.', so the index and namespace are the first
// two segments; the ID (which may contain '.') is the remainder.
parts := strings.SplitN(target, ".", 3)

// Parse the index numerically so values like 12 and 102 compare
// correctly rather than lexicographically. A target that doesn't begin
// with an integer is malformed; fall back to a direct string compare.
targetIndex, err := strconv.ParseUint(parts[0], 10, 64)
if err != nil {
return token, cmp.Compare(token, target)
}
if c := cmp.Compare(index, targetIndex); c != 0 {
return token, c
}

// Indexes are equal; break the tie by namespace then ID. A target with
// fewer segments (e.g. a legacy bare-integer token from before this
// tiebreaker existed, seen during a rolling upgrade) compares only the
// segments it has, degrading to the previous index-only behavior.
if len(parts) < 2 {
return token, 0
}
if c := cmp.Compare(ns, parts[1]); c != 0 {
return token, c
}
if len(parts) < 3 {
return token, 0
}
return token, cmp.Compare(id, parts[2])
return tokenAndCompare([]tokenField{
numericField(item.GetModifyIndex()),
stringField(item.GetNamespace()),
stringField(item.GetID()),
}, target)
}
}

Expand Down
Loading