Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/28178.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed a bug where jobs sharing a ModifyIndex could break /v1/jobs/statuses pagination, causing the jobs index page to show a job on more than one page, fail to page past a group of such jobs, or jump to an incorrect last page
```
2 changes: 1 addition & 1 deletion nomad/job_endpoint_statuses.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func (j *Job) Statuses(
newJobs := set.New[structs.NamespacedID](0)
pager, err := paginator.NewPaginator(iter, args.QueryOptions,
selector,
paginator.ModifyIndexTokenizer[*structs.Job](args.NextToken),
paginator.ModifyIndexAndNamespaceIDTokenizer[*structs.Job](args.NextToken),
func(job *structs.Job) (structs.JobStatusesJob, error) {
var none structs.JobStatusesJob
// this is where the sausage is made
Expand Down
86 changes: 86 additions & 0 deletions nomad/job_endpoint_statuses_pagination_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright IBM Corp. 2015, 2026
// SPDX-License-Identifier: BUSL-1.1

package nomad

import (
"fmt"
"testing"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
)

// TestJob_Statuses_Pagination_SharedModifyIndex is a regression test for the
// /v1/jobs/statuses paginator. Its NextToken is built from ModifyIndex alone,
// with no tiebreaker. When several jobs share a ModifyIndex (e.g. written in a
// single Raft transaction) and that group does not fit in one page, the cursor
// cannot advance past the group: the same page repeats (duplicate jobs) and the
// walk never terminates.
//
// Paginating this endpoint must always (a) terminate and (b) return every job
// exactly once, regardless of jobs sharing a ModifyIndex.
func TestJob_Statuses_Pagination_SharedModifyIndex(t *testing.T) {
ci.Parallel(t)

s, cleanup := TestServer(t, func(c *Config) {
c.NumSchedulers = 0
})
t.Cleanup(cleanup)
testutil.WaitForLeader(t, s.RPC)

// Write several jobs at a single shared ModifyIndex, as happens when one
// Raft transaction writes multiple jobs at once.
const n = 5
const sharedIndex = 1000
want := make(map[string]bool, n)
for i := range n {
job := mock.MinJob()
job.ID = fmt.Sprintf("shared-%02d", i)
must.NoError(t, s.State().UpsertJob(structs.MsgTypeTestSetup, sharedIndex, nil, job))
want[job.ID] = true
}

// Page through the endpoint following NextToken, the way the web UI does.
// per_page is smaller than the shared group, so the group spans a page
// boundary -- exactly the case the ModifyIndex-only token cannot handle.
seen := make(map[string]int, n)
token := ""
pages := 0
// A terminating walk needs ceil(n/perPage) pages; a stalled cursor never
// stops, so anything well past that means the walk did not terminate.
maxPages := n * 3
for {
pages++
if pages > maxPages {
t.Fatalf("pagination did not terminate after %d pages: cursor stuck at token %q (seen=%v)",
pages, token, seen)
}

req := &structs.JobStatusesRequest{}
req.QueryOptions.Region = "global"
req.QueryOptions.Namespace = structs.DefaultNamespace
req.QueryOptions.PerPage = 2
req.QueryOptions.NextToken = token

var resp structs.JobStatusesResponse
must.NoError(t, s.RPC("Job.Statuses", req, &resp))

for _, j := range resp.Jobs {
seen[j.ID]++
}
if resp.NextToken == "" {
break
}
token = resp.NextToken
}

for id := range want {
must.Eq(t, 1, seen[id],
must.Sprintf("job %s returned %d time(s) across pages, want exactly 1", id, seen[id]))
}
must.Eq(t, n, len(seen), must.Sprintf("walked jobs = %v", seen))
}
21 changes: 13 additions & 8 deletions nomad/job_endpoint_statuses_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ package nomad

import (
"context"
"strconv"
"fmt"
"testing"
"time"

Expand Down Expand Up @@ -142,29 +142,34 @@ func TestJob_Statuses(t *testing.T) {

// test various single-job requests

// the pagination cursor is "<ModifyIndex>.<Namespace>.<ID>"
tok := func(j *structs.Job) string {
return fmt.Sprintf("%d.%s.%s", j.ModifyIndex, j.Namespace, j.ID)
}

for _, tc := range []struct {
name string
qo structs.QueryOptions
jobs []structs.NamespacedID
expect *structs.Job
expectNext uint64 // NextToken (ModifyIndex)
expectNext *structs.Job // job the NextToken cursor points at
}{
{
name: "page 1",
qo: structs.QueryOptions{
PerPage: 1,
},
expect: jobs[4],
expectNext: jobs[3].ModifyIndex,
expectNext: jobs[3],
},
{
name: "page 2",
qo: structs.QueryOptions{
PerPage: 1,
NextToken: strconv.FormatUint(jobs[3].ModifyIndex, 10),
NextToken: tok(jobs[3]),
},
expect: jobs[3],
expectNext: jobs[2].ModifyIndex,
expectNext: jobs[2],
},
{
name: "reverse",
Expand All @@ -173,7 +178,7 @@ func TestJob_Statuses(t *testing.T) {
Reverse: true,
},
expect: jobs[0],
expectNext: jobs[1].ModifyIndex,
expectNext: jobs[1],
},
{
name: "filter",
Expand Down Expand Up @@ -212,8 +217,8 @@ func TestJob_Statuses(t *testing.T) {
must.Eq(t, tc.expect.ID, resp.Jobs[0].ID)
}
expectToken := ""
if tc.expectNext > 0 {
expectToken = strconv.FormatUint(tc.expectNext, 10)
if tc.expectNext != nil {
expectToken = tok(tc.expectNext)
}
must.Eq(t, expectToken, resp.NextToken)
})
Expand Down
12 changes: 7 additions & 5 deletions nomad/state/paginator/paginator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,22 @@ func TestPaginator(t *testing.T) {
{
name: "when numbers are numbers",
perPage: 2,
// "10" is converted to uint64(10) and compared with uint64 index
// a bare "10" target exercises the legacy index-only path: "10" is
// parsed as uint64(10) and compared numerically with the index.
nextToken: "10",
tokenizer: ModifyIndexTokenizer[*mockObject]("10"),
tokenizer: ModifyIndexAndNamespaceIDTokenizer[*mockObject]("10"),
expected: []string{"10", "11"},
expectedNextToken: "",
},
{
name: "when zero is a number",
perPage: 2,
// "" is converted to uint64(0) and compared with uint64 index
// an empty token starts from the beginning; the next token is the
// full "<index>.<namespace>.<id>" cursor (namespace is empty here).
nextToken: "",
tokenizer: ModifyIndexTokenizer[*mockObject](""),
tokenizer: ModifyIndexAndNamespaceIDTokenizer[*mockObject](""),
expected: []string{"0", "1"},
expectedNextToken: "2",
expectedNextToken: "2..2",
},
{
name: "starting off the end",
Expand Down
57 changes: 48 additions & 9 deletions nomad/state/paginator/tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,49 @@ func CreateIndexAndIDTokenizer[T idAndCreateIndexGetter](target string) Tokenize
}
}

// ModifyIndexTokenizer returns a tokenizer by ModifyIndex.
func ModifyIndexTokenizer[T modifyIndexGetter](target string) Tokenizer[T] {
// attempt to convert token to uint for iterators ordered numerically.
// it's safe to ignore the error here because the `next` method ignores
// this field for string tokens and 0 is valid for an unset numeric token.
targetIndex, _ := strconv.ParseUint(target, 10, 64)

// ModifyIndexAndNamespaceIDTokenizer returns a tokenizer by ModifyIndex, with
// Namespace and ID as a tiebreaker. ModifyIndex is not unique across objects
// (several may be written in one Raft transaction), so ModifyIndex alone does
// not identify a unique position to resume pagination from. Namespace and ID
// make the token a total order that matches the memdb iteration order of the
// non-unique modify_index index, which breaks ties on the (Namespace, ID)
// primary key.
func ModifyIndexAndNamespaceIDTokenizer[T modifyIndexAndNamespaceIDGetter](target string) Tokenizer[T] {
return func(item T) (string, int) {
index := item.GetModifyIndex()
token := fmt.Sprintf("%d", index)
return token, cmp.Compare(index, targetIndex)
ns := item.GetNamespace()
id := item.GetID()
token := fmt.Sprintf("%d.%s.%s", index, ns, id)

// Namespace cannot contain '.', so the index and namespace are the first
// two segments; the ID (which may contain '.') is the remainder.
parts := strings.SplitN(target, ".", 3)

// Parse the index numerically so values like 12 and 102 compare
// correctly rather than lexicographically. A target that doesn't begin
// with an integer is malformed; fall back to a direct string compare.
targetIndex, err := strconv.ParseUint(parts[0], 10, 64)
if err != nil {
return token, cmp.Compare(token, target)
}
if c := cmp.Compare(index, targetIndex); c != 0 {
return token, c
}

// Indexes are equal; break the tie by namespace then ID. A target with
// fewer segments (e.g. a legacy bare-integer token from before this
// tiebreaker existed, seen during a rolling upgrade) compares only the
// segments it has, degrading to the previous index-only behavior.
if len(parts) < 2 {
return token, 0
}
if c := cmp.Compare(ns, parts[1]); c != 0 {
return token, c
}
if len(parts) < 3 {
return token, 0
}
return token, cmp.Compare(id, parts[2])
}
}

Expand Down Expand Up @@ -120,3 +152,10 @@ type idAndCreateIndexGetter interface {
type modifyIndexGetter interface {
GetModifyIndex() uint64
}

// modifyIndexAndNamespaceIDGetter must be implemented by structs that want to
// use ModifyIndex with a Namespace and ID tiebreaker as their pagination token.
type modifyIndexAndNamespaceIDGetter interface {
modifyIndexGetter
namespaceIDGetter
}
Loading
Loading