testmind-hq · yuchou87 · May 7, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/.gitignore b/.gitignore
@@ -41,3 +41,6 @@ docs/design/
 
 # Build output
 bin/
+
+# Generated test case output (caseforge gen default output dir)
+cases/
diff --git a/cmd/gen.go b/cmd/gen.go
@@ -50,9 +50,10 @@ var (
 	genExcludePath string
 	genIncludeTag  string
 	genExcludeTag  string
-	genAuthBootstrap bool
-	genWithOracles   bool
-	genForce         bool
+	genAuthBootstrap    bool
+	genWithOracles      bool
+	genForce            bool
+	genAnnotationBatch  int
 )
 
 // allTechniqueNames is the canonical list used for --technique completion.
@@ -108,6 +109,7 @@ func init() {
 	genCmd.Flags().BoolVar(&genAuthBootstrap, "auth-bootstrap", false, "Wrap all secured-endpoint cases with an auth setup step")
 	genCmd.Flags().BoolVar(&genWithOracles, "with-oracles", false, "Mine response body constraints via LLM and inject as assertions (requires LLM)")
 	genCmd.Flags().BoolVar(&genForce, "force", false, "Regenerate even when spec hash matches existing output")
+	genCmd.Flags().IntVar(&genAnnotationBatch, "annotation-batch", 0, "Number of operations to annotate per LLM call (0 = one call per operation, recommended: 8–20)")
 	_ = genCmd.MarkFlagRequired("spec")
 
 	// Dynamic completion: --operations reads the spec and suggests operationIds.
@@ -401,6 +403,9 @@ func runGen(cmd *cobra.Command, args []string) error {
 	if genMaxCasesPerOp > 0 {
 		engine.SetMaxCasesPerOp(genMaxCasesPerOp)
 	}
+	if genAnnotationBatch > 0 {
+		engine.SetAnnotationBatch(genAnnotationBatch)
+	}
 	newCases, err := engine.Generate(parsedSpec)
 	if err != nil {
 		return fmt.Errorf("generating test cases: %w", err)

diff --git a/cmd/gen_e2e_test.go b/cmd/gen_e2e_test.go
@@ -37,6 +37,7 @@ func resetGenGlobals(t *testing.T) func() {
 		genConcurrency = 1
 		genResume = false
 		genForce = false
+		genAnnotationBatch = 0
 		genTupleLevel = 2
 		genSeed = 0
 	}

diff --git a/docs/acceptance/acceptance-tests.md b/docs/acceptance/acceptance-tests.md
@@ -146,6 +146,7 @@
 | AT-249 | Hurl output contains case_name field | `caseforge gen --no-ai --format hurl --spec petstore.yaml --output /tmp/at249` | Every `.hurl` file has a `# case_name=` header line | ✅ PASS |
 | AT-250 | gen skips regeneration on unchanged spec | Run `gen` twice on the same spec | Second run prints "unchanged" and exits without regenerating | ✅ PASS |
 | AT-251 | gen --force regenerates despite matching hash | Run `gen` then `gen --force` on the same spec | `--force` run prints "Generated" (bypasses dedup) | ✅ PASS |
+| AT-252 | gen --annotation-batch flag is registered and runs without error | `caseforge gen --help` + `caseforge gen --no-ai --annotation-batch 5 --spec petstore.yaml --output /tmp/at252` | Help text contains `annotation-batch`; gen completes successfully with flag set | ✅ PASS |
 
 ---
 

diff --git a/internal/methodology/engine.go b/internal/methodology/engine.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"strings"
 	"sync"
 	"time"
 
@@ -37,14 +38,15 @@ type Seedable interface {
 }
 
 type Engine struct {
-	techniques     []Technique
-	specTechniques []SpecTechnique
-	llm            llm.LLMProvider
-	sink           event.Sink
-	warnWriter     io.Writer // destination for warn: lines; defaults to os.Stderr
-	concurrency    int       // 0 or 1 = serial; >1 = parallel worker pool
-	seed           int64     // 0 = random
-	maxCasesPerOp  int       // 0 = unlimited
+	techniques      []Technique
+	specTechniques  []SpecTechnique
+	llm             llm.LLMProvider
+	sink            event.Sink
+	warnWriter      io.Writer // destination for warn: lines; defaults to os.Stderr
+	concurrency     int       // 0 or 1 = serial; >1 = parallel worker pool
+	seed            int64     // 0 = random
+	maxCasesPerOp   int       // cap on generated cases per operation; 0 = unlimited
+	annotationBatch int       // ops per annotation LLM call; <1 = sequential (one call per op)
 }
 
 func NewEngine(provider llm.LLMProvider, techniques ...Technique) *Engine {
@@ -83,6 +85,14 @@ func (e *Engine) SetMaxCasesPerOp(n int) {
 	e.maxCasesPerOp = n
 }
 
+// SetAnnotationBatch sets the number of operations to annotate per LLM call.
+// Values below 1 (including the default 0) select sequential mode: one call
+// per operation. Negative values are normalized to 0 so the stored setting is
+// always either 0 or a usable batch size.
+// Values of 1 or more batch that many operations into a single call, reducing
+// round-trips at the cost of larger prompts. Recommended range: 8–20, matching
+// the --annotation-batch flag help.
+func (e *Engine) SetAnnotationBatch(n int) {
+	e.annotationBatch = max(n, 0)
+}
+
+
 // SetSink registers an event sink for progress events.
 func (e *Engine) SetSink(s event.Sink) {
 	e.sink = s
@@ -233,6 +243,11 @@ func (e *Engine) annotateOperations(ops []*spec.Operation) {
 	if !e.llm.IsAvailable() {
 		return // NoopProvider: skip annotation, SemanticInfo stays nil
 	}
+	if e.annotationBatch >= 1 {
+		e.annotateOperationsBatch(ops, e.annotationBatch)
+		return
+	}
+	// Sequential mode: one LLM call per operation.
 	for i, op := range ops {
 		if i > 0 {
 			time.Sleep(500 * time.Millisecond) // throttle to reduce rate-limit pressure
@@ -251,6 +266,108 @@ func (e *Engine) annotateOperations(ops []*spec.Operation) {
 	}
 }
 
+// annotateOperationsBatch sends ops to the LLM in groups of batchSize, one
+// call per group, and matches the responses back to ops by operation_id.
+//
+// Operations without an OperationID are matched by the synthetic
+// "<Method>_<Path>" key, which is the id annotateBatch writes into the prompt
+// for them; looking them up by the empty OperationID would never match.
+//
+// Failures are per-batch: if a batch call fails, those ops get no annotation
+// and generation continues unaffected (annotation is best-effort). One
+// EventOperationAnnotating event is emitted per operation either way.
+//
+// A batchSize below 1 is treated as 1 so the loop always makes progress.
+func (e *Engine) annotateOperationsBatch(ops []*spec.Operation, batchSize int) {
+	if batchSize < 1 {
+		batchSize = 1 // a zero or negative step would never terminate
+	}
+	for start := 0; start < len(ops); start += batchSize {
+		if start > 0 {
+			time.Sleep(200 * time.Millisecond) // light throttle between batches
+		}
+		batch := ops[start:min(start+batchSize, len(ops))]
+
+		annotations, err := e.annotateBatch(batch)
+		for _, op := range batch {
+			// Must mirror the id fallback used when the prompt is built.
+			key := op.OperationID
+			if key == "" {
+				key = op.Method + "_" + op.Path
+			}
+			if err != nil {
+				e.warn("warn: batch annotation failed for %s %s: %v\n", op.Method, op.Path, err)
+			} else if a, ok := annotations[key]; ok {
+				op.SemanticInfo = a
+			}
+			e.emit(event.Event{Type: event.EventOperationAnnotating, Payload: event.OperationDonePayload{
+				OperationID: op.OperationID,
+				Method:      op.Method,
+				Path:        op.Path,
+			}})
+		}
+	}
+}
+
+// annotateBatch calls the LLM once for a slice of operations, returning a map
+// of operation id → SemanticAnnotation. Operations without an OperationID are
+// listed in the prompt under the synthetic id "<Method>_<Path>".
+//
+// An empty ops slice returns (nil, nil) without calling the LLM. A non-nil
+// error means the LLM call itself failed. A response that cannot be parsed
+// yields a nil map with a nil error, so callers fall through to the
+// no-annotation path.
+func (e *Engine) annotateBatch(ops []*spec.Operation) (map[string]*spec.SemanticAnnotation, error) {
+	if len(ops) == 0 {
+		return nil, nil // nothing to ask; avoid a request with MaxTokens == 0
+	}
+
+	// Build prompt listing all operations.
+	var sb strings.Builder
+	sb.WriteString("Analyze these API operations. Return a JSON array — one object per operation, in any order.\n")
+	sb.WriteString("Each object must include \"operation_id\" plus these fields: resource_type, action_type, has_state_machine, state_field, unique_fields, implicit_rules.\n\n")
+	for _, op := range ops {
+		id := op.OperationID
+		if id == "" {
+			id = op.Method + "_" + op.Path
+		}
+		// Join summary and description, adding the separator only when both exist.
+		desc := op.Summary
+		if op.Description != "" {
+			if desc != "" {
+				desc += " — "
+			}
+			desc += op.Description
+		}
+		// Collapse newlines and whitespace runs so each operation stays on one prompt line.
+		desc = strings.Join(strings.Fields(desc), " ")
+		fmt.Fprintf(&sb, "- operation_id: %q  %s %s  summary: %s\n", id, op.Method, op.Path, desc)
+	}
+	sb.WriteString("\nReturn ONLY the JSON array, no other text.")
+
+	ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
+	defer cancel()
+
+	req := &llm.CompletionRequest{
+		System:    "You are an API testing expert. Analyze operations and return structured JSON.",
+		Messages:  []llm.Message{{Role: "user", Content: sb.String()}},
+		MaxTokens: min(256*len(ops), 8192), // cap at 8192 — smallest common provider output limit
+	}
+	resp, err := llm.Retry(ctx, 5, func() (*llm.CompletionResponse, error) {
+		return e.llm.Complete(ctx, req)
+	})
+	if err != nil {
+		return nil, err
+	}
+	return parseBatchAnnotations(resp.Text), nil
+}
+
+// parseBatchAnnotations decodes the LLM reply into per-operation annotations.
+// The JSON payload is pulled out of text with llm.ExtractJSON; if it does not
+// unmarshal into a slice of annotation objects the function returns nil.
+// Entries with an empty operation_id are skipped, and the remaining ones are
+// returned in a map keyed by operation_id.
+func parseBatchAnnotations(text string) map[string]*spec.SemanticAnnotation {
+	// batchItem mirrors one element of the JSON array the prompt asks for.
+	type batchItem struct {
+		OperationID     string   `json:"operation_id"`
+		ResourceType    string   `json:"resource_type"`
+		ActionType      string   `json:"action_type"`
+		HasStateMachine bool     `json:"has_state_machine"`
+		StateField      string   `json:"state_field"`
+		UniqueFields    []string `json:"unique_fields"`
+		ImplicitRules   []string `json:"implicit_rules"`
+	}
+
+	var decoded []batchItem
+	payload := []byte(llm.ExtractJSON(text))
+	if json.Unmarshal(payload, &decoded) != nil {
+		return nil
+	}
+
+	byID := make(map[string]*spec.SemanticAnnotation, len(decoded))
+	for i := range decoded {
+		entry := &decoded[i]
+		if entry.OperationID != "" {
+			byID[entry.OperationID] = &spec.SemanticAnnotation{
+				ResourceType:    entry.ResourceType,
+				ActionType:      entry.ActionType,
+				HasStateMachine: entry.HasStateMachine,
+				StateField:      entry.StateField,
+				UniqueFields:    entry.UniqueFields,
+				ImplicitRules:   entry.ImplicitRules,
+			}
+		}
+	}
+	return byID
+}
+
 func (e *Engine) annotateOperation(op *spec.Operation) (*spec.SemanticAnnotation, error) {
 	prompt := fmt.Sprintf(
 		"Analyze this API operation and return JSON:\n"+

diff --git a/internal/methodology/engine_test.go b/internal/methodology/engine_test.go
@@ -3,6 +3,7 @@ package methodology
 
 import (
 	"context"
+	"fmt"
 	"sync"
 	"testing"
 
@@ -346,3 +347,156 @@ func TestEngine_MaxCasesPerOp_TruncatesByPriority(t *testing.T) {
 	assert.LessOrEqual(t, len(cases), 2,
 		"engine must not produce more than maxCasesPerOp cases for a single operation")
 }
+
+// batchLLMProvider is a test stub LLM provider: it counts Complete calls and
+// answers each one with whatever responseFor returns.
+type batchLLMProvider struct {
+	muCalls sync.Mutex // guards calls
+	calls   int        // number of Complete invocations so far
+	// responseFor receives the content of the request's first message (the
+	// prompt) and returns the response text for that call.
+	responseFor func(req string) string
+}
+
+// IsAvailable always reports true so the engine takes the annotation path.
+func (b *batchLLMProvider) IsAvailable() bool { return true }
+
+// Name returns a fixed identifier for the stub.
+func (b *batchLLMProvider) Name() string { return "batch-stub" }
+
+// Complete records the call and returns responseFor's text with a nil error.
+// A request without messages is passed to responseFor as an empty prompt
+// instead of panicking on the index.
+func (b *batchLLMProvider) Complete(_ context.Context, req *llm.CompletionRequest) (*llm.CompletionResponse, error) {
+	b.muCalls.Lock()
+	b.calls++
+	b.muCalls.Unlock()
+
+	var prompt string
+	if len(req.Messages) > 0 {
+		prompt = req.Messages[0].Content
+	}
+	return &llm.CompletionResponse{Text: b.responseFor(prompt)}, nil
+}
+
+// TestEngine_BatchAnnotation_EmitsOneEventPerOp checks that annotating two
+// operations in a single batch still produces two EventOperationAnnotating
+// events, one per operation.
+func TestEngine_BatchAnnotation_EmitsOneEventPerOp(t *testing.T) {
+	const cannedReply = `[
+			{"operation_id":"op1","resource_type":"pet","action_type":"list"},
+			{"operation_id":"op2","resource_type":"pet","action_type":"create"}
+		]`
+
+	var (
+		mu         sync.Mutex
+		annotating int
+	)
+	// Count annotating events as they arrive instead of collecting every type.
+	counter := event.SinkFunc(func(ev event.Event) {
+		mu.Lock()
+		defer mu.Unlock()
+		if ev.Type == event.EventOperationAnnotating {
+			annotating++
+		}
+	})
+
+	provider := &batchLLMProvider{responseFor: func(string) string { return cannedReply }}
+	eng := NewEngine(provider, NewEquivalenceTechnique())
+	eng.SetAnnotationBatch(10) // both ops fit in one batch
+	eng.SetSink(counter)
+
+	parsed := &spec.ParsedSpec{Operations: []*spec.Operation{
+		{OperationID: "op1", Method: "GET", Path: "/pets", Responses: map[string]*spec.Response{"200": {}}},
+		{OperationID: "op2", Method: "POST", Path: "/pets", Responses: map[string]*spec.Response{"201": {}}},
+	}}
+	_, err := eng.Generate(parsed)
+	require.NoError(t, err)
+
+	assert.Equal(t, 2, annotating, "batch mode must still emit one EventOperationAnnotating per operation")
+}
+
+func TestEngine_BatchAnnotation_AnnotatesOpsCorrectly(t *testing.T) {
+	stub := &batchLLMProvider{responseFor: func(_ string) string {
+		return `[
+			{"operation_id":"createPet","resource_type":"pet","action_type":"create","unique_fields":["name"]},
+			{"operation_id":"listPets","resource_type":"pet","action_type":"list"}
+		]`
+	}}
+	engine := NewEngine(stub)
+	engine.SetAnnotationBatch(10)
+
+	ops := []*spec.Operation{
+		{OperationID: "listPets", Method: "GET", Path: "/pets", Responses: map[string]*spec.Response{"200": {}}},
+		{OperationID: "createPet", Method: "POST", Path: "/pets", Responses: map[string]*spec.Response{"201": {}}},
+	}
+	ps := &spec.ParsedSpec{Operations: ops}
+	_, err := engine.Generate(ps)
+	require.NoError(t, err)
+
+	var listOp, createOp *spec.Operation
+	for _, op := range ops {
+		if op.OperationID == "listPets" {
+			listOp = op
+		} else if op.OperationID == "createPet" {
+			createOp = op
+		}
+	}
+	require.NotNil(t, listOp.SemanticInfo, "listPets should have annotation")
+	assert.Equal(t, "list", listOp.SemanticInfo.ActionType)
+
+	require.NotNil(t, createOp.SemanticInfo, "createPet should have annotation")
+	assert.Equal(t, "create", createOp.SemanticInfo.ActionType)
+	assert.Equal(t, []string{"name"}, createOp.SemanticInfo.UniqueFields)
+}
+
+// TestEngine_BatchAnnotation_BatchFailureIsGraceful feeds the engine an LLM
+// reply that is not valid JSON and checks that generation still succeeds while
+// the operation is left without an annotation.
+func TestEngine_BatchAnnotation_BatchFailureIsGraceful(t *testing.T) {
+	provider := &batchLLMProvider{responseFor: func(string) string { return `not valid json` }}
+	eng := NewEngine(provider, NewEquivalenceTechnique())
+	eng.SetAnnotationBatch(5)
+
+	only := &spec.Operation{
+		OperationID: "op1",
+		Method:      "GET",
+		Path:        "/x",
+		Responses:   map[string]*spec.Response{"200": {}},
+	}
+	_, err := eng.Generate(&spec.ParsedSpec{Operations: []*spec.Operation{only}})
+
+	require.NoError(t, err, "batch annotation failure must not fail generation")
+	assert.Nil(t, only.SemanticInfo, "failed batch should leave SemanticInfo nil")
+}
+
+// TestEngine_BatchAnnotation_SplitsIntoBatches checks the batching arithmetic:
+// five operations with a batch size of three must result in two LLM calls.
+func TestEngine_BatchAnnotation_SplitsIntoBatches(t *testing.T) {
+	const opCount = 5
+
+	var (
+		mu        sync.Mutex
+		callCount int
+	)
+	provider := &batchLLMProvider{responseFor: func(string) string {
+		mu.Lock()
+		defer mu.Unlock()
+		callCount++
+		return `[]` // empty but valid
+	}}
+	eng := NewEngine(provider)
+	eng.SetAnnotationBatch(3) // 5 ops → 2 batches
+
+	ops := make([]*spec.Operation, 0, opCount)
+	for i := 0; i < opCount; i++ {
+		ops = append(ops, &spec.Operation{
+			OperationID: fmt.Sprintf("op%d", i),
+			Method:      "GET",
+			Path:        fmt.Sprintf("/x%d", i),
+			Responses:   map[string]*spec.Response{"200": {}},
+		})
+	}
+
+	_, err := eng.Generate(&spec.ParsedSpec{Operations: ops})
+	require.NoError(t, err)
+	assert.Equal(t, 2, callCount, "5 ops with batch size 3 should make exactly 2 LLM calls")
+}
+
+// TestParseBatchAnnotations_HandlesValidArray parses a two-element array and
+// checks that both entries land in the map under their operation_id with the
+// expected field values.
+func TestParseBatchAnnotations_HandlesValidArray(t *testing.T) {
+	parsed := parseBatchAnnotations(`[
+		{"operation_id":"getUser","resource_type":"user","action_type":"read","has_state_machine":false,"unique_fields":["email"],"implicit_rules":["email must be unique"]},
+		{"operation_id":"createUser","resource_type":"user","action_type":"create"}
+	]`)
+	require.Len(t, parsed, 2)
+
+	getUser, createUser := parsed["getUser"], parsed["createUser"]
+	assert.Equal(t, "user", getUser.ResourceType)
+	assert.Equal(t, "read", getUser.ActionType)
+	assert.Equal(t, []string{"email"}, getUser.UniqueFields)
+	assert.Equal(t, "create", createUser.ActionType)
+}
+
+func TestParseBatchAnnotations_DropsEntryWithoutOperationID(t *testing.T) {
+	text := `[{"resource_type":"user","action_type":"read"},{"operation_id":"op2","action_type":"list"}]`
+	result := parseBatchAnnotations(text)
+	assert.NotContains(t, result, "", "entry without operation_id must be dropped")
+	assert.Contains(t, result, "op2")
+}
+
+// TestParseBatchAnnotations_InvalidJSONReturnsNil checks that input which is
+// not a JSON array produces a nil result.
+func TestParseBatchAnnotations_InvalidJSONReturnsNil(t *testing.T) {
+	rejected := []string{
+		"not json",
+		"{}", // object not array
+	}
+	for _, input := range rejected {
+		assert.Nil(t, parseBatchAnnotations(input))
+	}
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -41,3 +41,6 @@ docs/design/ @@
     # Build output
     bin/
+    # Generated test case output (caseforge gen default output dir)
+    cases/