diff --git a/.gitignore b/.gitignore index a3792ca..17a2cdd 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,6 @@ docs/design/ # Build output bin/ + +# Generated test case output (caseforge gen default output dir) +cases/ diff --git a/cmd/gen.go b/cmd/gen.go index 109dec5..ad06dc1 100644 --- a/cmd/gen.go +++ b/cmd/gen.go @@ -50,9 +50,10 @@ var ( genExcludePath string genIncludeTag string genExcludeTag string - genAuthBootstrap bool - genWithOracles bool - genForce bool + genAuthBootstrap bool + genWithOracles bool + genForce bool + genAnnotationBatch int ) // allTechniqueNames is the canonical list used for --technique completion. @@ -108,6 +109,7 @@ func init() { genCmd.Flags().BoolVar(&genAuthBootstrap, "auth-bootstrap", false, "Wrap all secured-endpoint cases with an auth setup step") genCmd.Flags().BoolVar(&genWithOracles, "with-oracles", false, "Mine response body constraints via LLM and inject as assertions (requires LLM)") genCmd.Flags().BoolVar(&genForce, "force", false, "Regenerate even when spec hash matches existing output") + genCmd.Flags().IntVar(&genAnnotationBatch, "annotation-batch", 0, "Number of operations to annotate per LLM call (0 = one call per operation, recommended: 8–20)") _ = genCmd.MarkFlagRequired("spec") // Dynamic completion: --operations reads the spec and suggests operationIds. @@ -401,6 +403,9 @@ func runGen(cmd *cobra.Command, args []string) error { if genMaxCasesPerOp > 0 { engine.SetMaxCasesPerOp(genMaxCasesPerOp) } + if genAnnotationBatch > 0 { + engine.SetAnnotationBatch(genAnnotationBatch) + } newCases, err := engine.Generate(parsedSpec) if err != nil { return fmt.Errorf("generating test cases: %w", err) diff --git a/cmd/gen_e2e_test.go b/cmd/gen_e2e_test.go index 6869f2f..a744ca1 100644 --- a/cmd/gen_e2e_test.go +++ b/cmd/gen_e2e_test.go @@ -37,6 +37,7 @@ func resetGenGlobals(t *testing.T) func() { genConcurrency = 1 genResume = false genForce = false + genAnnotationBatch = 0 genTupleLevel = 2 genSeed = 0 } diff --git a/docs/acceptance/acceptance-tests.md b/docs/acceptance/acceptance-tests.md index 5ac5c77..73ca4e4 100644 --- a/docs/acceptance/acceptance-tests.md +++ b/docs/acceptance/acceptance-tests.md @@ -146,6 +146,7 @@ | AT-249 | Hurl output contains case_name field | `caseforge gen --no-ai --format hurl --spec petstore.yaml --output /tmp/at249` | Every `.hurl` file has a `# case_name=` header line | ✅ PASS | | AT-250 | gen skips regeneration on unchanged spec | Run `gen` twice on the same spec | Second run prints "unchanged" and exits without regenerating | ✅ PASS | | AT-251 | gen --force regenerates despite matching hash | Run `gen` then `gen --force` on the same spec | `--force` run prints "Generated" (bypasses dedup) | ✅ PASS | +| AT-252 | gen --annotation-batch flag is registered and runs without error | `caseforge gen --help` + `caseforge gen --no-ai --annotation-batch 5 --spec petstore.yaml --output /tmp/at252` | Help text contains `annotation-batch`; gen completes successfully with flag set | ✅ PASS | --- diff --git a/internal/methodology/engine.go b/internal/methodology/engine.go index e213331..2bf976d 100644 --- a/internal/methodology/engine.go +++ b/internal/methodology/engine.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "os" + "strings" "sync" "time" @@ -37,14 +38,15 @@ type Seedable interface { } type Engine struct { - techniques []Technique - specTechniques []SpecTechnique - llm llm.LLMProvider - sink event.Sink - warnWriter io.Writer // destination for warn: lines; defaults to os.Stderr - concurrency int // 0 or 1 = serial; >1 = parallel worker pool - seed int64 // 0 = random - maxCasesPerOp int // 0 = unlimited + techniques []Technique + specTechniques []SpecTechnique + llm llm.LLMProvider + sink event.Sink + warnWriter io.Writer // destination for warn: lines; defaults to os.Stderr + concurrency int // 0 or 1 = serial; >1 = parallel worker pool + seed int64 // 0 = random + maxCasesPerOp int // 0 = unlimited + annotationBatch int // 0 = sequential (one call per op); >0 = batch size } func NewEngine(provider llm.LLMProvider, techniques ...Technique) *Engine { @@ -83,6 +85,14 @@ func (e *Engine) SetMaxCasesPerOp(n int) { e.maxCasesPerOp = n } +// SetAnnotationBatch sets the number of operations to annotate per LLM call. +// 0 (default) uses sequential mode: one call per operation. +// Values > 0 batch that many operations into a single call, reducing round-trips +// at the cost of larger prompts. Recommended range: 5–20. +func (e *Engine) SetAnnotationBatch(n int) { + e.annotationBatch = n +} + // SetSink registers an event sink for progress events. func (e *Engine) SetSink(s event.Sink) { e.sink = s @@ -233,6 +243,11 @@ func (e *Engine) annotateOperations(ops []*spec.Operation) { if !e.llm.IsAvailable() { return // NoopProvider: skip annotation, SemanticInfo stays nil } + if e.annotationBatch >= 1 { + e.annotateOperationsBatch(ops, e.annotationBatch) + return + } + // Sequential mode: one LLM call per operation. for i, op := range ops { if i > 0 { time.Sleep(500 * time.Millisecond) // throttle to reduce rate-limit pressure @@ -251,6 +266,108 @@ func (e *Engine) annotateOperations(ops []*spec.Operation) { } } +// annotateOperationsBatch sends ops in groups of batchSize to the LLM, each +// group in a single call. Responses are matched back to ops by operation_id. +// Failures are per-batch: if a batch call fails, those ops get no annotation +// and generation continues unaffected (annotation is best-effort). +func (e *Engine) annotateOperationsBatch(ops []*spec.Operation, batchSize int) { + for start := 0; start < len(ops); start += batchSize { + if start > 0 { + time.Sleep(200 * time.Millisecond) // light throttle between batches + } + end := start + batchSize + if end > len(ops) { + end = len(ops) + } + batch := ops[start:end] + + annotations, err := e.annotateBatch(batch) + for _, op := range batch { + if err != nil { + e.warn("warn: batch annotation failed for %s %s: %v\n", op.Method, op.Path, err) + } else if a, ok := annotations[op.OperationID]; ok { + op.SemanticInfo = a + } + e.emit(event.Event{Type: event.EventOperationAnnotating, Payload: event.OperationDonePayload{ + OperationID: op.OperationID, + Method: op.Method, + Path: op.Path, + }}) + } + } +} + +// annotateBatch calls the LLM once for a slice of operations, returning a map +// of operationId → SemanticAnnotation. Unrecognised or unparseable entries are +// silently omitted so callers can fall through to the no-annotation path. +func (e *Engine) annotateBatch(ops []*spec.Operation) (map[string]*spec.SemanticAnnotation, error) { + // Build prompt listing all operations. + var sb strings.Builder + sb.WriteString("Analyze these API operations. Return a JSON array — one object per operation, in any order.\n") + sb.WriteString("Each object must include \"operation_id\" plus these fields: resource_type, action_type, has_state_machine, state_field, unique_fields, implicit_rules.\n\n") + for _, op := range ops { + id := op.OperationID + if id == "" { + id = op.Method + "_" + op.Path + } + desc := op.Summary + if op.Description != "" { + desc = op.Summary + " — " + op.Description + } + fmt.Fprintf(&sb, "- operation_id: %q %s %s summary: %s\n", id, op.Method, op.Path, desc) + } + sb.WriteString("\nReturn ONLY the JSON array, no other text.") + + ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second) + defer cancel() + + req := &llm.CompletionRequest{ + System: "You are an API testing expert. Analyze operations and return structured JSON.", + Messages: []llm.Message{{Role: "user", Content: sb.String()}}, + MaxTokens: min(256*len(ops), 8192), // cap at 8192 — smallest common provider output limit + } + resp, err := llm.Retry(ctx, 5, func() (*llm.CompletionResponse, error) { + return e.llm.Complete(ctx, req) + }) + if err != nil { + return nil, err + } + return parseBatchAnnotations(resp.Text), nil +} + +// parseBatchAnnotations extracts a JSON array of per-operation annotations from +// the LLM response and returns a map keyed by operation_id. +func parseBatchAnnotations(text string) map[string]*spec.SemanticAnnotation { + extracted := llm.ExtractJSON(text) + var items []struct { + OperationID string `json:"operation_id"` + ResourceType string `json:"resource_type"` + ActionType string `json:"action_type"` + HasStateMachine bool `json:"has_state_machine"` + StateField string `json:"state_field"` + UniqueFields []string `json:"unique_fields"` + ImplicitRules []string `json:"implicit_rules"` + } + if err := json.Unmarshal([]byte(extracted), &items); err != nil { + return nil + } + out := make(map[string]*spec.SemanticAnnotation, len(items)) + for _, item := range items { + if item.OperationID == "" { + continue + } + out[item.OperationID] = &spec.SemanticAnnotation{ + ResourceType: item.ResourceType, + ActionType: item.ActionType, + HasStateMachine: item.HasStateMachine, + StateField: item.StateField, + UniqueFields: item.UniqueFields, + ImplicitRules: item.ImplicitRules, + } + } + return out +} + func (e *Engine) annotateOperation(op *spec.Operation) (*spec.SemanticAnnotation, error) { prompt := fmt.Sprintf( "Analyze this API operation and return JSON:\n"+ diff --git a/internal/methodology/engine_test.go b/internal/methodology/engine_test.go index 34cba17..6aba632 100644 --- a/internal/methodology/engine_test.go +++ b/internal/methodology/engine_test.go @@ -3,6 +3,7 @@ package methodology import ( "context" + "fmt" "sync" "testing" @@ -346,3 +347,156 @@ func TestEngine_MaxCasesPerOp_TruncatesByPriority(t *testing.T) { assert.LessOrEqual(t, len(cases), 2, "engine must not produce more than maxCasesPerOp cases for a single operation") } + +// batchLLMProvider captures LLM calls and returns a canned batch JSON response. +type batchLLMProvider struct { + calls int + muCalls sync.Mutex + // responseFor returns the response text for each call (indexed by call number). + responseFor func(req string) string +} + +func (b *batchLLMProvider) IsAvailable() bool { return true } +func (b *batchLLMProvider) Name() string { return "batch-stub" } +func (b *batchLLMProvider) Complete(_ context.Context, req *llm.CompletionRequest) (*llm.CompletionResponse, error) { + b.muCalls.Lock() + b.calls++ + b.muCalls.Unlock() + text := b.responseFor(req.Messages[0].Content) + return &llm.CompletionResponse{Text: text}, nil +} + +func TestEngine_BatchAnnotation_EmitsOneEventPerOp(t *testing.T) { + var got []event.EventType + mu := sync.Mutex{} + sink := event.SinkFunc(func(e event.Event) { + mu.Lock() + got = append(got, e.Type) + mu.Unlock() + }) + + stub := &batchLLMProvider{responseFor: func(_ string) string { + return `[ + {"operation_id":"op1","resource_type":"pet","action_type":"list"}, + {"operation_id":"op2","resource_type":"pet","action_type":"create"} + ]` + }} + engine := NewEngine(stub, NewEquivalenceTechnique()) + engine.SetAnnotationBatch(10) // both ops fit in one batch + engine.SetSink(sink) + + ps := &spec.ParsedSpec{Operations: []*spec.Operation{ + {OperationID: "op1", Method: "GET", Path: "/pets", Responses: map[string]*spec.Response{"200": {}}}, + {OperationID: "op2", Method: "POST", Path: "/pets", Responses: map[string]*spec.Response{"201": {}}}, + }} + _, err := engine.Generate(ps) + require.NoError(t, err) + + var annotatingCount int + for _, typ := range got { + if typ == event.EventOperationAnnotating { + annotatingCount++ + } + } + assert.Equal(t, 2, annotatingCount, "batch mode must still emit one EventOperationAnnotating per operation") +} + +func TestEngine_BatchAnnotation_AnnotatesOpsCorrectly(t *testing.T) { + stub := &batchLLMProvider{responseFor: func(_ string) string { + return `[ + {"operation_id":"createPet","resource_type":"pet","action_type":"create","unique_fields":["name"]}, + {"operation_id":"listPets","resource_type":"pet","action_type":"list"} + ]` + }} + engine := NewEngine(stub) + engine.SetAnnotationBatch(10) + + ops := []*spec.Operation{ + {OperationID: "listPets", Method: "GET", Path: "/pets", Responses: map[string]*spec.Response{"200": {}}}, + {OperationID: "createPet", Method: "POST", Path: "/pets", Responses: map[string]*spec.Response{"201": {}}}, + } + ps := &spec.ParsedSpec{Operations: ops} + _, err := engine.Generate(ps) + require.NoError(t, err) + + var listOp, createOp *spec.Operation + for _, op := range ops { + if op.OperationID == "listPets" { + listOp = op + } else if op.OperationID == "createPet" { + createOp = op + } + } + require.NotNil(t, listOp.SemanticInfo, "listPets should have annotation") + assert.Equal(t, "list", listOp.SemanticInfo.ActionType) + + require.NotNil(t, createOp.SemanticInfo, "createPet should have annotation") + assert.Equal(t, "create", createOp.SemanticInfo.ActionType) + assert.Equal(t, []string{"name"}, createOp.SemanticInfo.UniqueFields) +} + +func TestEngine_BatchAnnotation_BatchFailureIsGraceful(t *testing.T) { + // LLM returns invalid JSON — ops should get no annotation but gen still succeeds. + stub := &batchLLMProvider{responseFor: func(_ string) string { + return `not valid json` + }} + engine := NewEngine(stub, NewEquivalenceTechnique()) + engine.SetAnnotationBatch(5) + + ops := []*spec.Operation{ + {OperationID: "op1", Method: "GET", Path: "/x", Responses: map[string]*spec.Response{"200": {}}}, + } + _, err := engine.Generate(&spec.ParsedSpec{Operations: ops}) + require.NoError(t, err, "batch annotation failure must not fail generation") + assert.Nil(t, ops[0].SemanticInfo, "failed batch should leave SemanticInfo nil") +} + +func TestEngine_BatchAnnotation_SplitsIntoBatches(t *testing.T) { + var callCount int + mu := sync.Mutex{} + stub := &batchLLMProvider{responseFor: func(_ string) string { + mu.Lock() + callCount++ + mu.Unlock() + return `[]` // empty but valid + }} + engine := NewEngine(stub) + engine.SetAnnotationBatch(3) // 5 ops → 2 batches + + ops := make([]*spec.Operation, 5) + for i := range ops { + ops[i] = &spec.Operation{ + OperationID: fmt.Sprintf("op%d", i), + Method: "GET", Path: fmt.Sprintf("/x%d", i), + Responses: map[string]*spec.Response{"200": {}}, + } + } + _, err := engine.Generate(&spec.ParsedSpec{Operations: ops}) + require.NoError(t, err) + assert.Equal(t, 2, callCount, "5 ops with batch size 3 should make exactly 2 LLM calls") +} + +func TestParseBatchAnnotations_HandlesValidArray(t *testing.T) { + text := `[ + {"operation_id":"getUser","resource_type":"user","action_type":"read","has_state_machine":false,"unique_fields":["email"],"implicit_rules":["email must be unique"]}, + {"operation_id":"createUser","resource_type":"user","action_type":"create"} + ]` + result := parseBatchAnnotations(text) + require.Len(t, result, 2) + assert.Equal(t, "user", result["getUser"].ResourceType) + assert.Equal(t, "read", result["getUser"].ActionType) + assert.Equal(t, []string{"email"}, result["getUser"].UniqueFields) + assert.Equal(t, "create", result["createUser"].ActionType) +} + +func TestParseBatchAnnotations_DropsEntryWithoutOperationID(t *testing.T) { + text := `[{"resource_type":"user","action_type":"read"},{"operation_id":"op2","action_type":"list"}]` + result := parseBatchAnnotations(text) + assert.NotContains(t, result, "", "entry without operation_id must be dropped") + assert.Contains(t, result, "op2") +} + +func TestParseBatchAnnotations_InvalidJSONReturnsNil(t *testing.T) { + assert.Nil(t, parseBatchAnnotations("not json")) + assert.Nil(t, parseBatchAnnotations("{}")) // object not array +} diff --git a/scripts/acceptance.sh b/scripts/acceptance.sh index 66c6bb4..7b165d2 100755 --- a/scripts/acceptance.sh +++ b/scripts/acceptance.sh @@ -1055,6 +1055,13 @@ run "AT-251" "gen --force regenerates even when spec is unchanged" \ "'$BIN' gen --spec '$WORKDIR/petstore.yaml' --no-ai --output '$AT251DIR' 2>&1 | grep -q 'Generated' && \ '$BIN' gen --spec '$WORKDIR/petstore.yaml' --no-ai --force --output '$AT251DIR' 2>&1 | grep -q 'Generated'" +# AT-252: --annotation-batch flag is registered and runs without error +AT252DIR=$(mktemp -d) +contains "AT-252" "gen --annotation-batch flag is registered" "annotation-batch" \ + "$BIN gen --help" +run "AT-252b" "gen --annotation-batch flag runs gen to completion" \ + "'$BIN' gen --spec '$WORKDIR/petstore.yaml' --no-ai --annotation-batch 5 --output '$AT252DIR' 2>&1 | grep -q 'Generated'" + echo "" # -------------------------------------------------------