Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ docs/design/

# Build output
bin/

# Generated test case output (caseforge gen default output dir)
cases/
11 changes: 8 additions & 3 deletions cmd/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ var (
genExcludePath string
genIncludeTag string
genExcludeTag string
genAuthBootstrap bool
genWithOracles bool
genForce bool
genAuthBootstrap bool
genWithOracles bool
genForce bool
genAnnotationBatch int
)

// allTechniqueNames is the canonical list used for --technique completion.
Expand Down Expand Up @@ -108,6 +109,7 @@ func init() {
genCmd.Flags().BoolVar(&genAuthBootstrap, "auth-bootstrap", false, "Wrap all secured-endpoint cases with an auth setup step")
genCmd.Flags().BoolVar(&genWithOracles, "with-oracles", false, "Mine response body constraints via LLM and inject as assertions (requires LLM)")
genCmd.Flags().BoolVar(&genForce, "force", false, "Regenerate even when spec hash matches existing output")
genCmd.Flags().IntVar(&genAnnotationBatch, "annotation-batch", 0, "Number of operations to annotate per LLM call (0 = one call per operation, recommended: 8–20)")
_ = genCmd.MarkFlagRequired("spec")

// Dynamic completion: --operations reads the spec and suggests operationIds.
Expand Down Expand Up @@ -401,6 +403,9 @@ func runGen(cmd *cobra.Command, args []string) error {
if genMaxCasesPerOp > 0 {
engine.SetMaxCasesPerOp(genMaxCasesPerOp)
}
if genAnnotationBatch > 0 {
engine.SetAnnotationBatch(genAnnotationBatch)
}
newCases, err := engine.Generate(parsedSpec)
if err != nil {
return fmt.Errorf("generating test cases: %w", err)
Expand Down
1 change: 1 addition & 0 deletions cmd/gen_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ func resetGenGlobals(t *testing.T) func() {
genConcurrency = 1
genResume = false
genForce = false
genAnnotationBatch = 0
genTupleLevel = 2
genSeed = 0
}
Expand Down
1 change: 1 addition & 0 deletions docs/acceptance/acceptance-tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@
| AT-249 | Hurl output contains case_name field | `caseforge gen --no-ai --format hurl --spec petstore.yaml --output /tmp/at249` | Every `.hurl` file has a `# case_name=` header line | ✅ PASS |
| AT-250 | gen skips regeneration on unchanged spec | Run `gen` twice on the same spec | Second run prints "unchanged" and exits without regenerating | ✅ PASS |
| AT-251 | gen --force regenerates despite matching hash | Run `gen` then `gen --force` on the same spec | `--force` run prints "Generated" (bypasses dedup) | ✅ PASS |
| AT-252 | gen --annotation-batch flag is registered and runs without error | `caseforge gen --help` + `caseforge gen --no-ai --annotation-batch 5 --spec petstore.yaml --output /tmp/at252` | Help text contains `annotation-batch`; gen completes successfully with flag set | ✅ PASS |

---

Expand Down
133 changes: 125 additions & 8 deletions internal/methodology/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"io"
"os"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -37,14 +38,15 @@ type Seedable interface {
}

// Engine carries the registered techniques, the LLM provider, and the tuning
// knobs that the Set* methods configure.
//
// Each field is declared exactly once; declaring a field name twice in one
// struct is a compile error.
type Engine struct {
	techniques      []Technique
	specTechniques  []SpecTechnique
	llm             llm.LLMProvider
	sink            event.Sink
	warnWriter      io.Writer // destination for warn: lines; defaults to os.Stderr
	concurrency     int       // 0 or 1 = serial; >1 = parallel worker pool
	seed            int64     // 0 = random
	maxCasesPerOp   int       // 0 = unlimited
	annotationBatch int       // 0 = sequential (one call per op); >0 = batch size
}

func NewEngine(provider llm.LLMProvider, techniques ...Technique) *Engine {
Expand Down Expand Up @@ -83,6 +85,14 @@ func (e *Engine) SetMaxCasesPerOp(n int) {
e.maxCasesPerOp = n
}

// SetAnnotationBatch sets the number of operations to annotate per LLM call.
// 0 (default) uses sequential mode: one call per operation.
// Values > 0 batch that many operations into a single call, reducing round-trips
// at the cost of larger prompts. Recommended range: 8–20, the same range the
// --annotation-batch flag help advertises.
//
// A negative n is not a meaningful batch size; it is stored as 0 so the engine
// stays in sequential mode.
func (e *Engine) SetAnnotationBatch(n int) {
	e.annotationBatch = max(n, 0)
}

// SetSink registers an event sink for progress events.
func (e *Engine) SetSink(s event.Sink) {
e.sink = s
Expand Down Expand Up @@ -233,6 +243,11 @@ func (e *Engine) annotateOperations(ops []*spec.Operation) {
if !e.llm.IsAvailable() {
return // NoopProvider: skip annotation, SemanticInfo stays nil
}
if e.annotationBatch >= 1 {
e.annotateOperationsBatch(ops, e.annotationBatch)
return
}
// Sequential mode: one LLM call per operation.
for i, op := range ops {
if i > 0 {
time.Sleep(500 * time.Millisecond) // throttle to reduce rate-limit pressure
Expand All @@ -251,6 +266,108 @@ func (e *Engine) annotateOperations(ops []*spec.Operation) {
}
}

// annotateOperationsBatch sends ops in groups of batchSize to the LLM, each
// group in a single call. Responses are matched back to ops by operation_id.
// Failures are per-batch: if a batch call fails, those ops get no annotation
// and generation continues unaffected (annotation is best-effort).
//
// Operations without an OperationID are matched under the fallback id
// Method + "_" + Path, which is the id annotateBatch puts in the prompt for
// them. A batchSize below 1 is treated as 1 so the loop always advances.
// One EventOperationAnnotating event is emitted per operation, whether or not
// an annotation was obtained.
func (e *Engine) annotateOperationsBatch(ops []*spec.Operation, batchSize int) {
	if batchSize < 1 {
		// A zero step would loop forever; a negative one would slice out of range.
		batchSize = 1
	}
	for start := 0; start < len(ops); start += batchSize {
		if start > 0 {
			time.Sleep(200 * time.Millisecond) // light throttle between batches
		}
		batch := ops[start:min(start+batchSize, len(ops))]

		annotations, err := e.annotateBatch(batch)
		for _, op := range batch {
			if err != nil {
				e.warn("warn: batch annotation failed for %s %s: %v\n", op.Method, op.Path, err)
			} else {
				// Must mirror the id used when the prompt was built, otherwise
				// operations lacking an operationId can never be matched.
				key := op.OperationID
				if key == "" {
					key = op.Method + "_" + op.Path
				}
				if a, ok := annotations[key]; ok {
					op.SemanticInfo = a
				}
			}
			e.emit(event.Event{Type: event.EventOperationAnnotating, Payload: event.OperationDonePayload{
				OperationID: op.OperationID,
				Method:      op.Method,
				Path:        op.Path,
			}})
		}
	}
}

// annotateBatch calls the LLM once for a slice of operations, returning a map
// of operationId → SemanticAnnotation.
//
// Operations without an OperationID are listed in the prompt under the
// fallback id Method + "_" + Path. An empty ops slice returns an empty map
// without calling the LLM.
//
// An error is returned when the LLM call (wrapped in llm.Retry, bounded by a
// 90-second context) fails, or when the response cannot be parsed as a JSON
// array, so the caller can warn instead of silently losing the whole batch.
// Individual entries that parseBatchAnnotations drops are simply absent from
// the returned map.
func (e *Engine) annotateBatch(ops []*spec.Operation) (map[string]*spec.SemanticAnnotation, error) {
	if len(ops) == 0 {
		return map[string]*spec.SemanticAnnotation{}, nil
	}

	// Build prompt listing all operations.
	var sb strings.Builder
	sb.WriteString("Analyze these API operations. Return a JSON array — one object per operation, in any order.\n")
	sb.WriteString("Each object must include \"operation_id\" plus these fields: resource_type, action_type, has_state_machine, state_field, unique_fields, implicit_rules.\n\n")
	for _, op := range ops {
		id := op.OperationID
		if id == "" {
			id = op.Method + "_" + op.Path
		}
		desc := op.Summary
		switch {
		case op.Description == "":
			// Summary alone (possibly empty).
		case desc == "":
			desc = op.Description // avoid a dangling " — " separator
		default:
			desc += " — " + op.Description
		}
		// Collapse newlines and runs of whitespace so each operation stays on
		// the single prompt line that carries its operation_id.
		desc = strings.Join(strings.Fields(desc), " ")
		fmt.Fprintf(&sb, "- operation_id: %q %s %s summary: %s\n", id, op.Method, op.Path, desc)
	}
	sb.WriteString("\nReturn ONLY the JSON array, no other text.")

	ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
	defer cancel()

	req := &llm.CompletionRequest{
		System:    "You are an API testing expert. Analyze operations and return structured JSON.",
		Messages:  []llm.Message{{Role: "user", Content: sb.String()}},
		MaxTokens: min(256*len(ops), 8192), // cap at 8192 — smallest common provider output limit
	}
	resp, err := llm.Retry(ctx, 5, func() (*llm.CompletionResponse, error) {
		return e.llm.Complete(ctx, req)
	})
	if err != nil {
		return nil, err
	}

	annotations := parseBatchAnnotations(resp.Text)
	if annotations == nil {
		// parseBatchAnnotations yields nil only when the text is not a JSON array.
		return nil, fmt.Errorf("batch annotation response is not a valid JSON array")
	}
	return annotations, nil
}

// parseBatchAnnotations decodes the LLM's batch reply into a lookup table keyed
// by operation_id. The text is first passed through llm.ExtractJSON and then
// unmarshalled as a JSON array of annotation objects. If unmarshalling fails
// the result is nil; array entries with an empty operation_id are skipped.
func parseBatchAnnotations(text string) map[string]*spec.SemanticAnnotation {
	type batchEntry struct {
		OperationID     string   `json:"operation_id"`
		ResourceType    string   `json:"resource_type"`
		ActionType      string   `json:"action_type"`
		HasStateMachine bool     `json:"has_state_machine"`
		StateField      string   `json:"state_field"`
		UniqueFields    []string `json:"unique_fields"`
		ImplicitRules   []string `json:"implicit_rules"`
	}

	var entries []batchEntry
	payload := []byte(llm.ExtractJSON(text))
	if json.Unmarshal(payload, &entries) != nil {
		return nil
	}

	byID := make(map[string]*spec.SemanticAnnotation, len(entries))
	for i := range entries {
		entry := &entries[i]
		if entry.OperationID != "" {
			byID[entry.OperationID] = &spec.SemanticAnnotation{
				ResourceType:    entry.ResourceType,
				ActionType:      entry.ActionType,
				HasStateMachine: entry.HasStateMachine,
				StateField:      entry.StateField,
				UniqueFields:    entry.UniqueFields,
				ImplicitRules:   entry.ImplicitRules,
			}
		}
	}
	return byID
}

func (e *Engine) annotateOperation(op *spec.Operation) (*spec.SemanticAnnotation, error) {
prompt := fmt.Sprintf(
"Analyze this API operation and return JSON:\n"+
Expand Down
154 changes: 154 additions & 0 deletions internal/methodology/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package methodology

import (
"context"
"fmt"
"sync"
"testing"

Expand Down Expand Up @@ -346,3 +347,156 @@ func TestEngine_MaxCasesPerOp_TruncatesByPriority(t *testing.T) {
assert.LessOrEqual(t, len(cases), 2,
"engine must not produce more than maxCasesPerOp cases for a single operation")
}

// batchLLMProvider is a test double for the LLM provider: it counts Complete
// calls and replies with whatever responseFor produces.
type batchLLMProvider struct {
	calls   int
	muCalls sync.Mutex // held while incrementing calls
	// responseFor receives the content of the request's first message and
	// returns the text to send back.
	responseFor func(req string) string
}

// IsAvailable always reports true.
func (b *batchLLMProvider) IsAvailable() bool { return true }

// Name returns the fixed identifier "batch-stub".
func (b *batchLLMProvider) Name() string { return "batch-stub" }

// Complete bumps the call counter under the mutex, then answers with the text
// responseFor returns for the first message of req. It never returns an error.
func (b *batchLLMProvider) Complete(_ context.Context, req *llm.CompletionRequest) (*llm.CompletionResponse, error) {
	b.muCalls.Lock()
	b.calls++
	b.muCalls.Unlock()

	prompt := req.Messages[0].Content
	reply := &llm.CompletionResponse{Text: b.responseFor(prompt)}
	return reply, nil
}

// TestEngine_BatchAnnotation_EmitsOneEventPerOp checks that two operations
// annotated in a single batch still yield two EventOperationAnnotating events.
func TestEngine_BatchAnnotation_EmitsOneEventPerOp(t *testing.T) {
	const canned = `[
{"operation_id":"op1","resource_type":"pet","action_type":"list"},
{"operation_id":"op2","resource_type":"pet","action_type":"create"}
]`

	var (
		mu       sync.Mutex
		observed []event.EventType
	)
	recorder := event.SinkFunc(func(ev event.Event) {
		mu.Lock()
		defer mu.Unlock()
		observed = append(observed, ev.Type)
	})

	provider := &batchLLMProvider{responseFor: func(string) string { return canned }}
	eng := NewEngine(provider, NewEquivalenceTechnique())
	eng.SetAnnotationBatch(10) // both ops fit in one batch
	eng.SetSink(recorder)

	parsed := &spec.ParsedSpec{Operations: []*spec.Operation{
		{OperationID: "op1", Method: "GET", Path: "/pets", Responses: map[string]*spec.Response{"200": {}}},
		{OperationID: "op2", Method: "POST", Path: "/pets", Responses: map[string]*spec.Response{"201": {}}},
	}}
	_, err := eng.Generate(parsed)
	require.NoError(t, err)

	annotating := 0
	for _, kind := range observed {
		if kind != event.EventOperationAnnotating {
			continue
		}
		annotating++
	}
	assert.Equal(t, 2, annotating, "batch mode must still emit one EventOperationAnnotating per operation")
}

// TestEngine_BatchAnnotation_AnnotatesOpsCorrectly checks that annotations are
// matched to operations by operation_id even when the response lists them in a
// different order than the spec.
func TestEngine_BatchAnnotation_AnnotatesOpsCorrectly(t *testing.T) {
	const canned = `[
{"operation_id":"createPet","resource_type":"pet","action_type":"create","unique_fields":["name"]},
{"operation_id":"listPets","resource_type":"pet","action_type":"list"}
]`
	provider := &batchLLMProvider{responseFor: func(string) string { return canned }}
	eng := NewEngine(provider)
	eng.SetAnnotationBatch(10)

	ops := []*spec.Operation{
		{OperationID: "listPets", Method: "GET", Path: "/pets", Responses: map[string]*spec.Response{"200": {}}},
		{OperationID: "createPet", Method: "POST", Path: "/pets", Responses: map[string]*spec.Response{"201": {}}},
	}
	_, err := eng.Generate(&spec.ParsedSpec{Operations: ops})
	require.NoError(t, err)

	// Locate each operation by id rather than relying on slice position.
	var listOp, createOp *spec.Operation
	for _, candidate := range ops {
		switch candidate.OperationID {
		case "listPets":
			listOp = candidate
		case "createPet":
			createOp = candidate
		}
	}

	require.NotNil(t, listOp.SemanticInfo, "listPets should have annotation")
	assert.Equal(t, "list", listOp.SemanticInfo.ActionType)

	require.NotNil(t, createOp.SemanticInfo, "createPet should have annotation")
	assert.Equal(t, "create", createOp.SemanticInfo.ActionType)
	assert.Equal(t, []string{"name"}, createOp.SemanticInfo.UniqueFields)
}

// TestEngine_BatchAnnotation_BatchFailureIsGraceful feeds the engine an LLM
// reply that is not JSON and checks that generation still succeeds while the
// operation is left without an annotation.
func TestEngine_BatchAnnotation_BatchFailureIsGraceful(t *testing.T) {
	provider := &batchLLMProvider{responseFor: func(string) string {
		return `not valid json`
	}}
	eng := NewEngine(provider, NewEquivalenceTechnique())
	eng.SetAnnotationBatch(5)

	only := &spec.Operation{
		OperationID: "op1",
		Method:      "GET",
		Path:        "/x",
		Responses:   map[string]*spec.Response{"200": {}},
	}
	parsed := &spec.ParsedSpec{Operations: []*spec.Operation{only}}

	_, err := eng.Generate(parsed)
	require.NoError(t, err, "batch annotation failure must not fail generation")
	assert.Nil(t, only.SemanticInfo, "failed batch should leave SemanticInfo nil")
}

// TestEngine_BatchAnnotation_SplitsIntoBatches checks that five operations with
// a batch size of three result in exactly two LLM calls.
func TestEngine_BatchAnnotation_SplitsIntoBatches(t *testing.T) {
	var (
		mu        sync.Mutex
		callCount int
	)
	provider := &batchLLMProvider{responseFor: func(string) string {
		mu.Lock()
		defer mu.Unlock()
		callCount++
		return `[]` // empty but valid
	}}
	eng := NewEngine(provider)
	eng.SetAnnotationBatch(3) // 5 ops → 2 batches

	const opCount = 5
	ops := make([]*spec.Operation, 0, opCount)
	for i := 0; i < opCount; i++ {
		ops = append(ops, &spec.Operation{
			OperationID: fmt.Sprintf("op%d", i),
			Method:      "GET",
			Path:        fmt.Sprintf("/x%d", i),
			Responses:   map[string]*spec.Response{"200": {}},
		})
	}

	_, err := eng.Generate(&spec.ParsedSpec{Operations: ops})
	require.NoError(t, err)
	assert.Equal(t, 2, callCount, "5 ops with batch size 3 should make exactly 2 LLM calls")
}

// TestParseBatchAnnotations_HandlesValidArray checks that a well-formed
// two-element array is decoded into two entries keyed by operation_id.
func TestParseBatchAnnotations_HandlesValidArray(t *testing.T) {
	const input = `[
{"operation_id":"getUser","resource_type":"user","action_type":"read","has_state_machine":false,"unique_fields":["email"],"implicit_rules":["email must be unique"]},
{"operation_id":"createUser","resource_type":"user","action_type":"create"}
]`
	parsed := parseBatchAnnotations(input)
	require.Len(t, parsed, 2)

	getUser := parsed["getUser"]
	assert.Equal(t, "user", getUser.ResourceType)
	assert.Equal(t, "read", getUser.ActionType)
	assert.Equal(t, []string{"email"}, getUser.UniqueFields)

	assert.Equal(t, "create", parsed["createUser"].ActionType)
}

// TestParseBatchAnnotations_DropsEntryWithoutOperationID checks that an array
// element lacking operation_id is discarded while its sibling is kept.
func TestParseBatchAnnotations_DropsEntryWithoutOperationID(t *testing.T) {
	const input = `[{"resource_type":"user","action_type":"read"},{"operation_id":"op2","action_type":"list"}]`

	parsed := parseBatchAnnotations(input)

	assert.NotContains(t, parsed, "", "entry without operation_id must be dropped")
	assert.Contains(t, parsed, "op2")
}

func TestParseBatchAnnotations_InvalidJSONReturnsNil(t *testing.T) {
assert.Nil(t, parseBatchAnnotations("not json"))
assert.Nil(t, parseBatchAnnotations("{}")) // object not array
}
Loading
Loading