diff --git a/internal/mcp/response_cap_test.go b/internal/mcp/response_cap_test.go
new file mode 100644
index 0000000..57a4162
--- /dev/null
+++ b/internal/mcp/response_cap_test.go
@@ -0,0 +1,58 @@
+package mcp
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestTextResult_ResponseCap verifies the byte-cap guard in textResult:
+// payloads up to and including MaxToolResponseBytes must come back unmodified
+// as a single content item, and anything larger must be replaced by an error
+// result whose message tells the caller how to shrink the response.
+func TestTextResult_ResponseCap(t *testing.T) {
+	t.Parallel()
+
+	// requirePassThrough fails the subtest unless textResult returns text
+	// untouched as exactly one content item with IsError unset. Failure
+	// messages report sizes rather than %+v of the result so an at-cap
+	// failure does not dump a multi-megabyte payload into the test log.
+	requirePassThrough := func(t *testing.T, text string) {
+		t.Helper()
+		got := textResult(text)
+		if got.IsError {
+			t.Fatalf("expected %d-byte payload to pass, got IsError=true", len(text))
+		}
+		if len(got.Content) != 1 {
+			t.Fatalf("expected exactly 1 content item, got %d", len(got.Content))
+		}
+		if got.Content[0].Text != text {
+			t.Fatalf("payload mangled: sent %d bytes, got %d bytes back", len(text), len(got.Content[0].Text))
+		}
+	}
+
+	t.Run("UnderCapPassesThrough", func(t *testing.T) {
+		requirePassThrough(t, strings.Repeat("x", 1024)) // 1 KiB
+	})
+
+	t.Run("AtCapPassesThrough", func(t *testing.T) {
+		// Exactly at the cap is allowed. The error fires only on > cap.
+		requirePassThrough(t, strings.Repeat("x", MaxToolResponseBytes))
+	})
+
+	t.Run("OverCapErrors", func(t *testing.T) {
+		got := textResult(strings.Repeat("x", MaxToolResponseBytes+1))
+		if !got.IsError {
+			t.Fatal("expected over-cap to error, got success")
+		}
+		if len(got.Content) == 0 || !strings.Contains(got.Content[0].Text, "response too large") {
+			t.Fatalf("expected 'response too large' marker in error message, got: %+v", got)
+		}
+		if !strings.Contains(got.Content[0].Text, "narrow time range") {
+			t.Fatalf("expected actionable hint in error message, got: %+v", got)
+		}
+	})
+
+	t.Run("EmptyTextOK", func(t *testing.T) {
+		requirePassThrough(t, "")
+	})
+}
diff --git a/internal/mcp/tools.go b/internal/mcp/tools.go
index 96b1fe1..e3e7283 100644
--- a/internal/mcp/tools.go
+++ b/internal/mcp/tools.go
@@ -876,7 +876,27 @@ func parseTimeRange(args 
map[string]any, key string, since *time.Time) {
 
 // --- Helpers ---
 
+// MaxToolResponseBytes caps the rendered length of any tool response at
+// 4 MiB. Without this, get_trace / get_graph_snapshot / correlated_signals
+// can produce 100MB+ JSON on adversarial input, OOM the process, and stall
+// every concurrent MCP call until MCP_CALL_TIMEOUT_MS fires.
+//
+// The cap is intentionally set well above any legitimate row-capped tool
+// response (search_logs at 200 rows is typically <1 MB) so it triggers only
+// on pathological cases. Operators hitting it should narrow their query
+// time range or use pagination.
+const MaxToolResponseBytes = 4 * 1024 * 1024
+
+// textResult wraps a successful tool response. Text over MaxToolResponseBytes
+// (len of the already-built string) is converted to a structured error so
+// callers see a clear failure mode instead of a hung connection.
 func textResult(text string) ToolCallResult {
+	if len(text) > MaxToolResponseBytes {
+		return errorResult(fmt.Sprintf(
+			"response too large: %d bytes exceeds %d-byte cap; narrow time range or use pagination",
+			len(text), MaxToolResponseBytes,
+		))
+	}
 	return ToolCallResult{
 		Content: []ContentItem{{Type: "text", Text: text}},
 	}