agenticenv · vinodvx · Jun 20, 2026 · Jun 20, 2026
@@ -72,7 +72,7 @@ task examples:all
 
 Requires Task, Docker, and LLM credentials — see [examples/README.md](examples/README.md).
 
-If you change **agent behavior** (e.g. `pkg/agent`, telemetry, tools, runtime) or **`eval-harness/`**, run:
+If you change **agent behavior** (e.g. `pkg/agent`, `pkg/memory`, telemetry, tools, runtime) or **`eval-harness/`**, run:
 
 ```bash
 make eval-harness
@@ -136,7 +136,7 @@ Or run a single example:
 go run ./examples/simple_agent "Hello"
 ```
 
-See [examples/README.md](examples/README.md) for all examples, env vars, Task install, and infra commands (`task infra:*`, `task examples:local`).
+See [examples/README.md](examples/README.md) for all examples, env vars, Task install, and infra commands (`task infra:*`, `task examples:local`). Memory examples (`examples/agent_with_memory/`) need Weaviate or pgvector — see [examples/agent_with_memory/README.md](examples/agent_with_memory/README.md).
 
 ## Ways to Contribute
 
@@ -180,7 +180,7 @@ Using the SDK and ran into issues, unclear docs, or confusing behavior? **Raise
 2. **Tests**
    - Add tests for new features and bug fixes.
    - Unit tests go in `*_test.go` files alongside the code.
-   - Agent behavior changes (`pkg/agent`, telemetry, tools, runtime) or **`eval-harness/`** edits — run `make eval-harness` before submitting a PR.
+   - Agent behavior changes (`pkg/agent`, `pkg/memory`, telemetry, tools, runtime) or **`eval-harness/`** edits — run `make eval-harness` before submitting a PR.
 
 3. **Commits**
    - Use [conventional commits](https://www.conventionalcommits.org) — these drive the release changelog:
@@ -201,18 +201,7 @@ Using the SDK and ran into issues, unclear docs, or confusing behavior? **Raise
    - Keep changes focused. For larger work, consider splitting into multiple PRs.
    - For new LLM providers: implement `interfaces.LLMClient` (see `pkg/interfaces/llm.go` and existing providers in `pkg/llm/`).
    - For new tools: implement `interfaces.Tool` (see `pkg/interfaces/tools.go` and `pkg/tools/`).
-
-## Project Layout
-
-| Path | Purpose |
-|------|---------|
-| `pkg/agent/` | Agent core, workflow, config |
-| `pkg/llm/` | LLM providers (OpenAI, Anthropic, Gemini) |
-| `pkg/interfaces/` | Interfaces for LLM clients, tools, messages |
-| `pkg/tools/` | Built-in and custom tools |
-| `pkg/conversation/` | Message history (in-memory, Redis) |
-| `cmd/` | agentctl CLI |
-| `examples/` | Example programs |
+   - For new memory backends: implement `interfaces.Memory` (see `pkg/interfaces/memory.go` and `pkg/memory/weaviate` or `pkg/memory/pgvector`).
 
 ## Releasing (maintainers only)
 

@@ -45,6 +45,7 @@ Mock components apply configurable latency and jitter so results reflect realist
 - Process CPU time
 - Total input/output tokens (from mock LLM stats; includes sub-agent LLM calls)
 - Success rate (`Run()` completed without error)
+- Long-term memory recalls/stores (when `memory.enabled: true`; from run telemetry)
 - `est_cost_usd` — placeholder `0` until pricing is configured
 
 Reports are written to `benchmarks/reports/` (JSON or text). SDK logs (optional) go to `benchmarks/logs/`.
@@ -205,6 +206,18 @@ All paths in config (`dir` fields) are relative to the **repository root** unles
 | `subagents.count` | Sub-agents per level (0 to disable). |
 | `subagents.levels` | Max sub-agent nesting depth (1–5). |
 
+### `memory`
+
+Long-term memory (`agent.WithMemory`) backed by an in-process, in-memory store (no Docker required). Disabled by default.
+
+| Field | Description |
+| :--- | :--- |
+| `enabled` | `true` enables memory recall before each run and memory storage afterwards (storage behavior depends on `store_mode`). |
+| `store_mode` | `ondemand` (LLM `save_memory` tool) or `always` (extract at run end). |
+| `user_id` | Scope user ID passed via `memory.WithContextUserID` (default `benchmark-user`). |
+
+When `memory.enabled: true`, `agent.tools.count` may be `0` (memory-only runs). As in the eval harness, the mock LLM supplies arguments for the `save_memory` tool and returns memory-extraction JSON.
+
 ### `logger`
 
 | Field | Description |

@@ -16,7 +16,10 @@ type AgentBundle struct {
 
 func buildAgentBundle(cfg *setup.Config, llm *setup.MockLLMClient, lgr logger.Logger, tree *setup.AgentTree) (*AgentBundle, error) {
 	enableRemote := cfg.ExternalWorkersEnabled()
-	opts := setup.RootOptions(cfg, llm, lgr, setup.RootAgentName, tree.RootPrompt, tree.SubAgents, cfg.Temporal.TaskQueue, enableRemote)
+	opts, err := setup.AppendMemoryOptions(cfg, setup.RootOptions(cfg, llm, lgr, setup.RootAgentName, tree.RootPrompt, tree.SubAgents, cfg.Temporal.TaskQueue, enableRemote))
+	if err != nil {
+		return nil, err
+	}
 
 	root, err := agent.NewAgent(opts...)
 	if err != nil {

@@ -29,6 +29,11 @@ agent:
     count: 2
     levels: 1             # 1 or 2
 
+memory:
+  enabled: false
+  store_mode: ondemand    # ondemand or always
+  user_id: benchmark-user
+
 logger:
   enabled: false          # true writes SDK logs under benchmarks/logs
   dir: benchmarks/logs

@@ -28,6 +28,9 @@ type BenchmarkMetrics struct {
 
 	TotalRuns   int     `json:"total_runs"`
 	SuccessRate float64 `json:"success_rate"`
+
+	TotalMemoryRecalls int64 `json:"total_memory_recalls"`
+	TotalMemoryStores  int64 `json:"total_memory_stores"`
 }
 
 func main() {
@@ -67,6 +70,9 @@ func main() {
 	fmt.Printf("Starting agent-sdk-go benchmark (%s runtime)\n", cfg.Runtime)
 	fmt.Printf("Runs: %d  Concurrent: %t  Tools: %d  Sub-agents: %d (levels %d)\n",
 		cfg.Agent.Runs, cfg.Agent.Concurrent, cfg.Agent.Tools.Count, cfg.Agent.Subagents.Count, cfg.Agent.Subagents.Levels)
+	if cfg.MemoryEnabled() {
+		fmt.Printf("Memory         : enabled (store_mode=%s, user_id=%s)\n", cfg.Memory.StoreMode, cfg.Memory.UserID)
+	}
 	if cfg.UseTemporal() {
 		fmt.Printf("External workers : %d\n", cfg.Temporal.WorkersCount)
 	}

@@ -9,11 +9,14 @@ import (
 func aggregateMetrics(outcomes []runOutcome, memBefore, memAfter runtime.MemStats, cpuMs float64, inputTokens, outputTokens int) *BenchmarkMetrics {
 	latencies := make([]float64, 0, len(outcomes))
 	successes := 0
+	var totalRecalls, totalStores int64
 	for _, o := range outcomes {
 		latencies = append(latencies, o.latencyMs)
 		if o.success {
 			successes++
 		}
+		totalRecalls += o.memoryRecalls
+		totalStores += o.memoryStores
 	}
 	sort.Float64s(latencies)
 
@@ -24,18 +27,20 @@ func aggregateMetrics(outcomes []runOutcome, memBefore, memAfter runtime.MemStat
 	}
 
 	return &BenchmarkMetrics{
-		P50Ms:             percentile(latencies, 50),
-		P95Ms:             percentile(latencies, 95),
-		P99Ms:             percentile(latencies, 99),
-		AvgMs:             average(latencies),
-		HeapAllocBytes:    deltaUint64(memAfter.Alloc, memBefore.Alloc),
-		TotalAllocBytes:   deltaUint64(memAfter.TotalAlloc, memBefore.TotalAlloc),
-		CPUTimeMs:         cpuMs,
-		TotalInputTokens:  inputTokens,
-		TotalOutputTokens: outputTokens,
-		EstCostUSD:        0, // pricing to be defined later
-		TotalRuns:         totalRuns,
-		SuccessRate:       successRate,
+		P50Ms:              percentile(latencies, 50),
+		P95Ms:              percentile(latencies, 95),
+		P99Ms:              percentile(latencies, 99),
+		AvgMs:              average(latencies),
+		HeapAllocBytes:     deltaUint64(memAfter.Alloc, memBefore.Alloc),
+		TotalAllocBytes:    deltaUint64(memAfter.TotalAlloc, memBefore.TotalAlloc),
+		CPUTimeMs:          cpuMs,
+		TotalInputTokens:   inputTokens,
+		TotalOutputTokens:  outputTokens,
+		EstCostUSD:         0, // pricing to be defined later
+		TotalRuns:          totalRuns,
+		SuccessRate:        successRate,
+		TotalMemoryRecalls: totalRecalls,
+		TotalMemoryStores:  totalStores,
 	}
 }
 

@@ -89,5 +89,9 @@ func formatTextReport(cfg *setup.Config, metrics *BenchmarkMetrics) string {
 	fmt.Fprintf(&b, "Output tokens    : %d\n", metrics.TotalOutputTokens)
 	fmt.Fprintf(&b, "Est. cost (USD)  : %.4f  # pricing placeholder\n", metrics.EstCostUSD)
 	fmt.Fprintf(&b, "Success rate (%%) : %.2f\n", metrics.SuccessRate)
+	if cfg.MemoryEnabled() {
+		fmt.Fprintf(&b, "Memory recalls   : %d\n", metrics.TotalMemoryRecalls)
+		fmt.Fprintf(&b, "Memory stores    : %d\n", metrics.TotalMemoryStores)
+	}
 	return b.String()
 }
@@ -12,11 +12,14 @@ import (
 	"github.com/agenticenv/agent-sdk-go/benchmarks/setup"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/logger"
+	"github.com/agenticenv/agent-sdk-go/pkg/memory"
 )
 
+// runOutcome records the result of a single benchmark run.
 type runOutcome struct {
-	latencyMs float64
-	success   bool
+	latencyMs     float64 // elapsed time of the Run call, in milliseconds
+	success       bool    // true when Run returned a nil error
+	memoryRecalls int64   // memory recall count taken from the run's telemetry
+	memoryStores  int64   // memory store count taken from the run's telemetry
 }
 
 func runBenchmark(ctx context.Context, cfg *setup.Config, llm *setup.MockLLMClient, lgr logger.Logger, runRng *rand.Rand) (*BenchmarkMetrics, error) {
@@ -64,7 +67,7 @@ func runBenchmark(ctx context.Context, cfg *setup.Config, llm *setup.MockLLMClie
 			agentIdx := i % len(bundles)
 			go func(bundle *AgentBundle) {
 				defer wg.Done()
-				outcome := executeRun(ctx, bundle.Root, runRng)
+				outcome := executeRun(ctx, cfg, bundle.Root, runRng)
 				outcomesMu.Lock()
 				outcomes = append(outcomes, outcome)
 				outcomesMu.Unlock()
@@ -89,13 +92,22 @@ func runBenchmark(ctx context.Context, cfg *setup.Config, llm *setup.MockLLMClie
 	return aggregateMetrics(outcomes, memBefore, memAfter, cpuAfter-cpuBefore, inputTokens, outputTokens), nil
 }
 
+// executeRun performs one agent run with a random user prompt and records its
+// latency, success, and memory telemetry counters.
+//
+// When memory is enabled in cfg, the configured user ID is attached to the
+// context via memory.WithContextUserID before the run. The memory counters are
+// copied only when both the result and its Telemetry are non-nil; otherwise
+// they stay zero.
-func executeRun(ctx context.Context, a *agent.Agent, rng *rand.Rand) runOutcome {
+func executeRun(ctx context.Context, cfg *setup.Config, a *agent.Agent, rng *rand.Rand) runOutcome {
+	runCtx := ctx
+	if cfg.MemoryEnabled() {
+		runCtx = memory.WithContextUserID(ctx, cfg.Memory.UserID)
+	}
 	start := time.Now()
-	_, err := a.Run(ctx, setup.RandomUserPrompt(rng), nil)
-	return runOutcome{
+	result, err := a.Run(runCtx, setup.RandomUserPrompt(rng), nil)
+	outcome := runOutcome{
 		latencyMs: float64(time.Since(start).Milliseconds()),
 		success:   err == nil,
 	}
+	if result != nil && result.Telemetry != nil {
+		outcome.memoryRecalls = result.Telemetry.Storage.TotalMemoryRecalls
+		outcome.memoryStores = result.Telemetry.Storage.TotalMemoryStores
+	}
+	return outcome
 }
 
 func processCPUTimeMs() (float64, error) {

@@ -6,17 +6,22 @@ import (
 	"path/filepath"
 	"strings"
 
+	testutil "github.com/agenticenv/agent-sdk-go/internal/testing"
+	"github.com/agenticenv/agent-sdk-go/pkg/agent"
+	"github.com/agenticenv/agent-sdk-go/pkg/memory"
 	"github.com/spf13/viper"
 )
 
 const BenchmarkTreeSeed int64 = 42
+const defaultMemoryUserID = "benchmark-user"
 
+// Config is the top-level benchmark configuration, with one section per
+// mapstructure key.
 type Config struct {
 	Runtime  string         `mapstructure:"runtime"`
 	Temporal TemporalConfig `mapstructure:"temporal"`
 	LLM      LLMConfig      `mapstructure:"llm"`
 	Tool     ToolConfig     `mapstructure:"tool"`
 	Agent    AgentConfig    `mapstructure:"agent"`
+	Memory   MemoryConfig   `mapstructure:"memory"`
 	Logger   LoggerConfig   `mapstructure:"logger"`
 	Output   OutputConfig   `mapstructure:"output"`
 }
@@ -58,6 +63,13 @@ type SubagentsConfig struct {
 	Levels int `mapstructure:"levels"`
 }
 
+// MemoryConfig configures long-term memory for benchmark runs.
+type MemoryConfig struct {
+	Enabled   bool   `mapstructure:"enabled"`    // wires long-term memory into benchmark agents
+	StoreMode string `mapstructure:"store_mode"` // "ondemand" or "always"; blank defaults to on-demand
+	UserID    string `mapstructure:"user_id"`    // memory scope user ID; blank defaults to defaultMemoryUserID
+}
+
 type LoggerConfig struct {
 	Enabled bool   `mapstructure:"enabled"`
 	Dir     string `mapstructure:"dir"`
@@ -79,6 +91,50 @@ func (c *Config) ExternalWorkersEnabled() bool {
 	return c.UseTemporal() && c.Temporal.WorkersCount > 0
 }
 
+// MemoryEnabled reports whether long-term memory is wired for benchmark runs.
+// It is safe to call on a nil *Config, in which case it returns false.
+func (c *Config) MemoryEnabled() bool {
+	return c != nil && c.Memory.Enabled
+}
+
+// applyDefaults fills in blank (empty or whitespace-only) fields: UserID falls
+// back to defaultMemoryUserID and StoreMode to memory.StoreModeOnDemand.
+// A nil receiver is a no-op.
+func (m *MemoryConfig) applyDefaults() {
+	if m == nil {
+		return
+	}
+	if strings.TrimSpace(m.UserID) == "" {
+		m.UserID = defaultMemoryUserID
+	}
+	if strings.TrimSpace(m.StoreMode) == "" {
+		m.StoreMode = string(memory.StoreModeOnDemand)
+	}
+}
+
+// parseMemoryStoreMode converts a configured store_mode string into a
+// memory.StoreMode. Matching ignores case and surrounding whitespace. An empty
+// value, the on-demand mode's own string, "on-demand", and "on_demand" all map
+// to memory.StoreModeOnDemand; the always mode's string maps to
+// memory.StoreModeAlways. Any other value yields an error naming the two
+// accepted modes.
+func parseMemoryStoreMode(raw string) (memory.StoreMode, error) {
+	switch strings.ToLower(strings.TrimSpace(raw)) {
+	case "", string(memory.StoreModeOnDemand), "on-demand", "on_demand":
+		return memory.StoreModeOnDemand, nil
+	case string(memory.StoreModeAlways):
+		return memory.StoreModeAlways, nil
+	default:
+		return "", fmt.Errorf("memory.store_mode must be %q or %q", memory.StoreModeOnDemand, memory.StoreModeAlways)
+	}
+}
+
+// MemoryAgentOption returns an agent.WithMemory option when memory is enabled.
+//
+// It returns (nil, nil) when cfg is nil or memory is disabled, so callers must
+// check for a nil option before using it. An invalid store mode yields an
+// error. As a side effect, blank fields of cfg.Memory are filled with their
+// defaults.
+func MemoryAgentOption(cfg *Config) (agent.Option, error) {
+	if cfg == nil || !cfg.MemoryEnabled() {
+		return nil, nil
+	}
+	cfg.Memory.applyDefaults()
+	mode, err := parseMemoryStoreMode(cfg.Memory.StoreMode)
+	if err != nil {
+		return nil, err
+	}
+	// NOTE(review): presumably each call builds its own in-memory store, so
+	// agents created from separate options do not share memories — confirm
+	// against testutil.NewInmemMemory.
+	memCfg := memory.DefaultConfig(testutil.NewInmemMemory())
+	memCfg.Store.Mode = mode
+	// Recall is switched on regardless of the store mode.
+	memCfg.Recall.Enabled = true
+	return agent.WithMemory(memCfg), nil
+}
+
 func LoadConfig(path string) (*Config, error) {
 	if path == "" {
 		path = defaultConfigPath()
@@ -106,8 +162,8 @@ func (c *Config) validate() error {
 	if c.Agent.Concurrent && c.Agent.ConcurrentCount <= 0 {
 		return fmt.Errorf("agent.concurrent_count must be > 0 when concurrent is true")
 	}
-	if c.Agent.Tools.Count <= 0 {
-		return fmt.Errorf("agent.tools.count must be > 0")
+	if c.Agent.Tools.Count <= 0 && !c.Memory.Enabled {
+		return fmt.Errorf("agent.tools.count must be > 0 when memory is disabled")
 	}
 	if c.Agent.Subagents.Levels < 0 {
 		return fmt.Errorf("agent.subagents.levels must be >= 0")
@@ -148,6 +204,12 @@ func (c *Config) validate() error {
 	if c.Temporal.Namespace == "" {
 		c.Temporal.Namespace = "default"
 	}
+	c.Memory.applyDefaults()
+	if c.Memory.Enabled {
+		if _, err := parseMemoryStoreMode(c.Memory.StoreMode); err != nil {
+			return err
+		}
+	}
 	return nil
 }
 

@@ -9,11 +9,14 @@ import (
 	"time"
 
 	"github.com/agenticenv/agent-sdk-go/internal/runtime"
+	"github.com/agenticenv/agent-sdk-go/internal/types"
 	"github.com/agenticenv/agent-sdk-go/pkg/interfaces"
 )
 
 const MockLLMModel = "benchmark-mock"
 
+const mockMemoryExtractText = "User prefers concise answers"
+
 type LLMStats struct {
 	mu                sync.Mutex
 	TotalInputTokens  int
@@ -64,6 +67,17 @@ func (m *MockLLMClient) Generate(ctx context.Context, request *interfaces.LLMReq
 	promptTokens, completionTokens := splitMockTokens(m.cfg.MockTokens)
 	m.stats.add(promptTokens, completionTokens)
 
+	if isMemoryExtractRequest(request) {
+		return &interfaces.LLMResponse{
+			Content: fmt.Sprintf(`{"memories":[{"text":%q,"kind":"preference"}]}`, mockMemoryExtractText),
+			Usage: &interfaces.LLMUsage{
+				PromptTokens:     int64(promptTokens),
+				CompletionTokens: int64(completionTokens),
+				TotalTokens:      int64(promptTokens + completionTokens),
+			},
+		}, nil
+	}
+
 	if hasToolResultMessages(request) {
 		return &interfaces.LLMResponse{
 			Content: "benchmark complete",
@@ -149,12 +163,26 @@ func hasToolResultMessages(request *interfaces.LLMRequest) bool {
 }
 
+// mockToolArgs returns canned arguments for a call to the named tool: text and
+// kind for the save-memory tool, a query for tools whose name starts with
+// "subagent_", and a generic input otherwise.
 func mockToolArgs(toolName string) map[string]any {
+	if toolName == types.SaveMemoryToolName {
+		return map[string]any{
+			types.MemoryToolParamText: mockMemoryExtractText,
+			types.MemoryToolParamKind: "preference",
+		}
+	}
 	if strings.HasPrefix(toolName, "subagent_") {
 		return map[string]any{runtime.SubAgentToolParamQuery: "benchmark subtask"}
 	}
 	return map[string]any{"input": "benchmark"}
 }
 
+// isMemoryExtractRequest reports whether request asks for a JSON response
+// named "MemoryExtraction". A nil request or a nil ResponseFormat yields false.
+func isMemoryExtractRequest(request *interfaces.LLMRequest) bool {
+	if request == nil || request.ResponseFormat == nil {
+		return false
+	}
+	return request.ResponseFormat.Type == interfaces.ResponseFormatJSON &&
+		request.ResponseFormat.Name == "MemoryExtraction"
+}
+
 func splitMockTokens(total int) (prompt, completion int) {
 	if total <= 0 {
 		return 0, 0