makeitworkok · makeitworkok · May 31, 2026 · May 25, 2026
diff --git a/README.md b/README.md
@@ -183,6 +183,35 @@ Recommended maintenance workflow:
 
 ---
 
+## Packaging SQLite For Executables
+
+For novice-friendly installs, keep the SQLite database file writable and outside the executable bundle:
+
+* Runtime DB path is under per-user app data (for example `%APPDATA%/nMCP-client/memory/memory.sqlite` on Windows).
+* On first run, the app bootstraps schema automatically.
+* If a bundled seed DB exists at `assets/memory_seed.sqlite`, it is copied to the writable runtime path before schema checks.
+
+This design works for both one-file and one-folder builds and avoids write failures inside bundled executables.
+
+### Windows Build Script
+
+Use `scripts/build_windows.ps1`:
+
+```powershell
+# One-folder build (recommended for field deployments)
+powershell -ExecutionPolicy Bypass -File scripts/build_windows.ps1 -Mode onedir
+
+# One-file build
+powershell -ExecutionPolicy Bypass -File scripts/build_windows.ps1 -Mode onefile
+```
+
+The script automatically includes:
+
+* `.private/Candy` memory guidance docs
+* `assets/memory_seed.sqlite` when present
+
+---
+
 ## License
 
 MIT

diff --git a/config.py b/config.py
@@ -41,6 +41,16 @@ def _default_config_path() -> Path:
 CONFIG_PATH = _default_config_path()
 
 
+def _default_memory_root() -> str:
+    """Return the default memory root: a ``memory`` directory beside ``CONFIG_PATH``."""
+    return str(CONFIG_PATH.parent / "memory")
+
+
+def _default_candy_docs_dir() -> str:
+    """Return the resolved ``.private/Candy`` docs directory next to this module's file."""
+    return str((Path(__file__).resolve().parent / ".private" / "Candy").resolve())
+
+
 class ConnectionConfig(BaseModel):
     mcp_url: str = Field(
         default_factory=lambda: os.getenv("MCP_SERVER_URL", "http://localhost:8000/mcp")
@@ -68,9 +78,17 @@ class LLMConfig(BaseModel):
     base_url: str = ""  # optional override (e.g. local proxy)
 
 
+class MemoryConfig(BaseModel):
+    enabled: bool = True
+    prompt_token_budget: int = 1400
+    memory_root: str = Field(default_factory=_default_memory_root)  # CONFIG_PATH.parent/memory
+    candy_docs_dir: str = Field(default_factory=_default_candy_docs_dir)  # .private/Candy
+
+
 class AppConfig(BaseModel):
     connection: ConnectionConfig = Field(default_factory=ConnectionConfig)
     llm: LLMConfig = Field(default_factory=LLMConfig)
+    memory: MemoryConfig = Field(default_factory=MemoryConfig)
 
 
 def load_config() -> AppConfig:

diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1
@@ -0,0 +1,47 @@
+param(
+    [ValidateSet("onedir", "onefile")]
+    [string]$Mode = "onedir",
+
+    [string]$AppName = "nMCP-client"
+)
+
+$ErrorActionPreference = "Stop"
+# Work from the repository root (the parent of this script's directory).
+$root = Split-Path -Parent $PSScriptRoot
+Set-Location $root
+# Prefer the project virtualenv interpreter; otherwise use python from PATH.
+$python = Join-Path $root ".venv\Scripts\python.exe"
+if (-not (Test-Path $python)) {
+    $python = "python"
+}
+# Each mode gets its own dist/work folders so builds do not overwrite each other.
+$distPath = if ($Mode -eq "onefile") { "dist_onefile" } else { "dist_release" }
+$workPath = if ($Mode -eq "onefile") { "build_onefile" } else { "build_release" }
+
+$commonArgs = @(
+    "-m", "PyInstaller",
+    "--noconfirm",
+    "--windowed",
+    "--name", $AppName,
+    "--distpath", $distPath,
+    "--workpath", $workPath,
+    "--add-data", ".private/Candy;.private/Candy"
+)
+
+# Optional seed database for memory bootstrap.
+if (Test-Path "assets\memory_seed.sqlite") {
+    $commonArgs += @("--add-data", "assets/memory_seed.sqlite;assets")
+}
+
+if ($Mode -eq "onefile") {
+    $commonArgs += "--onefile"
+}
+
+$commonArgs += "main.py"
+
+Write-Host "Building $AppName ($Mode)..."
+Write-Host "$python $($commonArgs -join ' ')"
+# By default a native command's exit code does not trigger "Stop"; check it explicitly.
+& $python @commonArgs
+if ($LASTEXITCODE -ne 0) { throw "PyInstaller failed with exit code $LASTEXITCODE" }
+Write-Host "Build complete. Output folder: $distPath"
diff --git a/src/agent.py b/src/agent.py
@@ -10,7 +10,8 @@
 import asyncio
 import json
 import logging
-from typing import Any
+import re
+from typing import Any, Callable
 
 from PySide6.QtCore import QObject, Signal
 
@@ -65,6 +66,8 @@
 )
 
 _MAX_ITERATIONS = 20  # safety cap to prevent infinite loops
+_LLM_RATE_LIMIT_MAX_RETRIES = 3
+_MAX_TOOL_RESULT_CHARS = 8000
 _WIRESHEET_OPERATION_TYPES = {
     "createComponent",
     "setSlot",
@@ -99,12 +102,16 @@ def __init__(
         planning_mode: bool = False,
         writes_permitted: bool = True,
         strict_paths: bool = True,
+        memory_context: str = "",
+        tool_observer: Callable[[str, dict[str, Any], str], None] | None = None,
     ) -> None:
         self._mcp = mcp_client
         self._llm = llm_provider
         self._planning_mode = planning_mode
         self._writes_permitted = writes_permitted
         self._strict_paths = strict_paths
+        self._memory_context = memory_context.strip()
+        self._tool_observer = tool_observer
         self.signals = AgentSignals()
 
         self._loop: asyncio.AbstractEventLoop | None = None
@@ -132,6 +139,8 @@ async def run(self, user_message: str, tools: list[Any]) -> None:
         """Execute one user request end-to-end."""
         self._loop = asyncio.get_event_loop()
         system_prompt = _SYSTEM_PROMPT
+        if self._memory_context:
+            system_prompt += "\n\n" + self._memory_context
         if self._planning_mode:
             system_prompt += (
                 " You are currently in PLAN MODE. Do not execute tools. "
@@ -156,11 +165,35 @@ async def run(self, user_message: str, tools: list[Any]) -> None:
             self.signals.status_changed.emit("Thinking…")
             logger.debug("Agent iteration %d", iteration + 1)
 
-            try:
-                response = await self._llm.get_response(tools)
-            except Exception as exc:
-                self.signals.error_occurred.emit(f"LLM error: {exc}")
-                logger.exception("LLM error on iteration %d", iteration + 1)
+            response = None
+            for attempt in range(_LLM_RATE_LIMIT_MAX_RETRIES + 1):
+                try:
+                    response = await self._llm.get_response(tools)
+                    break
+                except Exception as exc:
+                    wait_seconds = _parse_rate_limit_wait_seconds(str(exc))
+                    is_last_attempt = attempt >= _LLM_RATE_LIMIT_MAX_RETRIES
+                    if wait_seconds is None or is_last_attempt:
+                        self.signals.error_occurred.emit(f"LLM error: {exc}")
+                        logger.exception("LLM error on iteration %d", iteration + 1)
+                        return
+
+                    wait_seconds = min(max(wait_seconds, 0.5), 12.0)
+                    self.signals.status_changed.emit(
+                        f"Rate limited by provider; retrying in {wait_seconds:.1f}s…"
+                    )
+                    logger.warning(
+                        "Rate limited on iteration %d, attempt %d/%d. Retrying in %.2fs",
+                        iteration + 1,
+                        attempt + 1,
+                        _LLM_RATE_LIMIT_MAX_RETRIES + 1,
+                        wait_seconds,
+                    )
+                    await asyncio.sleep(wait_seconds)
+
+            if response is None:
+                self.signals.error_occurred.emit("LLM error: no response returned.")
+                self.signals.status_changed.emit("Ready")
                 return
 
             # Emit any intermediate text alongside tool calls
@@ -220,12 +253,23 @@ async def _execute_tool(self, tc: ToolCall) -> str:
             raw_result = await self._mcp.call_tool(tc.name, tc.arguments)
             result_text = _format_tool_result(raw_result)
             result_text = _augment_path_error(result_text, tc.arguments)
+            result_text = _balance_tool_result_text(result_text)
+            if self._tool_observer:
+                try:
+                    self._tool_observer(tc.name, tc.arguments, result_text)
+                except Exception as obs_exc:
+                    logger.warning("Tool observer failed for %s: %s", tc.name, obs_exc)
             preview = result_text[:300] + ("…" if len(result_text) > 300 else "")
             self.signals.tool_executed.emit(tc.name, preview)
             logger.info("Tool %s → %s", tc.name, result_text[:500])
             return result_text
         except Exception as exc:
             error = f"Tool execution error: {exc}"
+            if self._tool_observer:
+                try:
+                    self._tool_observer(tc.name, tc.arguments, error)
+                except Exception as obs_exc:
+                    logger.warning("Tool observer failed for %s: %s", tc.name, obs_exc)
             self.signals.status_changed.emit("Ready")
             self.signals.error_occurred.emit(error)
             logger.exception("Tool %s failed", tc.name)
@@ -266,6 +310,45 @@ def _format_tool_result(result: Any) -> str:
     "station:|slot:/",
 ]
 
+_RATE_LIMIT_WAIT_PATTERN = re.compile(r"try again in\s+([0-9]+(?:\.[0-9]+)?)s", re.IGNORECASE)
+
+
+def _parse_rate_limit_wait_seconds(error_text: str) -> float | None:
+    """Extract provider-suggested retry delay (seconds) from a rate-limit message.
+
+    Returns ``None`` when the text does not mention "rate limit" or "429", the
+    hinted delay when it contains "try again in <N>s", and 2.0 seconds otherwise.
+    """
+    lowered = error_text.lower()
+    if "rate limit" not in lowered and "429" not in lowered:
+        return None
+
+    match = _RATE_LIMIT_WAIT_PATTERN.search(error_text)
+    # The pattern only captures digits with an optional fraction, so float()
+    # cannot raise here; no ValueError guard is needed.
+    return float(match.group(1)) if match else 2.0
+
+
+def _balance_tool_result_text(result_text: str) -> str:
+    """Cap tool result size before feeding it back to the LLM to reduce TPM spikes.
+
+    Longer results keep their head and tail around a marker noting the omitted count.
+    """
+    if len(result_text) <= _MAX_TOOL_RESULT_CHARS:
+        return result_text
+
+    # Budgets scale with the cap (5000 + 2200 at the default 8000), so the kept
+    # slices never overlap and the omitted count is always positive.
+    head_len = _MAX_TOOL_RESULT_CHARS * 5 // 8
+    tail_len = _MAX_TOOL_RESULT_CHARS * 11 // 40
+    omitted = len(result_text) - head_len - tail_len
+    return (
+        f"{result_text[:head_len].rstrip()}\n\n"
+        f"[TRUNCATED TOOL RESULT: omitted {omitted} characters to control token usage. "
+        f"If you need more detail, call the tool again with tighter filters/limits.]\n\n"
+        f"{result_text[-tail_len:].lstrip()}"
+    )
+
 
 def _augment_path_error(result_text: str, tool_args: dict[str, Any] | None = None) -> str:
     """Append a discovery hint when the server returns a path-not-allowlisted error."""

diff --git a/src/memory/__init__.py b/src/memory/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2026 Chris Favre. All rights reserved.
+"""Memory package for local context and station profile persistence."""
+
+from src.memory.manager import MemoryHealthSnapshot, MemoryManager
+
+__all__ = ["MemoryManager", "MemoryHealthSnapshot"]