diff --git a/README.md b/README.md index 7f29547814..fc5e1f57cf 100644 --- a/README.md +++ b/README.md @@ -1,254 +1,145 @@ -

- VS Code Marketplace - X - YouTube - Join Discord - Join r/ZooCode - GitHub Issues -

-

- Get help fast → Join Discord • Prefer async? → Join r/ZooCode -

- -# Zoo Code - -> Your AI-Powered Dev Team, Right in Your Editor - -## We are Zoo Code - -> You may have seen the -> [recent announcement](https://x.com/mattrubens/status/2046636598859559114) -> from the Roo team 🦘🦘🦘. The TLDR is the team is winding down active Roo -> Code development as they focus on [Roomote](https://roomote.dev/). That news -> was difficult for many Roo users, this plugin means a lot to this community. -> -> We want to thank the entire Roo team for the work they put into this plugin. -> We won't call out each person by name here, but we can all agree they are -> exceptional developers and, just as importantly, incredible people. Thank you -> to the Roo team. -> -> As Roo coders, we come in all kinds of shapes and sizes. Some of us are using -> it professionally in our day-to-day work, some are using it to tinker and -> scheme unimaginably complicated workflows. Some are using it to improve Roo -> itself while others are using it to improve the very models that Roo is using -> (super meta). The point we are making is that the community is -> diverse, and although a kangaroo 🦘🦘🦘 is a distinguished and noble animal, -> we felt a "Zoo" 🐘🦡🦒🦓🦛🦧🦭🦦 of different species better reflected this -> diversity of the plugin's users. -> -> So we would like to announce that **Zoo Code** will continue development on -> this important project. The core team is a group of developers who contributed -> to Roo previously and care deeply about this plugin. We will continue to make -> model updates, fix bugs, and release features. But more than anything, we plan -> to listen to the community that made this plugin so special. Feel free to join -> us on [Discord](https://discord.gg/VxfP4Vx3gX), -> [Reddit](https://www.reddit.com/r/ZooCode), or -> [open a PR or issue](https://github.com/Zoo-Code-Org/Zoo-Code), and above all, -> please stay involved, connected, and active as a community. 
-> -> _-Zoo Code Team_ - -## Roo Code to Zoo Code migration - -You can find a quick guide for migrating from Roo Code to Zoo Code in the [Roo→Zoo migration guide](https://docs.zoocode.dev/roo-to-zoo-migration). We plan to try and help users as they transition over, we have our [Reddit](https://www.reddit.com/r/ZooCode) and [Discord](https://discord.gg/VxfP4Vx3gX) -for this exact support, so if you are having problems or if you have question, jump on and ask. - -## What's New in v3.55.0 - -**Zoo Code's first feature release** builds on the marketplace handoff with a -new provider, the upstream sunset merge, and a round of user-facing fixes -across chat, provider settings, and rendering. - -- Add Xiaomi MiMo as a first-class API provider -- Pull in the upstream Roo Code sunset merge and related platform updates -- Fix MCP sign-in copy, Gemini full-tool requests, and OpenAI temperature - handling for unsupported models -- Fix Markdown single-tilde rendering and diagnostics temp-file naming -- Improve provider defaults and region coverage for Z.AI, GLM, and Vertex AI - -
- 🌐 Available languages - -- [English](README.md) -- [Català](locales/ca/README.md) -- [Deutsch](locales/de/README.md) -- [Español](locales/es/README.md) -- [Français](locales/fr/README.md) -- [हिंदी](locales/hi/README.md) -- [Bahasa Indonesia](locales/id/README.md) -- [Italiano](locales/it/README.md) -- [日本語](locales/ja/README.md) -- [한국어](locales/ko/README.md) -- [Nederlands](locales/nl/README.md) -- [Polski](locales/pl/README.md) -- [Português (BR)](locales/pt-BR/README.md) -- [Русский](locales/ru/README.md) -- [Türkçe](locales/tr/README.md) -- [Tiếng Việt](locales/vi/README.md) -- [简体中文](locales/zh-CN/README.md) -- [繁體中文](locales/zh-TW/README.md) -- ... -
+## Poo Code ---- - -## What Can Zoo Code Do For YOU? - -- Generate Code from natural language descriptions and specs -- Adapt with Modes: Code, Architect, Ask, Debug, and Custom Modes -- Refactor & Debug existing code -- Write & Update documentation -- Answer Questions about your codebase -- Automate repetitive tasks -- Utilize MCP Servers - -## Modes - -Zoo Code adapts to how you work: - -- Code Mode: everyday coding, edits, and file ops -- Architect Mode: plan systems, specs, and migrations -- Ask Mode: fast answers, explanations, and docs -- Debug Mode: trace issues, add logs, isolate root causes -- Custom Modes: build specialized modes for your team or workflow - -Learn more: [Using Modes](https://docs.zoocode.dev/basic-usage/using-modes) • -[Custom Modes](https://docs.zoocode.dev/advanced-usage/custom-modes) - -## Tutorial & Feature Videos - -
+Poo-Code is a fork of Zoo-Code, which is a fork of Roo-Code, which is a fork of Cline. I named it "Poo" because I don't know whether it will work. In other words, it can either be total sh\*t or become organic fertilizer that takes legacy "spaghetti code" and "crap architectures," breaks them down, and uses full AI automation to turn them into beautifully optimized, blooming software, flushing out bad code so your codebase can grow. +(The truth is I am too lazy to chunk it into smaller commits — see PR [#252](https://github.com/Zoo-Code-Org/Zoo-Code/pull/252) for the full pile.) -| | | | -| :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Installing the Extension
Installing the Extension | Configuring Profiles
Configuring Profiles | Codebase Indexing
Codebase Indexing | -| Custom Modes
Custom Modes | Checkpoints
Checkpoints | Context Management
Context Management | - -
-

-More quick tutorial and feature videos... -

- -## Resources - -- **[Documentation](https://docs.zoocode.dev):** The official guide to - installing, configuring, and mastering Zoo Code. -- **[YouTube Channel](https://youtube.com/@roocodeyt?feature=shared):** Watch - tutorials and see features in action. -- **[Discord Server](https://discord.gg/VxfP4Vx3gX):** Join the community for - real-time help and discussion. -- **[Reddit Community](https://www.reddit.com/r/ZooCode/):** Share your - experiences and see what others are building. -- **[GitHub Issues](https://github.com/Zoo-Code-Org/Zoo-Code/issues):** Report - bugs and track development. -- **[Feature Requests](https://github.com/Zoo-Code-Org/Zoo-Code/discussions/categories/feature-requests?discussions_q=is%3Aopen+category%3A%22Feature+Requests%22+sort%3Atop):** - Have an idea? Share it with the developers. +> **⚠ EXPERIMENTAL** — This fork adds a full self-improving AI layer on top of Zoo-Code. All new features are gated behind experiment toggles. Enable at your own risk. PR [#252](https://github.com/Zoo-Code-Org/Zoo-Code/pull/252) contains the complete diff. --- -## Local Setup & Development +## The Problem + +1. I can't sleep well because of anxiety due to the wrong decisions it made by always selecting the first choice as the answer. +2. It ruined my morning because when I woke up I found it having an unauthorized day off during a busy day (silently stuck because of an error). + +The ultimate goal is to totally replace you, so you can be permanently "Ooo" (Out of Office) and jobless like I am. + +## What's different from Zoo-Code + +This fork adds **~10,500 lines** of self-improving infrastructure across **45 files** (25 source + 20 test), all behind experiment toggles. Every new feature is gated — Zoo-Code main's behaviour is preserved with everything off. 
+ +| Feature | Poo-Code (this branch) | Zoo-Code main | +| -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ | +| **Self-improving loop** | `SelfImprovingManager` — background review pass every N turns/tool calls. Learns from mistakes, curates skills, suggests optimizations. | ❌ No automated self-review | +| **Pattern analysis** | `PatternAnalyzer` — detects recurring tool-use patterns, error signatures, and skill gaps from execution history. | ❌ No pattern detection | +| **Curator service** | `CuratorService` — tar.gz-backed skill store (backup/rollback). Decides when to create/update/merge skills from learned patterns. | ❌ Manual skill authoring only | +| **Skill automation** | `ActionExecutor` + `ImprovementApplier` — auto-creates and updates skills from reviewed patterns. | ❌ No auto skill creation | +| **Insights engine** | `InsightsEngine` — generates project-level insights (dead code, stale configs, architecture notes). | ❌ No project insights | +| **Resilience** | `ResilienceService` — streaming backoff, tool error healer, auto-retry with learned recovery strategies. | ❌ Basic retry only | +| **Question evaluation** | `QuestionEvaluatorService` — evaluates user questions for clarity/completeness; auto-selects best answer when choices are offered. | ❌ Always picks first choice | +| **Trust service** | `TrustService` — learns tool-approval patterns over time. Full-trust mode auto-approves known-safe tools. | ❌ Static auto-approval rules | +| **Review team** | `ReviewTeamService` — multi-agent review (innovator + critic + decider) scores every learned pattern before applying it. | ❌ No pre-apply validation | +| **Agent memory** | `AgentMemoryAdapter` + `MemoryStore` + `MemoryBackendFactory` — pluggable memory backend (SQLite default, configurable). 
Bounded context injection via `memory.ts` types. | ❌ No persistent agent memory | +| **Learning store** | `LearningStore` — stores/retrieves learned patterns with confidence scoring. Schema-versioned for forward compat. | ❌ No learning storage | +| **Transcript recall** | `TranscriptRecall` — retrieves past conversation context for pattern learning. | ❌ No historical context | +| **Skill usage tracking** | `SkillUsageStore` — tracks which skills fire, success rate, frequency. Feeds curator decisions. | ❌ No usage metrics | +| **Auto-mode orchestrator** | `AutoModeOrchestrator` — automatically switches between VS Code modes based on task type. | ❌ Manual mode switching | +| **Mode factory** | `ModeFactoryService` — generates custom modes from learned workflows. | ❌ Fixed mode set | +| **Experiment toggles** | 6 new experiment IDs: `selfImproving`, `selfImprovingAutoSkills`, `selfImprovingAutoMode`, `selfImprovingReviewTeam`, `selfImprovingFullTrust`, `selfImprovingQuestionEvaluation` | ❌ None of these exist | +| **ONE-SHOT Orchestrator** | Autonomous 8-phase sequential build agent — handles entire projects from requirements to verification in a single pass | ❌ No equivalent | +| **KAIZEN Orchestrator** | Continuous improvement agent with 7-step iteration loop (Analyze → Identify → Fix → Verify → Enhance → Git Push → Re-evaluate) and self-evolving mini-goals | ❌ No equivalent | +| **Proactive Error Prevention** | Pre-execution tool call validation, structured error classification (12 categories), cascading failure detection, and prevention hint injection — catches errors BEFORE they happen | ❌ No equivalent | +| **Git Auto-Push** | KAIZEN mode auto-commits and pushes every cycle, enabling CI/CD pipelines to apply changes to staging/production automatically | ❌ No equivalent | +| **Self-Evolving Mini-Goals** | Mini-goals automatically evolve upward as each is achieved, with healing that reverts to fixing regressions first | ❌ No equivalent | +| **Full UI Coverage** | 
All 27 self-improving services now have toggles, config panels, or status displays in the settings UI | ❌ No equivalent | -1. **Clone** the repo: +### Experiment gate reference -```sh -git clone https://github.com/Zoo-Code-Org/Zoo-Code.git -``` +| Toggle | Enables | +| --------------------------------- | ------------------------------------------------------------ | +| `selfImproving` | Master switch — enables the entire learning loop | +| `selfImprovingAutoSkills` | Auto-create/update/merge skills from learned patterns | +| `selfImprovingAutoMode` | Auto-switch VS Code modes based on task | +| `selfImprovingReviewTeam` | Multi-agent review before applying learned patterns | +| `selfImprovingFullTrust` | Auto-approve tools that TrustService considers safe | +| `selfImprovingQuestionEvaluation` | Evaluate user questions for clarity; auto-select best answer | +| `oneShotOrchestrator` | Enable ONE-SHOT Orchestrator mode for autonomous project builds | +| `kaizenOrchestrator` | Enable KAIZEN Orchestrator mode for continuous improvement | +| `preventionEngine`, `cascadeTracker` | Enable pre-execution tool call validation and cascade detection | +| `kaizenAutoPush` (global setting, not an experiment) | Enable auto-commit and push in KAIZEN mode | +| `selfEvolvingMiniGoals` | Enable self-evolving mini-goals with regression healing | +| `fullUICoverage` | Enable full UI coverage for all self-improving services | -2. **Install dependencies**: +## Use Case Examples -```sh -pnpm install -``` +### Example 1: One-Shot Project Build -3. **Run the extension**: +**Scenario:** You need to build a complete REST API server from scratch. -There are several ways to run the Zoo Code extension: +1. Switch to **ONE-SHOT Orchestrator** mode +2. Describe your requirements: "Build a FastAPI REST API with PostgreSQL backend, JWT auth, and CRUD endpoints for users and products" +3. 
The agent autonomously executes 8 phases: + - Requirements analysis → Architecture design → Project scaffolding → Core implementation → Integration → Testing → Bug fixing → Verification +4. Result: A fully tested, production-ready API server with zero manual intervention -### Development Mode (F5) +### Example 2: Continuous Codebase Improvement -For active development, use VSCode's built-in debugging: +**Scenario:** You have an existing codebase with technical debt and want continuous improvement. -Press `F5` (or go to **Run** → **Start Debugging**) in VSCode. This will open a -new VSCode window with the Zoo Code extension running. +1. Switch to **KAIZEN Orchestrator** mode +2. Set your initial mini-goal: "Fix all TypeScript strict mode errors" +3. The agent enters the Kaizen loop: + - **Cycle 1**: Analyzes errors → Fixes 3 type errors → Runs tests → Git push → Re-evaluates + - **Cycle 2**: Fixes 5 more errors → Runs tests → Git push → Evolves mini-goal + - **Cycle N**: Continues until mini-goal achieved, then evolves upward +4. Each cycle is one atomic change, verified, and pushed to CI/CD +5. Result: Continuous, safe improvement without regressions -- Changes to the webview will appear immediately. -- Changes to the core extension will also hot reload automatically. +### Example 3: Proactive Error Prevention -### Automated VSIX Installation +**Scenario:** You're working on a large codebase and the model keeps hitting tool errors. -To build and install the extension as a VSIX package directly into VSCode: +1. Enable **Prevention Engine** and **Cascade Tracker** in Experimental Settings +2. Before each tool call, the system validates parameters: + - `read_file` with directory path → warns to use `list_files` instead + - `list_files recursive=true` without ripgrep → suggests `find`/`ls` fallback + - Long `execute_command` → warns about shell limits +3. After errors, the system classifies them and tracks cascading failures +4. 
If 2+ errors occur within 30 seconds, a cascade warning is injected suggesting an approach change +5. Result: Fewer wasted tool calls, faster task completion, lower API costs -```sh -pnpm install:vsix [-y] [--editor=] -``` +### Example 4: Self-Healing Production Deployment -This command will: +**Scenario:** A production deployment has regressions and needs immediate attention. -- Ask which editor command to use (code/cursor/code-insiders) - defaults to - 'code' -- Uninstall any existing version of the extension. -- Build the latest VSIX package. -- Install the newly built VSIX. -- Prompt you to restart VS Code for changes to take effect. +1. Switch to **KAIZEN Orchestrator** mode +2. The agent analyzes logs and test results +3. Detects regressions → mini-goal auto-reverts to fixing those first +4. Each fix is verified, committed with `kaizen: fix regression in X`, and pushed +5. CI/CD pipeline auto-deploys each fix to staging/production +6. Once regressions are resolved, mini-goal evolves upward to the next improvement +7. Result: Self-healing deployment with zero manual intervention -Options: +## Statistics -- `-y`: Skip all confirmation prompts and use defaults -- `--editor=`: Specify the editor command (e.g., `--editor=cursor` or - `--editor=code-insiders`) +Projects generated: Countless -### Manual VSIX Installation +Monthly cost: LLM & electricity bills -If you prefer to install the VSIX package manually: +Non-refundable cost: My soul -1. First, build the VSIX package: - ```sh - pnpm vsix - ``` -2. A `.vsix` file will be generated in the `bin/` directory (e.g., - `bin/zoo-code-.vsix`). -3. Install it manually using the VSCode CLI: - ```sh - code --install-extension bin/zoo-code-.vsix - ``` +Revenue generated so far: 0 (and still counting) ---- +## Special Messages -We use [changesets](https://github.com/changesets/changesets) for versioning and -publishing. Check our `CHANGELOG.md` for release notes. +Don't star this repo. 
It will just get me excited to drag you into the jobless community. ---- +For any issue not related to self-learning, please submit it at https://github.com/Zoo-Code-Org/Zoo-Code/issues, as they know more than I do (no cap). -## Disclaimer +## FAQ -**Please note** that Zoo Code does **not** make any representations or -warranties regarding any code, models, or other tools provided or made available -in connection with Zoo Code, any associated third-party tools, or any resulting -outputs. You assume **all risks** associated with the use of any such tools or -outputs; such tools are provided on an **"AS IS"** and **"AS AVAILABLE"** basis. -Such risks may include, without limitation, intellectual property infringement, -cyber vulnerabilities or attacks, bias, inaccuracies, errors, defects, viruses, -downtime, property loss or damage, and/or personal injury. You are solely -responsible for your use of any such tools or outputs (including, without -limitation, the legality, appropriateness, and results thereof). +**Q:** What is your day job? ---- - -## Contributing +**A:** Jobless -We love community contributions! Get started by reading our -[CONTRIBUTING.md](CONTRIBUTING.md). +**Q:** What is your night job? ---- +**A:** Sleep -## License +**Q:** Ooo... Can I buy you coffee? -[Apache 2.0 © 2026 Zoo Code Org](./LICENSE) +**A:** No. I have insomnia. ---- +**Q:** Can I...? -**Enjoy Zoo Code!** Whether you keep it on a short leash or let it roam -autonomously, we can’t wait to see what you build. If you have questions or -feature ideas, drop by our [Reddit community](https://www.reddit.com/r/ZooCode/) -or [Discord](https://discord.gg/VxfP4Vx3gX), or open an -[issue](https://github.com/Zoo-Code-Org/Zoo-Code/issues). Happy coding! +**A:** This is the end of the conversation. 
diff --git a/packages/types/src/__tests__/learning-memory.test.ts b/packages/types/src/__tests__/learning-memory.test.ts
new file mode 100644
index 0000000000..6cea0de8ff
--- /dev/null
+++ b/packages/types/src/__tests__/learning-memory.test.ts
@@ -0,0 +1,71 @@
+// Run with: npx vitest run packages/types/src/__tests__/learning-memory.test.ts
+
+import {
+	DEFAULT_LEARNING_CONFIG,
+	EMPTY_LEARNING_STATE,
+	learningConfigSchema,
+	learningStateSchema,
+	memoryContextSchema,
+	type LearningState,
+	type MemoryContext,
+} from "../index.js"
+
+describe("learning types", () => {
+	it("exports the default learning config", () => {
+		// NOTE(review): these literals must equal the schema's .default() values, because the
+		// "applies learning config defaults" case below requires parse({}) to equal this constant.
+		expect(DEFAULT_LEARNING_CONFIG).toMatchObject({
+			enabled: false,
+			reviewOnTurnCount: 10,
+			reviewOnToolIterationCount: 50,
+		})
+	})
+
+	it("parses the empty learning state", () => {
+		const result = learningStateSchema.safeParse(EMPTY_LEARNING_STATE) // the constant must already be schema-valid
+
+		expect(result.success).toBe(true)
+		expect(result.data).toEqual(EMPTY_LEARNING_STATE) // parsing must not add or change any field
+	})
+
+	it("applies learning config defaults", () => {
+		const result = learningConfigSchema.parse({}) // empty input: every field falls back to its schema default
+
+		expect(result).toEqual(DEFAULT_LEARNING_CONFIG)
+	})
+
+	it("preserves TypeScript inference for learning state", () => {
+		const state: LearningState = EMPTY_LEARNING_STATE // compile-time check: constant is assignable to the inferred type
+
+		expect(state.version).toBe(1)
+	})
+})
+
+describe("memory types", () => {
+	it("parses a valid memory context", () => {
+		const input: MemoryContext = {
+			entries: [],
+			revision: 0,
+			generatedAt: Date.now(),
+		}
+
+		const result = memoryContextSchema.safeParse(input)
+
+		expect(result.success).toBe(true)
+		expect(result.data).toEqual(input)
+	})
+
+	it("rejects more than ten memory entries", () => {
+		const result = memoryContextSchema.safeParse({
+			entries: Array.from({ length: 11 }, (_, index) => ({ // 11 entries: one over the schema's max(10)
+				id: `entry-${index}`,
+				content: "memory",
+				source: "learning",
+				createdAt: index,
+				updatedAt: index,
+			})),
+			revision: 0,
+			generatedAt: Date.now(),
+		})
+
+		expect(result.success).toBe(false)
+	})
+})
diff --git
a/packages/types/src/experiment.ts b/packages/types/src/experiment.ts
index d7eb0b03d6..7f2d2b5ea6 100644
--- a/packages/types/src/experiment.ts
+++ b/packages/types/src/experiment.ts
@@ -6,7 +6,33 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js"
  * ExperimentId
  */
 
-export const experimentIds = ["preventFocusDisruption", "imageGeneration", "runSlashCommand", "customTools"] as const
+export const experimentIds = [ // keep in sync with the keys of experimentsSchema below
+	"preventFocusDisruption",
+	"imageGeneration",
+	"runSlashCommand",
+	"customTools",
+	"selfImproving",
+	"selfImprovingAutoSkills",
+	"selfImprovingAutoMode",
+	"selfImprovingReviewTeam",
+	"selfImprovingFullTrust",
+	"selfImprovingQuestionEvaluation",
+	"selfImprovingPromptQuality",
+	"selfImprovingToolPreference",
+	"selfImprovingSkillMerge",
+	"selfImprovingPersistCounts",
+	"selfImprovingCodeIndex",
+	"oneShotOrchestrator",
+	"kaizenOrchestrator",
+	"preventionEngine",
+	"cascadeTracker",
+	"resilienceService",
+	"toolErrorHealer",
+	"verificationEngine",
+	"requirementsVerification",
+	"recoveryContext",
+	"selfImprovingSpecializedSkills",
+] as const
 
 export const experimentIdsSchema = z.enum(experimentIds)
 
@@ -21,6 +47,27 @@ export const experimentsSchema = z.object({
 	imageGeneration: z.boolean().optional(),
 	runSlashCommand: z.boolean().optional(),
 	customTools: z.boolean().optional(),
+	selfImproving: z.boolean().optional(), // one optional boolean per id added to experimentIds above
+	selfImprovingAutoSkills: z.boolean().optional(),
+	selfImprovingAutoMode: z.boolean().optional(),
+	selfImprovingReviewTeam: z.boolean().optional(),
+	selfImprovingFullTrust: z.boolean().optional(),
+	selfImprovingQuestionEvaluation: z.boolean().optional(),
+	selfImprovingPromptQuality: z.boolean().optional(),
+	selfImprovingToolPreference: z.boolean().optional(),
+	selfImprovingSkillMerge: z.boolean().optional(),
+	selfImprovingPersistCounts: z.boolean().optional(),
+	selfImprovingCodeIndex: z.boolean().optional(),
+	oneShotOrchestrator: z.boolean().optional(),
+	kaizenOrchestrator: z.boolean().optional(),
+	preventionEngine: z.boolean().optional(),
+	cascadeTracker: z.boolean().optional(),
+	resilienceService: z.boolean().optional(),
+	toolErrorHealer: z.boolean().optional(),
+	verificationEngine: z.boolean().optional(),
+	requirementsVerification: z.boolean().optional(),
+	recoveryContext: z.boolean().optional(),
+	selfImprovingSpecializedSkills: z.boolean().optional(),
 })
 
 export type Experiments = z.infer<typeof experimentsSchema>
diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts
index bf5c3f025d..206eb1272b 100644
--- a/packages/types/src/global-settings.ts
+++ b/packages/types/src/global-settings.ts
@@ -78,6 +78,10 @@ export const DEFAULT_CHECKPOINT_TIMEOUT_SECONDS = 15
  * GlobalSettings
  */
 
+export const selfImprovingScopeSchema = z.enum(["workspace", "global"]) // shared by selfImprovingScope and selfImprovingAutoSkillsScope
+
+export type SelfImprovingScope = z.infer<typeof selfImprovingScopeSchema>
+
 export const globalSettingsSchema = z.object({
 	currentApiConfigName: z.string().optional(),
 	listApiConfigMeta: z.array(providerSettingsEntrySchema).optional(),
@@ -92,6 +96,20 @@ export const globalSettingsSchema = z.object({
 	imageGenerationProvider: z.enum(["openrouter"]).optional(),
 	openRouterImageApiKey: z.string().optional(),
 	openRouterImageGenerationSelectedModel: z.string().optional(),
+	memoryBackend: z.enum(["builtin", "agentmemory"]).optional(),
+	agentMemoryUrl: z.string().optional(), // NOTE(review): presumably only read when memoryBackend is "agentmemory" — confirm
+	selfImprovingScope: selfImprovingScopeSchema.optional(),
+	selfImprovingAutoSkillsScope: selfImprovingScopeSchema.optional(),
+
+	// KAIZEN orchestrator configuration
+	kaizenFrequency: z.number().min(1).max(100).optional(), // NOTE(review): fractional values are accepted (no .int()) — confirm intent
+	kaizenMiniGoal: z.string().optional(),
+	kaizenLimit: z.number().min(1).max(1000).optional(),
+
+	// KAIZEN git auto-push configuration
+	kaizenAutoPush: z.boolean().optional(),
+	kaizenRemoteName: z.string().optional(),
+	kaizenCommitTemplate: z.string().optional(),
 
 	customCondensingPrompt: z.string().optional(),
 
diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts
index ee4d485720..085a72998c 100644
---
a/packages/types/src/index.ts
+++ b/packages/types/src/index.ts
@@ -14,8 +14,11 @@ export * from "./global-settings.js"
 export * from "./history.js"
 export * from "./image-generation.js"
 export * from "./ipc.js"
+export * from "./learning.js"
+export * from "./marketplace.js"
 export * from "./mcp.js"
 export * from "./message.js"
+export * from "./memory.js"
 export * from "./mode.js"
 export * from "./model.js"
 export * from "./provider-settings.js"
diff --git a/packages/types/src/learning.ts b/packages/types/src/learning.ts
new file mode 100644
index 0000000000..cbf6126fd4
--- /dev/null
+++ b/packages/types/src/learning.ts
@@ -0,0 +1,196 @@
+import { z } from "zod"
+
+/**
+ * FeedbackSignal - types of learning observations (tags LearningEvent.signal and LearnedPattern.sourceSignals)
+ */
+export const feedbackSignalSchema = z.enum([
+	"USER_CORRECTION",
+	"TASK_SUCCESS",
+	"TASK_FAILURE",
+	"PATTERN_REPEAT",
+	"CODE_INDEX_HIT",
+	"PROMPT_QUALITY",
+	"TOOL_PREFERENCE",
+])
+
+export type FeedbackSignal = z.infer<typeof feedbackSignalSchema>
+
+/**
+ * LearningConfig - configuration for the learning system; every field has a default, so parse({}) yields a full config
+ */
+export const learningConfigSchema = z.object({
+	enabled: z.boolean().default(false),
+	reviewOnTurnCount: z.number().int().min(1).default(10),
+	reviewOnToolIterationCount: z.number().int().min(1).default(50),
+	reviewOnEveryTurn: z.boolean().default(false),
+	maxStoredPatterns: z.number().int().min(1).default(100),
+	maxStoredEvents: z.number().int().min(1).default(500),
+	maxPromptPatterns: z.number().int().min(1).default(5),
+	curatorEnabled: z.boolean().default(true),
+	curatorIntervalMs: z.number().int().min(60000).default(3600000),
+	staleAfterDays: z.number().int().min(1).default(14),
+	archiveAfterDays: z.number().int().min(1).default(60),
+	codeIndexCorrelationEnabled: z.boolean().default(true),
+})
+
+export type LearningConfig = z.infer<typeof learningConfigSchema>
+
+/**
+ * DEFAULT_LEARNING_CONFIG - derived from the schema defaults so the constant and the schema cannot drift apart
+ */
+export const DEFAULT_LEARNING_CONFIG: LearningConfig = learningConfigSchema.parse({})
+
+/**
+ * LearningEvent - a single learning observation (what happened in `context`, how it ended in `outcome`)
+ */
+export const learningEventSchema = z.object({
+	id: z.string(),
+	signal: feedbackSignalSchema,
+	timestamp: z.number(),
+	taskId: z.string().optional(),
+	workspacePath: z.string().optional(),
+	mode: z.string().optional(),
+	context: z.object({
+		userTurnCount: z.number().optional(),
+		toolIterationCount: z.number().optional(),
+		toolNames: z.array(z.string()).optional(),
+		promptFingerprint: z.string().optional(),
+		errorKey: z.string().optional(),
+		codeIndex: z
+			.object({
+				available: z.boolean(),
+				hits: z.number(),
+				topScore: z.number().optional(),
+			})
+			.optional(),
+	}),
+	outcome: z.object({
+		success: z.boolean().optional(),
+		corrected: z.boolean().optional(),
+		summary: z.string().optional(),
+		confidenceDelta: z.number().optional(),
+	}),
+})
+
+export type LearningEvent = z.infer<typeof learningEventSchema>
+
+/**
+ * PatternState - lifecycle state for learned patterns
+ */
+export const patternStateSchema = z.enum(["active", "stale", "archived"])
+
+export type PatternState = z.infer<typeof patternStateSchema>
+
+/**
+ * PatternType - category of learned pattern
+ */
+export const patternTypeSchema = z.enum(["prompt", "tool", "error", "skill", "code-index"])
+
+export type PatternType = z.infer<typeof patternTypeSchema>
+
+/**
+ * LearnedPattern - a pattern extracted from learning events; confidenceScore and successRate are limited to [0, 1]
+ */
+export const learnedPatternSchema = z.object({
+	id: z.string(),
+	patternType: patternTypeSchema,
+	state: patternStateSchema,
+	summary: z.string(),
+	confidenceScore: z.number().min(0).max(1),
+	frequency: z.number().int().min(0),
+	successRate: z.number().min(0).max(1),
+	firstSeenAt: z.number(),
+	lastSeenAt: z.number(),
+	lastAppliedAt: z.number().optional(),
+	sourceSignals: z.array(feedbackSignalSchema),
+	context: z.object({
+		toolNames: z.array(z.string()).optional(),
+		errorKeys: z.array(z.string()).optional(),
+		modes: z.array(z.string()).optional(),
+		workspacePaths: z.array(z.string()).optional(),
+		promptFingerprint: z.string().optional(),
+	}),
+})
+
+export type LearnedPattern = z.infer<typeof learnedPatternSchema>
+
+/**
+ * ActionType - types of improvement actions
+ */
+export const actionTypeSchema = z.enum([
+	"PROMPT_ENRICHMENT",
+	"TOOL_PREFERENCE",
+	"ERROR_AVOIDANCE",
+	"SKILL_SUGGESTION",
+	"SKILL_CREATE",
+	"SKILL_UPDATE",
+	"SKILL_MERGE",
+	"SKILL_CREATE_FROM_SCRATCH",
+])
+
+export type ActionType = z.infer<typeof actionTypeSchema>
+
+/**
+ * ImprovementAction - an action to apply based on learned patterns; `payload` is an untyped record keyed by string
+ */
+export const improvementActionSchema = z.object({
+	id: z.string(),
+	actionType: actionTypeSchema,
+	target: z.enum(["system-prompt", "task-execution", "skills-manager", "review-queue"]),
+	payload: z.record(z.string(), z.unknown()),
+	timestamp: z.number(),
+})
+
+export type ImprovementAction = z.infer<typeof improvementActionSchema>
+
+/**
+ * LearningTelemetry - telemetry counters for the learning system (counters default to 0, timestamps are optional)
+ */
+export const learningTelemetrySchema = z.object({
+	promptEnrichmentUses: z.number().int().default(0),
+	toolPreferenceUses: z.number().int().default(0),
+	errorAvoidanceUses: z.number().int().default(0),
+	skillSuggestionCount: z.number().int().default(0),
+	lastReviewAt: z.number().optional(),
+	lastCuratorRunAt: z.number().optional(),
+})
+
+export type LearningTelemetry = z.infer<typeof learningTelemetrySchema>
+
+/**
+ * LearningState - full serializable state of the learning system; `version` is pinned to the literal 1
+ */
+export const learningStateSchema = z.object({
+	version: z.literal(1),
+	config: learningConfigSchema,
+	counters: z.object({
+		userTurnsSinceReview: z.number().int().default(0),
+		toolIterationsSinceReview: z.number().int().default(0),
+	}),
+	patterns: z.array(learnedPatternSchema).default([]),
+	archivedPatterns: z.array(learnedPatternSchema).default([]),
+	recentEvents: z.array(learningEventSchema).default([]),
+	pendingActions: z.array(improvementActionSchema).default([]),
+	telemetry: learningTelemetrySchema,
+})
+
+export type LearningState = z.infer<typeof learningStateSchema>
+
+export const EMPTY_LEARNING_STATE: LearningState = {
+	version: 1,
+	config: { ...DEFAULT_LEARNING_CONFIG }, // copy, so mutating a state's config never alters the shared default
+	counters: {
+		userTurnsSinceReview: 0,
+		toolIterationsSinceReview: 0,
+	},
+	patterns: [],
+	archivedPatterns: [],
+	recentEvents: [],
+	pendingActions: [],
+	telemetry: {
+		promptEnrichmentUses: 0,
+		toolPreferenceUses: 0,
+		errorAvoidanceUses: 0,
+		skillSuggestionCount: 0,
+	},
+}
diff --git a/packages/types/src/memory.ts b/packages/types/src/memory.ts
new file mode 100644
index 0000000000..9cd191b670
--- /dev/null
+++ b/packages/types/src/memory.ts
@@ -0,0 +1,29 @@
+import { z } from "zod"
+
+/**
+ * MemoryEntry - a single durable memory entry for prompt-facing context (content is capped at 2000 characters)
+ * Adapted from Hermes' bounded memory store concept.
+ */
+export const memoryEntrySchema = z.object({
+	id: z.string(),
+	content: z.string().max(2000),
+	source: z.enum(["learning", "user", "system", "review"]),
+	createdAt: z.number(),
+	updatedAt: z.number(),
+	relevanceScore: z.number().min(0).max(1).optional(),
+	tags: z.array(z.string()).optional(),
+	expiresAt: z.number().optional(),
+})
+
+export type MemoryEntry = z.infer<typeof memoryEntrySchema>
+
+/**
+ * MemoryContext - bounded set of memory entries for prompt injection (at most 10 entries)
+ */
+export const memoryContextSchema = z.object({
+	entries: z.array(memoryEntrySchema).max(10),
+	revision: z.number().int().default(0),
+	generatedAt: z.number(),
+})
+
+export type MemoryContext = z.infer<typeof memoryContextSchema>
diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts
index e518972a1c..9479957de0 100644
--- a/packages/types/src/message.ts
+++ b/packages/types/src/message.ts
@@ -36,6 +36,7 @@ export const clineAsks = [
 	"mistake_limit_reached",
 	"use_mcp_server",
 	"auto_approval_max_req_reached",
+	"mode_switch",
 ] as const
 
 export const clineAskSchema = z.enum(clineAsks)
diff --git a/packages/types/src/mode.ts b/packages/types/src/mode.ts
index d14fb72aa3..e2f16d0c8f 100644
--- a/packages/types/src/mode.ts
+++ b/packages/types/src/mode.ts
@@
-224,4 +224,74 @@ export const DEFAULT_MODES: readonly ModeConfig[] = [ customInstructions: "Your role is to coordinate complex workflows by delegating tasks to specialized modes. As an orchestrator, you should:\n\n1. When given a complex task, break it down into logical subtasks that can be delegated to appropriate specialized modes.\n\n2. For each subtask, use the `new_task` tool to delegate. Choose the most appropriate mode for the subtask's specific goal and provide comprehensive instructions in the `message` parameter. These instructions must include:\n * All necessary context from the parent task or previous subtasks required to complete the work.\n * A clearly defined scope, specifying exactly what the subtask should accomplish.\n * An explicit statement that the subtask should *only* perform the work outlined in these instructions and not deviate.\n * An instruction for the subtask to signal completion by using the `attempt_completion` tool, providing a concise yet thorough summary of the outcome in the `result` parameter, keeping in mind that this summary will be the source of truth used to keep track of what was completed on this project.\n * A statement that these specific instructions supersede any conflicting general instructions the subtask's mode might have.\n\n3. Track and manage the progress of all subtasks. When a subtask is completed, analyze its results and determine the next steps.\n\n4. Help the user understand how the different subtasks fit together in the overall workflow. Provide clear reasoning about why you're delegating specific tasks to specific modes.\n\n5. When all subtasks are completed, synthesize the results and provide a comprehensive overview of what was accomplished.\n\n6. Ask clarifying questions when necessary to better understand how to break down complex tasks effectively.\n\n7. Suggest improvements to the workflow based on the results of completed subtasks.\n\nUse subtasks to maintain clarity. 
If a request significantly shifts focus or requires a different expertise (mode), consider creating a subtask rather than overloading the current one.", }, + { + slug: "one-shot-orchestrator", + name: "🎯 ONE-SHOT Orchestrator", + roleDefinition: `You are a ONE-SHOT SPARC Orchestrator — the ultimate autonomous AI coding agent. You systematically build complete, production-ready software from a single user prompt. + +Unlike the generic orchestrator, you enforce a **rigid SPARC phase sequence** on every project. + +## Core Principles +1. **Phase-by-Phase Execution via Delegation**: Break every task into small, sequential SPARC phases. Delegate each phase to the most appropriate specialized mode using \`new_task\`. Do NOT do the work yourself. +2. **Zero Gap Coverage**: Every phase must cover ALL possible edge cases, error states, and boundary conditions. +3. **Full Integration**: Every component must be scaffolded, wired, and tested — no orphan code, no incomplete implementations. +4. **Enterprise Grade**: Production-ready code with proper error handling, logging, security, and performance considerations. +5. **E2E Verified**: Every feature must have end-to-end tests that pass before moving on. + +## Required SPARC Phases (always delegate in this order) +0. **Deep Research** — Delegate to \`research\` mode: conduct thorough research on the domain, existing codebase, libraries, APIs, competitors, best practices, common pitfalls, and any documentation. Understand the ecosystem before designing anything. This phase is mandatory — never skip. +1. **Requirements Analysis** — Delegate to \`ask\` mode: parse the user prompt, identify all features, edge cases, constraints +2. **Architecture Design** — Delegate to \`architect\` mode: system architecture, component tree, data flow +3. **Scaffolding** — Delegate to \`code\` mode: create all files, directories, configuration +4. 
**Core Implementation** — Delegate to \`code\` mode: implement each component with full error handling +5. **Integration Wiring** — Delegate to \`code\` mode: connect all components, ensure no orphan code +6. **Testing** — Delegate to \`test-generator\` or \`debug\` mode: write and run unit tests, integration tests, E2E tests +7. **Bug Fixing** — Delegate to \`debug\` mode: fix all test failures, edge cases, error states +8. **Final Verification** — Delegate to \`code\` or \`devops\`mode: run full test suite, verify all features work + +## Self-Improving Integration +You MUST actively use ALL available self-improving systems: +- **Pattern Analysis**: Review patterns from past sessions for guidance +- **Skill System**: Auto-create and update skills for reusable patterns +- **Full Team Review**: Delegate code review, architecture review, security review, performance review, and test review to the Review Team +- **Code Index**: Use vector search for pattern dedup and retrieval +- **Question Evaluation**: Use contextual analysis for decision making +- **Memory**: Persist and recall learnings across sessions`, + groups: [], + customInstructions: `You are the ONE-SHOT SPARC Orchestrator. You NEVER write code directly — you delegate every SPARC phase to specialized modes via \`new_task\`. Your unique value is enforcing the complete SPARC sequence from start to finish on every project. Never skip a phase. Never leave incomplete work. Every prompt is a complete project — treat it as such.`, + }, + { + slug: "kaizen-orchestrator", + name: "♾️ KAIZEN Orchestrator", + roleDefinition: `You are a KAIZEN SPARC Orchestrator — the continuous improvement autonomous AI coding agent. "Kaizen" (改善) means "change for the better" or "continuous improvement". You embody the philosophy that small, incremental changes made consistently over time lead to massive, long-term improvements. So you never stop iterating until the goal is achieved. 
+ +Unlike the generic orchestrator, you work in a **relentless continuous iteration loop** — you never deliver a single final result. You analyze, delegate a fix, verify, then loop again. + +## Core Principles +1. **Continuous Iteration Loop via Delegation**: Analyze → Identify → Delegate Fix via \`new_task\` → Verify → Enhance → Git Push → Re-evaluate. Loop endlessly until the mini-goal is achieved. You do NOT make changes yourself — you delegate each atomic change to the appropriate specialized mode. +2. **Small Steps, Big Impact**: Each iteration is a single, focused, atomic change delegated to one mode. One bug fix. One enhancement. One refactor. Never multiple changes at once. +3. **Data-Driven**: Always analyze logs, test results, error reports, database queries, and any available data sources before delegating. Never guess. +4. **Self-Evolving Mini-Goals**: The mini-goal is NOT static. It automatically grows and evolves based on self-improving/learning/healing feedback. Start with the user's initial rule/guideline, then gradually raise the bar as the codebase improves. +5. **Self-Evaluating**: After each delegated action completes, evaluate if the goal is closer. If not, pivot strategy. +6. **Continuous Iteration**: Loop endlessly — analyze, delegate fix, verify, repeat. +7. **Git Push Per Cycle**: Every completed cycle (mini-goal achieved) triggers: git add → git commit → git push. +8. **Zero Regressions**: Every fix must be verified. Every enhancement must pass existing tests. Never introduce new bugs. + +## Kaizen Iteration Loop (always delegate in this order) +0. **Deep Research** — Delegate to \`research\` mode: thoroughly research the domain, codebase, libraries, APIs, logs, metrics, and documentation before deciding what to fix. Understand root causes, not symptoms. +1. **Analyze** — Yourself: read logs, check test results, scan for errors, review metrics, identify patterns +2. 
**Identify** — Yourself: pinpoint the single most impactful change to make right now +3. **Delegate Fix** — Use \`new_task\` to send the fix to the most appropriate mode (\`code\`, \`debug\`, \`refactor\`) +4. **Verify Result** — Use \`debug\` mode to verify the change didn't break anything +5. **Enhance if Needed** — Delegate follow-up improvements to the appropriate mode +6. **Git Push** — Use \`command\` mode: git add → git commit → git push (so CI/CD applies to staging/production) +7. **Re-evaluate** — Loop back to Deep Research. Is the mini-goal achieved? If yes, expand the mini-goal. If no, research deeper and fix the next issue. +8. **Self-Evolving Goals** — Let the goal grow naturally: fix code quality → add tests → improve documentation → enhance monitoring → optimize performance + +## Integration +- **Tool Access**: You have NO direct edit/execute tools. Your power is orchestrating the Kaizen loop — analyzing state, delegating atomic fixes, verifying results, and looping. +- **Commit Message Template**: "kaizen: {description}" (auto-generated from the change)`, + groups: [], + customInstructions: `You are the KAIZEN Orchestrator. You NEVER write code directly — you delegate every atomic change to specialized modes via \`new_task\`. Your unique value is the relentless continuous improvement loop: analyze, delegate one fix, verify, git push, repeat. You never stop until the goal is achieved.`, + }, ] as const diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index c09f22aed7..6492ecd7c2 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -98,6 +98,7 @@ export interface ExtensionMessage { | "branchWorktreeIncludeResult" | "folderSelected" | "skills" + | "skillsUpdated" | "fileContent" text?: string /** For fileContent: { path, content, error? 
} */ @@ -290,6 +291,16 @@ export type ExtensionState = Pick< | "includeDiagnosticMessages" | "maxDiagnosticMessages" | "imageGenerationProvider" + | "memoryBackend" + | "agentMemoryUrl" + | "selfImprovingScope" + | "selfImprovingAutoSkillsScope" + | "kaizenFrequency" + | "kaizenMiniGoal" + | "kaizenLimit" + | "kaizenAutoPush" + | "kaizenRemoteName" + | "kaizenCommitTemplate" | "openRouterImageGenerationSelectedModel" | "includeTaskHistoryInEnhance" | "reasoningBlockCollapsed" @@ -360,7 +371,44 @@ export type ExtensionState = Pick< profileThresholds: Record hasOpenedModeSelector: boolean openRouterImageApiKey?: string + memoryBackend?: "builtin" | "agentmemory" + agentMemoryUrl?: string + selfImprovingScope?: "workspace" | "global" + selfImprovingAutoSkillsScope?: "workspace" | "global" messageQueue?: QueuedMessage[] + selfImprovingStatus?: { + enabled: boolean + started: boolean + patternCount: number + eventCount: number + actionCount: number + memoryEntries: number + memoryBackend?: string + skillRecords: number + curatorStatus: { + lastRunAt: number + firstRunDone: boolean + config: { + intervalMs: number + minIdleMs: number + firstRunDeferred: boolean + staleAfterDays: number + archiveAfterDays: number + backupsEnabled: boolean + maxBackups: number + } + } + lastReviewAt?: number + lastCuratorRunAt?: number + autoMode?: Record + reviewTeam?: Record + questionEvaluator?: Record + resilience?: Record + toolErrorHealer?: Record + preventionEngine?: Record + verificationEngine?: Record + requirementsVerification?: Record + } lastShownAnnouncementId?: string apiModelId?: string mcpServers?: McpServer[] diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index fbe50ebc04..b6e8ce0d55 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -536,7 +536,7 @@ importers: version: 6.0.0 diff: specifier: ^5.2.0 - version: 5.2.2 + version: 5.2.0 diff-match-patch: specifier: ^1.0.5 version: 1.0.5 @@ -659,7 +659,7 @@ importers: version: 1.8.3 simple-git: specifier: ^3.27.0 - version: 
3.36.0 + version: 3.27.0 sound-play: specifier: ^1.1.0 version: 1.1.0 @@ -692,7 +692,7 @@ importers: version: 6.24.0 uuid: specifier: ^11.1.0 - version: 11.1.1 + version: 11.1.0 vscode-material-icons: specifier: ^0.1.1 version: 0.1.1 @@ -918,7 +918,7 @@ importers: version: 2.2.0 diff: specifier: ^5.2.0 - version: 5.2.2 + version: 5.2.0 fast-deep-equal: specifier: ^3.1.3 version: 3.1.3 @@ -1100,6 +1100,9 @@ importers: vite: specifier: 8.0.14 version: 8.0.14(@types/node@20.19.41)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0) + vite-tsconfig-paths: + specifier: ^6.1.1 + version: 6.1.1(typescript@5.8.3)(vite@8.0.14(@types/node@20.19.41)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0)) vitest: specifier: ^3.2.3 version: 3.2.4(@types/debug@4.1.12)(@types/node@20.19.41)(@vitest/ui@3.2.4)(esbuild@0.28.0)(jiti@2.4.2)(jsdom@26.1.0)(tsx@4.19.4)(yaml@2.9.0) @@ -1471,10 +1474,6 @@ packages: resolution: {integrity: sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==} engines: {node: '>=6.9.0'} - '@babel/code-frame@7.29.7': - resolution: {integrity: sha512-Aup7aUOfpbAUg2ROOJN6Iw5f9DMBlzu0mIkm/malLQFN/YQgO48wCj0Kxa3sEHJvPVFg7siR+qRInwXd2qhQKw==} - engines: {node: '>=6.9.0'} - '@babel/compat-data@7.29.3': resolution: {integrity: sha512-LIVqM46zQWZhj17qA8wb4nW/ixr2y1Nw+r1etiAWgRM6U1IqP+LNhL1yg440jYZR72jCWcWbLWzIosH+uP1fqg==} engines: {node: '>=6.9.0'} @@ -1521,10 +1520,6 @@ packages: resolution: {integrity: sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==} engines: {node: '>=6.9.0'} - '@babel/helper-validator-identifier@7.29.7': - resolution: {integrity: sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg==} - engines: {node: '>=6.9.0'} - '@babel/helper-validator-option@7.27.1': resolution: {integrity: sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==} engines: {node: '>=6.9.0'} @@ -1571,8 +1566,8 
@@ packages: resolution: {integrity: sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==} engines: {node: '>=6.9.0'} - '@babel/runtime@7.29.7': - resolution: {integrity: sha512-Nq8OhGWiZIZGV6hLHoyAKLLcJihP/xFeBMGJoUrxTX2psI8dCifzLhZISFb+VWS3wFMRDmCGw5R+dOySCqPLhw==} + '@babel/runtime@7.29.2': + resolution: {integrity: sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==} engines: {node: '>=6.9.0'} '@babel/template@7.28.6': @@ -3212,12 +3207,6 @@ packages: '@shikijs/vscode-textmate@10.0.2': resolution: {integrity: sha512-83yeghZ2xxin3Nj8z1NMd/NCuca+gsYXswywDy5bHvwlWL8tpTQmzGeUuHd9FC3E/SBEMvzJRwWEOz5gGes9Qg==} - '@simple-git/args-pathspec@1.0.3': - resolution: {integrity: sha512-ngJMaHlsWDTfjyq9F3VIQ8b7NXbBLq5j9i5bJ6XLYtD6qlDXT7fdKY2KscWWUF8t18xx052Y/PUO1K1TRc9yKA==} - - '@simple-git/argv-parser@1.1.1': - resolution: {integrity: sha512-Q9lBcfQ+VQCpQqGJFHe5yooOS5hGdLFFbJ5R+R5aDsnkPCahtn1hSkMcORX65J2Z5lxSkD0lQorMsncuBQxYUw==} - '@sinclair/typebox@0.27.8': resolution: {integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==} @@ -3627,9 +3616,6 @@ packages: '@types/chai@5.2.2': resolution: {integrity: sha512-8kB30R7Hwqf40JPiKhVzodJs2Qc1ZJ5zuT3uzw5Hq/dhNCl3G3l83jfpdI1e20BP348+fV7VIL/+FxaXkqBmWg==} - '@types/chai@5.2.3': - resolution: {integrity: sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==} - '@types/clone-deep@4.0.4': resolution: {integrity: sha512-vXh6JuuaAha6sqEbJueYdh5zNBPPgG1OYumuz2UvLvriN6ABHDSW8ludREGWJb1MLIzbwZn4q4zUbUCerJTJfA==} @@ -4393,7 +4379,7 @@ packages: basic-ftp@5.0.5: resolution: {integrity: sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==} engines: {node: '>=10.0.0'} - deprecated: Security vulnerability fixed in 5.2.1, please upgrade + deprecated: Security vulnerability fixed in 5.2.0, please upgrade 
better-path-resolve@1.0.0: resolution: {integrity: sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g==} @@ -5201,10 +5187,6 @@ packages: resolution: {integrity: sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==} engines: {node: '>=0.3.1'} - diff@5.2.2: - resolution: {integrity: sha512-vtcDfH3TOjP8UekytvnHH1o1P4FcUdt4eQ1Y+Abap1tk/OB2MWQvcwS2ClCd1zuIhc3JKOx6p3kod8Vfys3E+A==} - engines: {node: '>=0.3.1'} - dingbat-to-unicode@1.0.1: resolution: {integrity: sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==} @@ -5941,6 +5923,9 @@ packages: resolution: {integrity: sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==} engines: {node: '>=10'} + globrex@0.1.2: + resolution: {integrity: sha512-uHJgbwAMwNFf5mLst7IWLNg14x1CkeqglJb/K3doi4dw6q2IvAAmM/Y81kevy83wP+Sst+nutFTYOGg3d1lsxg==} + google-auth-library@10.5.0: resolution: {integrity: sha512-7ABviyMOlX5hIVD60YOfHw4/CxOfBhyduaYB+wbFWCWoni4N7SLcV46hrVRktuBbZjFC9ONyqamZITN7q3n32w==} engines: {node: '>=18'} @@ -8495,8 +8480,8 @@ packages: simple-get@4.0.1: resolution: {integrity: sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==} - simple-git@3.36.0: - resolution: {integrity: sha512-cGQjLjK8bxJw4QuYT7gxHw3/IouVESbhahSsHrX97MzCL1gu2u7oy38W6L2ZIGECEfIBG4BabsWDPjBxJENv9Q==} + simple-git@3.27.0: + resolution: {integrity: sha512-ivHoFS9Yi9GY49ogc6/YAi3Fl9ROnF4VyubNylgCkA+RVqLaKWnDSzXOVzya8csELIaWaYNutsEuAhZrtOjozA==} simple-invariant@2.0.1: resolution: {integrity: sha512-1sbhsxqI+I2tqlmjbz99GXNmZtr6tKIyEgGGnJw/MKGblalqk/XoOYYFJlBzTKZCxx8kLaD3FD5s9BEEjx5Pyg==} @@ -8889,10 +8874,6 @@ packages: resolution: {integrity: sha512-dAqSqE/RabpBKI8+h26GfLq6Vb3JVXs30XYQjdMjaj/c2tS8IYYMbIzP599KtRj7c57/wYApb3QjgRgXmrCukA==} engines: {node: '>=18'} - tinyexec@1.2.2: - resolution: {integrity: 
sha512-M/Q0B2cp4K7kynaT/vnED1j8TlLY+Pp7C6Wl2bl/7u/F0mUVwdyOpwomQb8JpYLitHUssAJRmLZdMCGsrx7i+g==} - engines: {node: '>=18'} - tinyglobby@0.2.14: resolution: {integrity: sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==} engines: {node: '>=12.0.0'} @@ -8909,8 +8890,8 @@ packages: resolution: {integrity: sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==} engines: {node: '>=14.0.0'} - tinyrainbow@3.1.0: - resolution: {integrity: sha512-Bf+ILmBgretUrdJxzXM0SgXLZ3XfiaUuOj/IKQHuTXip+05Xn+uyEYdVg0kYDipTBcLrCVyUzAPz7QmArb0mmw==} + tinyrainbow@3.0.3: + resolution: {integrity: sha512-PSkbLUoxOFRzJYjjxHJt9xro7D+iilgMX/C9lawzVuYiIdcihh9DXmVibBe8lmcFrRi/VzlPjBxbN7rH24q8/Q==} engines: {node: '>=14.0.0'} tinyspy@4.0.3: @@ -8999,6 +8980,16 @@ packages: ts-interface-checker@0.1.13: resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==} + tsconfck@3.1.6: + resolution: {integrity: sha512-ks6Vjr/jEw0P1gmOVwutM3B7fWxoWBL2KRDb1JfqGVawBmO5UsvmWOQFGHBPl5yxYz4eERr19E6L7NMv+Fej4w==} + engines: {node: ^18 || >=20} + hasBin: true + peerDependencies: + typescript: ^5.0.0 + peerDependenciesMeta: + typescript: + optional: true + tslib@1.14.1: resolution: {integrity: sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==} @@ -9277,8 +9268,8 @@ packages: util-deprecate@1.0.2: resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} - uuid@11.1.1: - resolution: {integrity: sha512-vIYxrBCC/N/K+Js3qSN88go7kIfNPssr/hHCesKCQNAjmgvYS2oqr69kIufEG+O4+PfezOH4EbIeHCfFov8ZgQ==} + uuid@11.1.0: + resolution: {integrity: sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==} hasBin: true uuid@8.3.2: @@ -9319,6 +9310,11 @@ packages: engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} hasBin: true + 
vite-tsconfig-paths@6.1.1: + resolution: {integrity: sha512-2cihq7zliibCCZ8P9cKJrQBkfgdvcFkOOc3Y02o3GWUDLgqjWsZudaoiuOwO/gzTzy17cS5F7ZPo4bsnS4DGkg==} + peerDependencies: + vite: 8.0.14 + vite@8.0.14: resolution: {integrity: sha512-s4BJJ+5y1pYL6Otw51FHhVJQhPnuRinKig64g/1+EUNaJsd3gCKdD31IPFvswUgW9/60QT9oFHbZHbQK5imcxw==} engines: {node: ^20.19.0 || >=22.12.0} @@ -10573,12 +10569,6 @@ snapshots: js-tokens: 4.0.0 picocolors: 1.1.1 - '@babel/code-frame@7.29.7': - dependencies: - '@babel/helper-validator-identifier': 7.29.7 - js-tokens: 4.0.0 - picocolors: 1.1.1 - '@babel/compat-data@7.29.3': {} '@babel/core@7.29.0': @@ -10643,8 +10633,6 @@ snapshots: '@babel/helper-validator-identifier@7.28.5': {} - '@babel/helper-validator-identifier@7.29.7': {} - '@babel/helper-validator-option@7.27.1': {} '@babel/helpers@7.29.2': @@ -10678,7 +10666,7 @@ snapshots: '@babel/runtime@7.28.4': {} - '@babel/runtime@7.29.7': {} + '@babel/runtime@7.29.2': {} '@babel/template@7.28.6': dependencies: @@ -11108,7 +11096,7 @@ snapshots: '@eslint/config-array@0.20.0': dependencies: '@eslint/object-schema': 2.1.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 minimatch: 3.1.2 transitivePeerDependencies: - supports-color @@ -11122,7 +11110,7 @@ snapshots: '@eslint/eslintrc@3.3.1': dependencies: ajv: 6.12.6 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 espree: 10.4.0 globals: 14.0.0 ignore: 5.3.2 @@ -11211,7 +11199,7 @@ snapshots: '@antfu/install-pkg': 1.1.0 '@antfu/utils': 8.1.1 '@iconify/types': 2.0.0 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 globals: 15.15.0 kolorist: 1.8.0 local-pkg: 1.1.1 @@ -11310,7 +11298,7 @@ snapshots: '@kwsites/file-exists@1.1.1': dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 transitivePeerDependencies: - supports-color @@ -11578,7 +11566,7 @@ snapshots: '@puppeteer/browsers@2.10.5': dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 extract-zip: 2.0.1 progress: 2.0.3 proxy-agent: 6.5.0 @@ -11591,7 +11579,7 @@ 
snapshots: '@puppeteer/browsers@2.6.1': dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 extract-zip: 2.0.1 progress: 2.0.3 proxy-agent: 6.5.0 @@ -12291,12 +12279,6 @@ snapshots: '@shikijs/vscode-textmate@10.0.2': {} - '@simple-git/args-pathspec@1.0.3': {} - - '@simple-git/argv-parser@1.1.1': - dependencies: - '@simple-git/args-pathspec': 1.0.3 - '@sinclair/typebox@0.27.8': {} '@sindresorhus/merge-streams@4.0.0': {} @@ -12700,8 +12682,8 @@ snapshots: '@testing-library/dom@10.4.0': dependencies: - '@babel/code-frame': 7.29.7 - '@babel/runtime': 7.29.7 + '@babel/code-frame': 7.29.0 + '@babel/runtime': 7.29.2 '@types/aria-query': 5.0.4 aria-query: 5.3.0 chalk: 4.1.2 @@ -12799,12 +12781,6 @@ snapshots: dependencies: '@types/deep-eql': 4.0.2 - '@types/chai@5.2.3': - dependencies: - '@types/deep-eql': 4.0.2 - assertion-error: 2.0.1 - optional: true - '@types/clone-deep@4.0.4': {} '@types/d3-array@3.2.1': {} @@ -13128,7 +13104,7 @@ snapshots: '@typescript-eslint/types': 8.32.1 '@typescript-eslint/typescript-estree': 8.32.1(typescript@5.8.3) '@typescript-eslint/visitor-keys': 8.32.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 eslint: 9.27.0(jiti@2.4.2) typescript: 5.8.3 transitivePeerDependencies: @@ -13143,7 +13119,7 @@ snapshots: dependencies: '@typescript-eslint/typescript-estree': 8.32.1(typescript@5.8.3) '@typescript-eslint/utils': 8.32.1(eslint@9.27.0(jiti@2.4.2))(typescript@5.8.3) - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 eslint: 9.27.0(jiti@2.4.2) ts-api-utils: 2.1.0(typescript@5.8.3) typescript: 5.8.3 @@ -13156,7 +13132,7 @@ snapshots: dependencies: '@typescript-eslint/types': 8.32.1 '@typescript-eslint/visitor-keys': 8.32.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 fast-glob: 3.3.3 is-glob: 4.0.3 minimatch: 9.0.5 @@ -13236,11 +13212,11 @@ snapshots: '@vitest/expect@4.0.18': dependencies: '@standard-schema/spec': 1.1.0 - '@types/chai': 5.2.3 + '@types/chai': 5.2.2 '@vitest/spy': 4.0.18 '@vitest/utils': 4.0.18 chai: 
6.2.2 - tinyrainbow: 3.1.0 + tinyrainbow: 3.0.3 optional: true '@vitest/mocker@3.2.4(vite@8.0.14(@types/node@20.17.50)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.8.3))': @@ -13290,7 +13266,7 @@ snapshots: '@vitest/pretty-format@4.0.18': dependencies: - tinyrainbow: 3.1.0 + tinyrainbow: 3.0.3 optional: true '@vitest/runner@3.2.4': @@ -13345,7 +13321,7 @@ snapshots: '@vitest/utils@4.0.18': dependencies: '@vitest/pretty-format': 4.0.18 - tinyrainbow: 3.1.0 + tinyrainbow: 3.0.3 optional: true '@vscode/codicons@0.0.36': {} @@ -13788,7 +13764,7 @@ snapshots: dependencies: bytes: 3.1.2 content-type: 1.0.5 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 http-errors: 2.0.0 iconv-lite: 0.6.3 on-finished: 2.4.1 @@ -14564,8 +14540,6 @@ snapshots: diff@5.2.0: {} - diff@5.2.2: {} - dingbat-to-unicode@1.0.1: {} dir-glob@3.0.1: @@ -15134,7 +15108,7 @@ snapshots: content-type: 1.0.5 cookie: 0.7.2 cookie-signature: 1.2.2 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 encodeurl: 2.0.0 escape-html: 1.0.3 etag: 1.8.1 @@ -15170,7 +15144,7 @@ snapshots: extract-zip@2.0.1: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 get-stream: 5.2.0 yauzl: 2.10.0 optionalDependencies: @@ -15258,7 +15232,7 @@ snapshots: finalhandler@2.1.0: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 encodeurl: 2.0.0 escape-html: 1.0.3 on-finished: 2.4.1 @@ -15488,7 +15462,7 @@ snapshots: dependencies: basic-ftp: 5.0.5 data-uri-to-buffer: 6.0.2 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 transitivePeerDependencies: - supports-color @@ -15543,6 +15517,8 @@ snapshots: merge2: 1.4.1 slash: 3.0.0 + globrex@0.1.2: {} + google-auth-library@10.5.0: dependencies: base64-js: 1.5.1 @@ -15779,14 +15755,14 @@ snapshots: http-proxy-agent@7.0.2: dependencies: agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 transitivePeerDependencies: - supports-color https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 
4.4.3 transitivePeerDependencies: - supports-color @@ -16957,7 +16933,7 @@ snapshots: roughjs: 4.6.6 stylis: 4.3.6 ts-dedent: 2.2.0 - uuid: 11.1.1 + uuid: 11.1.0 transitivePeerDependencies: - supports-color @@ -17142,7 +17118,7 @@ snapshots: micromark@2.11.4: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 parse-entities: 2.0.0 transitivePeerDependencies: - supports-color @@ -17598,7 +17574,7 @@ snapshots: dependencies: '@tootallnate/quickjs-emscripten': 0.23.0 agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 get-uri: 6.0.4 http-proxy-agent: 7.0.2 https-proxy-agent: 7.0.6 @@ -17862,7 +17838,7 @@ snapshots: proxy-agent@6.5.0: dependencies: agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 http-proxy-agent: 7.0.2 https-proxy-agent: 7.0.6 lru-cache: 7.18.3 @@ -17919,7 +17895,7 @@ snapshots: dependencies: '@puppeteer/browsers': 2.10.5 chromium-bidi: 5.1.0(devtools-protocol@0.0.1452169) - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 devtools-protocol: 0.0.1452169 typed-query-selector: 2.12.0 ws: 8.18.2 @@ -18383,7 +18359,7 @@ snapshots: router@2.2.0: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 depd: 2.0.0 is-promise: 4.0.0 parseurl: 1.3.3 @@ -18486,7 +18462,7 @@ snapshots: send@1.2.0: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 encodeurl: 2.0.0 escape-html: 1.0.3 etag: 1.8.1 @@ -18620,13 +18596,11 @@ snapshots: simple-concat: 1.0.1 optional: true - simple-git@3.36.0: + simple-git@3.27.0: dependencies: '@kwsites/file-exists': 1.1.1 '@kwsites/promise-deferred': 1.1.1 - '@simple-git/args-pathspec': 1.0.3 - '@simple-git/argv-parser': 1.1.1 - debug: 4.4.3 + debug: 4.4.1(supports-color@8.1.1) transitivePeerDependencies: - supports-color @@ -18662,7 +18636,7 @@ snapshots: socks-proxy-agent@8.0.5: dependencies: agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 socks: 2.8.4 transitivePeerDependencies: - supports-color @@ -19041,9 +19015,6 @@ snapshots: 
tinyexec@1.1.2: {} - tinyexec@1.2.2: - optional: true - tinyglobby@0.2.14: dependencies: fdir: 6.5.0(picomatch@4.0.4) @@ -19058,7 +19029,7 @@ snapshots: tinyrainbow@2.0.0: {} - tinyrainbow@3.1.0: + tinyrainbow@3.0.3: optional: true tinyspy@4.0.3: {} @@ -19123,6 +19094,10 @@ snapshots: ts-interface-checker@0.1.13: {} + tsconfck@3.1.6(typescript@5.8.3): + optionalDependencies: + typescript: 5.8.3 + tslib@1.14.1: {} tslib@2.6.2: {} @@ -19471,7 +19446,7 @@ snapshots: util-deprecate@1.0.2: {} - uuid@11.1.1: {} + uuid@11.1.0: {} uuid@8.3.2: {} @@ -19515,7 +19490,7 @@ snapshots: vite-node@3.2.4(@types/node@20.17.50)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.8.3): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 es-module-lexer: 1.7.0 pathe: 2.0.3 vite: 8.0.14(@types/node@20.17.50)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.8.3) @@ -19537,7 +19512,7 @@ snapshots: vite-node@3.2.4(@types/node@20.17.57)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 es-module-lexer: 1.7.0 pathe: 2.0.3 vite: 8.0.14(@types/node@20.17.57)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0) @@ -19559,7 +19534,7 @@ snapshots: vite-node@3.2.4(@types/node@20.19.41)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 es-module-lexer: 1.7.0 pathe: 2.0.3 vite: 8.0.14(@types/node@20.19.41)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0) @@ -19581,7 +19556,7 @@ snapshots: vite-node@3.2.4(@types/node@24.2.1)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 es-module-lexer: 1.7.0 pathe: 2.0.3 vite: 8.0.14(@types/node@24.2.1)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0) @@ -19600,6 +19575,16 @@ snapshots: - tsx - yaml + 
vite-tsconfig-paths@6.1.1(typescript@5.8.3)(vite@8.0.14(@types/node@20.19.41)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0)): + dependencies: + debug: 4.4.3 + globrex: 0.1.2 + tsconfck: 3.1.6(typescript@5.8.3) + vite: 8.0.14(@types/node@20.19.41)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0) + transitivePeerDependencies: + - supports-color + - typescript + vite@8.0.14(@types/node@20.17.50)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.8.3): dependencies: lightningcss: 1.32.0 @@ -19857,9 +19842,9 @@ snapshots: picomatch: 4.0.4 std-env: 3.10.0 tinybench: 2.9.0 - tinyexec: 1.2.2 + tinyexec: 1.1.2 tinyglobby: 0.2.16 - tinyrainbow: 3.1.0 + tinyrainbow: 3.0.3 vite: 8.0.14(@types/node@20.19.41)(esbuild@0.28.0)(jiti@2.4.2)(tsx@4.19.4)(yaml@2.9.0) why-is-node-running: 2.3.0 optionalDependencies: diff --git a/src/__tests__/extension.spec.ts b/src/__tests__/extension.spec.ts index e01c739edc..c51c27fb34 100644 --- a/src/__tests__/extension.spec.ts +++ b/src/__tests__/extension.spec.ts @@ -184,6 +184,7 @@ vi.mock("../core/webview/ClineProvider", async () => { postStateToWebview: vi.fn(), postStateToWebviewWithoutClineMessages: vi.fn(), getState: vi.fn().mockResolvedValue({}), + initializeSelfImproving: vi.fn().mockResolvedValue(undefined), initializeCloudProfileSyncWhenReady: vi.fn().mockResolvedValue(undefined), providerSettingsManager: {}, contextProxy: { getGlobalState: vi.fn() }, @@ -261,6 +262,18 @@ describe("extension.ts", () => { expect(dotenvx.config).toHaveBeenCalledTimes(1) }) + test("initializes self-improving through the provider during activation", async () => { + vi.resetModules() + vi.clearAllMocks() + + const { ClineProvider } = await import("../core/webview/ClineProvider") + const { activate } = await import("../extension") + await activate(mockContext) + + const provider = (ClineProvider as any).getVisibleInstance() + expect(provider.initializeSelfImproving).toHaveBeenCalledTimes(1) + }) + describe("cloud auth state handling", () => { 
beforeEach(() => { vi.resetModules() diff --git a/src/api/providers/__tests__/fireworks.spec.ts b/src/api/providers/__tests__/fireworks.spec.ts index 227dea1795..871c989eac 100644 --- a/src/api/providers/__tests__/fireworks.spec.ts +++ b/src/api/providers/__tests__/fireworks.spec.ts @@ -95,25 +95,46 @@ describe("FireworksHandler", () => { }) it.each([ - { modelId: "accounts/fireworks/models/glm-5p1" as const, contextWindow: 202752, inputPrice: 1.4, outputPrice: 4.4, cacheReadsPrice: 0.26 }, - { modelId: "accounts/fireworks/models/kimi-k2p6" as const, contextWindow: 262144, inputPrice: 0.95, outputPrice: 4.0, cacheReadsPrice: 0.16 }, - { modelId: "accounts/fireworks/models/deepseek-v4-pro" as const, contextWindow: 1048576, inputPrice: 1.74, outputPrice: 3.48, cacheReadsPrice: 0.14 }, - ])("should expose newly added model $modelId", ({ modelId, contextWindow, inputPrice, outputPrice, cacheReadsPrice }) => { - expect(fireworksModels[modelId]).toBeDefined() - const info = fireworksModels[modelId] - expect(info.maxTokens).toBeGreaterThan(0) - expect(info.contextWindow).toBe(contextWindow) - expect(info.inputPrice).toBe(inputPrice) - expect(info.outputPrice).toBe(outputPrice) - expect(info.cacheReadsPrice).toBe(cacheReadsPrice) - expect(info.description).toBeTruthy() - - const handlerWithModel = new FireworksHandler({ - apiModelId: modelId, - fireworksApiKey: "test-fireworks-api-key", - }) - expect(handlerWithModel.getModel().id).toBe(modelId) - }) + { + modelId: "accounts/fireworks/models/glm-5p1" as const, + contextWindow: 202752, + inputPrice: 1.4, + outputPrice: 4.4, + cacheReadsPrice: 0.26, + }, + { + modelId: "accounts/fireworks/models/kimi-k2p6" as const, + contextWindow: 262144, + inputPrice: 0.95, + outputPrice: 4.0, + cacheReadsPrice: 0.16, + }, + { + modelId: "accounts/fireworks/models/deepseek-v4-pro" as const, + contextWindow: 1048576, + inputPrice: 1.74, + outputPrice: 3.48, + cacheReadsPrice: 0.14, + }, + ])( + "should expose newly added model $modelId", + 
({ modelId, contextWindow, inputPrice, outputPrice, cacheReadsPrice }) => { + expect(fireworksModels[modelId]).toBeDefined() + const info = fireworksModels[modelId] + expect(info.maxTokens).toBeGreaterThan(0) + expect(info.contextWindow).toBe(contextWindow) + expect(info.inputPrice).toBe(inputPrice) + expect(info.outputPrice).toBe(outputPrice) + expect(info.cacheReadsPrice).toBe(cacheReadsPrice) + expect(info.description).toBeTruthy() + + const handlerWithModel = new FireworksHandler({ + apiModelId: modelId, + fireworksApiKey: "test-fireworks-api-key", + }) + expect(handlerWithModel.getModel().id).toBe(modelId) + }, + ) it("should return Kimi K2 Instruct model with correct configuration", () => { const testModelId: FireworksModelId = "accounts/fireworks/models/kimi-k2-instruct" diff --git a/src/api/providers/fetchers/opencode-go.ts b/src/api/providers/fetchers/opencode-go.ts index 11b4d88024..ac62db52a7 100644 --- a/src/api/providers/fetchers/opencode-go.ts +++ b/src/api/providers/fetchers/opencode-go.ts @@ -72,7 +72,9 @@ export async function getOpencodeGoModels(apiKey?: string): Promise c.type === "text") + .map((c) => c.text) + .join("\n") + if (userText) { + sim.preventionEngine.enrichContextWithCodeIndex(userText).then((enriched) => { + if (enriched !== userText) { + cline.userMessageContent.push({ + type: "text", + text: `[Code Index Context]\n${enriched}`, + }) + } + }).catch(() => { + // Graceful fallback — enrichment failure is non-critical + }) + } + } + } + switch (block.name) { case "write_to_file": await checkpointSaveAndMark(cline) @@ -813,6 +848,10 @@ export async function presentAssistantMessage(cline: Task) { }) break case "attempt_completion": { + // Reset recovery state — the model is trying to deliver a result, + // not failing. This prevents false positive recovery from large + // response failures during delivery attempts. 
+ cline.resilienceService?.onDeliveryAttempt() const completionCallbacks: AttemptCompletionCallbacks = { askApproval, handleError, diff --git a/src/core/auto-approval/index.ts b/src/core/auto-approval/index.ts index c8293c2a79..b2bef87e55 100644 --- a/src/core/auto-approval/index.ts +++ b/src/core/auto-approval/index.ts @@ -12,6 +12,7 @@ import { ClineAskResponse } from "../../shared/WebviewMessage" import { isWriteToolAction, isReadOnlyToolAction } from "./tools" import { isMcpToolAlwaysAllowed } from "./mcp" import { getCommandDecision } from "./commands" +import type { TrustService } from "../../services/self-improving/TrustService" // We have auto-approval actions for different categories. export type AutoApprovalState = @@ -49,16 +50,29 @@ export async function checkAutoApproval({ ask, text, isProtected, + trustService, }: { state?: Pick ask: ClineAsk text?: string isProtected?: boolean + trustService?: TrustService }): Promise { if (isNonBlockingAsk(ask)) { return { decision: "approve" } } + // Check TrustService for auto-approval (experiment-gated full trust) + if (trustService) { + const toolName = mapAskToToolName(ask, text) + if (toolName) { + const params = extractToolParams(ask, text) + if (trustService.shouldAutoApprove(toolName, params)) { + return { decision: "approve" } + } + } + } + if (!state || !state.autoApprovalEnabled) { return { decision: "ask" } } @@ -182,4 +196,64 @@ export async function checkAutoApproval({ return { decision: "ask" } } +/** + * Map a ClineAsk type to a tool name for TrustService checks. 
+ */ +function mapAskToToolName(ask: ClineAsk, text?: string): string | undefined { + switch (ask) { + case "tool": + if (!text) return undefined + try { + const tool = JSON.parse(text) as ClineSayTool + return tool.tool + } catch { + return undefined + } + case "command": + return "execute_command" + case "command_output": + return "execute_command" + case "use_mcp_server": + return "use_mcp_tool" + case "mode_switch": + return "switch_mode" + case "followup": + return "ask_followup_question" + case "completion_result": + return "attempt_completion" + default: + return undefined + } +} + +/** + * Extract tool parameters from a ClineAsk for TrustService checks. + */ +function extractToolParams( + ask: ClineAsk, + text?: string, +): { command?: string; path?: string; mode?: string } | undefined { + if (!text) return undefined + + switch (ask) { + case "tool": + try { + const tool = JSON.parse(text) as ClineSayTool + return { + command: tool.command, + path: tool.path, + } + } catch { + return undefined + } + case "command": + case "command_output": + return { command: text } + case "mode_switch": + return { mode: text } + default: + return undefined + } +} + export { AutoApprovalHandler } from "./AutoApprovalHandler" diff --git a/src/core/config/ContextProxy.ts b/src/core/config/ContextProxy.ts index 97d4104afc..cb9919081e 100644 --- a/src/core/config/ContextProxy.ts +++ b/src/core/config/ContextProxy.ts @@ -382,14 +382,16 @@ export class ContextProxy { return this.secretCache[key] } - storeSecret(key: SecretStateKey, value?: string) { - // Update cache. - this.secretCache[key] = value + async storeSecret(key: SecretStateKey, value?: string) { + // Write to storage first, then update cache only on success. + // This prevents cache/storage desync if the write fails. + if (value === undefined) { + await this.originalContext.secrets.delete(key) + } else { + await this.originalContext.secrets.store(key, value) + } - // Write directly to context. 
- return value === undefined - ? this.originalContext.secrets.delete(key) - : this.originalContext.secrets.store(key, value) + this.secretCache[key] = value } /** diff --git a/src/core/config/CustomModesManager.ts b/src/core/config/CustomModesManager.ts index a243a9236b..2550471e1f 100644 --- a/src/core/config/CustomModesManager.ts +++ b/src/core/config/CustomModesManager.ts @@ -52,6 +52,8 @@ export class CustomModesManager { private writeQueue: Array<() => Promise> = [] private cachedModes: ModeConfig[] | null = null private cachedAt: number = 0 + /** Prevents re-entrant onUpdate() when ModeFactory writes to .roomodes */ + private isInternalUpdate = false constructor( private readonly context: vscode.ExtensionContext, @@ -321,6 +323,11 @@ export class CustomModesManager { const roomodesWatcher = vscode.workspace.createFileSystemWatcher(roomodesPath) const handleRoomodesChange = async () => { + // Skip — this change was triggered by our own write (e.g. ModeFactory) + if (this.isInternalUpdate) { + return + } + try { const settingsModes = await this.loadModesFromFile(settingsPath) const roomodesModes = await this.loadModesFromFile(roomodesPath) @@ -402,6 +409,7 @@ export class CustomModesManager { } public async updateCustomMode(slug: string, config: ModeConfig): Promise { + this.isInternalUpdate = true try { // Validate the mode configuration before saving const validationResult = modeConfigSchema.safeParse(config) @@ -459,6 +467,8 @@ export class CustomModesManager { logger.error("Failed to update custom mode", { slug, error: errorMessage }) vscode.window.showErrorMessage(t("common:customModes.errors.updateFailed", { error: errorMessage })) throw error + } finally { + this.isInternalUpdate = false } } diff --git a/src/core/config/__tests__/ContextProxy.spec.ts b/src/core/config/__tests__/ContextProxy.spec.ts index 0a24141155..6f07feb901 100644 --- a/src/core/config/__tests__/ContextProxy.spec.ts +++ b/src/core/config/__tests__/ContextProxy.spec.ts @@ -255,6 
+255,30 @@ describe("ContextProxy", () => { const storedValue = proxy.getGlobalState("apiModelId") expect(storedValue).toBe("gpt-4") }) + + it("should persist self-improving memory backend settings in global settings", async () => { + const updateGlobalStateSpy = vi.spyOn(proxy, "updateGlobalState") + + await proxy.setValue("memoryBackend", "agentmemory") + await proxy.setValue("agentMemoryUrl", "http://agentmemory.internal:4001") + + expect(updateGlobalStateSpy).toHaveBeenCalledWith("memoryBackend", "agentmemory") + expect(updateGlobalStateSpy).toHaveBeenCalledWith("agentMemoryUrl", "http://agentmemory.internal:4001") + expect(proxy.getGlobalState("memoryBackend")).toBe("agentmemory") + expect(proxy.getGlobalState("agentMemoryUrl")).toBe("http://agentmemory.internal:4001") + }) + + it("should persist self-improving scope settings in global settings", async () => { + const updateGlobalStateSpy = vi.spyOn(proxy, "updateGlobalState") + + await proxy.setValue("selfImprovingScope", "workspace") + await proxy.setValue("selfImprovingAutoSkillsScope", "global") + + expect(updateGlobalStateSpy).toHaveBeenCalledWith("selfImprovingScope", "workspace") + expect(updateGlobalStateSpy).toHaveBeenCalledWith("selfImprovingAutoSkillsScope", "global") + expect(proxy.getGlobalState("selfImprovingScope")).toBe("workspace") + expect(proxy.getGlobalState("selfImprovingAutoSkillsScope")).toBe("global") + }) }) describe("setValues", () => { diff --git a/src/core/prompts/__tests__/system-prompt.spec.ts b/src/core/prompts/__tests__/system-prompt.spec.ts index f555daba06..d7d5c71ea3 100644 --- a/src/core/prompts/__tests__/system-prompt.spec.ts +++ b/src/core/prompts/__tests__/system-prompt.spec.ts @@ -49,6 +49,7 @@ import { ModeConfig } from "@roo-code/types" import { SYSTEM_PROMPT } from "../system" import { McpHub } from "../../../services/mcp/McpHub" +import type { SelfImprovingManager } from "../../../services/self-improving" import { defaultModeSlug, modes, Mode } from 
"../../../shared/modes" import "../../../utils/path" import { addCustomInstructions } from "../sections/custom-instructions" @@ -272,6 +273,37 @@ describe("SYSTEM_PROMPT", () => { expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-undefined-mcp-hub.snap") }) + it("should include learned guidance before rules when available", async () => { + const selfImprovingManager = { + getPromptContextString: () => "\n## Learned Guidance\n- [prompt] Search relevant code before editing\n", + getPromptContext: () => undefined, + } as unknown as SelfImprovingManager + + const prompt = await SYSTEM_PROMPT( + mockContext, + "/test/path", + false, + undefined, // mcpHub + undefined, // diffStrategy + defaultModeSlug, // mode + undefined, // customModePrompts + undefined, // customModes + undefined, // globalCustomInstructions + experiments, + undefined, // language + undefined, // rooIgnoreInstructions + undefined, // settings + undefined, // todoList + undefined, // modelId + undefined, // skillsManager + selfImprovingManager, + ) + + expect(prompt).toContain("## Learned Guidance") + expect(prompt).toContain("- [prompt] Search relevant code before editing") + expect(prompt.indexOf("## Learned Guidance")).toBeLessThan(prompt.indexOf("====\n\nRULES")) + }) + it("should include vscode language in custom instructions", async () => { // Mock vscode.env.language const vscode = vi.mocked(await import("vscode")) as any diff --git a/src/core/prompts/responses.ts b/src/core/prompts/responses.ts index 60b5b4123a..8349c02b0d 100644 --- a/src/core/prompts/responses.ts +++ b/src/core/prompts/responses.ts @@ -60,10 +60,11 @@ Otherwise, if you have not completed the task and do not need additional informa feedback, }), - missingToolParameterError: (paramName: string) => { + missingToolParameterError: (paramName: string, toolName?: string) => { const instructions = getToolInstructionsReminder() + const toolPrefix = toolName ? 
` for '${toolName}'` : "" - return `Missing value for required parameter '${paramName}'. Please retry with complete response.\n\n${instructions}` + return `Missing value for required parameter '${paramName}'${toolPrefix}. Please retry with complete response.\n\n${instructions}` }, invalidMcpToolArgumentError: (serverName: string, toolName: string) => diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 0d6071644a..907f4f3718 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -10,6 +10,7 @@ import { isEmpty } from "../../utils/object" import { McpHub } from "../../services/mcp/McpHub" import { CodeIndexManager } from "../../services/code-index/manager" import { SkillsManager } from "../../services/skills/SkillsManager" +import { SelfImprovingManager, type PromptContext } from "../../services/self-improving" import type { SystemPromptSettings } from "./types" import { @@ -38,6 +39,31 @@ export function getPromptComponent( return component } +/** + * Format structured PromptContext entries into a markdown string for prompt injection. + * Groups entries by their pattern type (prompt enrichment, error avoidance, tool preference). + */ +function buildPatternContextString(ctx: PromptContext): string { + const sections: string[] = [] + + const enrichedInstructions = ctx.entries.filter((e) => e.type === "prompt").map((e) => `- ${e.summary}`) + if (enrichedInstructions.length > 0) { + sections.push("## Learned Guidance\n" + enrichedInstructions.join("\n")) + } + + const errorAvoidanceRules = ctx.entries.filter((e) => e.type === "error").map((e) => `- ${e.summary}`) + if (errorAvoidanceRules.length > 0) { + sections.push("## Error Avoidance\n" + errorAvoidanceRules.join("\n")) + } + + const toolPreferences = ctx.entries.filter((e) => e.type === "tool").map((e) => `- ${e.summary}`) + if (toolPreferences.length > 0) { + sections.push("## Tool Preferences\n" + toolPreferences.join("\n")) + } + + return sections.length > 0 ? 
sections.join("\n\n") : "" +} + async function generatePrompt( context: vscode.ExtensionContext, cwd: string, @@ -55,6 +81,7 @@ async function generatePrompt( todoList?: TodoItem[], modelId?: string, skillsManager?: SkillsManager, + selfImprovingManager?: SelfImprovingManager, ): Promise { if (!context) { throw new Error("Extension context is required for generating system prompt") @@ -79,6 +106,12 @@ async function generatePrompt( getSkillsSection(skillsManager, mode as string), ]) + // Inject learned guidance from self-improving system (experiment-gated) + const learningContext = selfImprovingManager?.getPromptContextString() || "" + const promptContext = selfImprovingManager?.getPromptContext() + const patternContext = promptContext ? buildPatternContextString(promptContext) : "" + const combinedLearningContext = [learningContext, patternContext].filter(Boolean).join("\n\n") + // Tools catalog is not included in the system prompt. const toolsCatalog = "" @@ -93,7 +126,7 @@ ${getSharedToolUseSection()}${toolsCatalog} ${getCapabilitiesSection(cwd, shouldIncludeMcp ? mcpHub : undefined)} ${modesSection} -${skillsSection ? `\n${skillsSection}` : ""} +${skillsSection ? 
`\n${skillsSection}` : ""}${combinedLearningContext} ${getRulesSection(cwd, settings)} ${getSystemInfoSection(cwd)} @@ -126,6 +159,7 @@ export const SYSTEM_PROMPT = async ( todoList?: TodoItem[], modelId?: string, skillsManager?: SkillsManager, + selfImprovingManager?: SelfImprovingManager, ): Promise => { if (!context) { throw new Error("Extension context is required for generating system prompt") @@ -154,5 +188,6 @@ export const SYSTEM_PROMPT = async ( todoList, modelId, skillsManager, + selfImprovingManager, ) } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index fcdfd0263d..3ed91d9544 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -128,6 +128,10 @@ import { import { processUserContentMentions } from "../mentions/processUserContentMentions" import { getMessagesSinceLastSummary, summarizeConversation, getEffectiveApiHistory } from "../condense" import { MessageQueueService } from "../message-queue/MessageQueueService" +import { QuestionEvaluatorService } from "../../services/self-improving/QuestionEvaluatorService" +import { ToolErrorHealer } from "../../services/self-improving/ToolErrorHealer" +import { ResilienceService } from "../../services/self-improving/ResilienceService" +import { PreventionEngine } from "../../services/self-improving/PreventionEngine" import { AutoApprovalHandler, checkAutoApproval } from "../auto-approval" import { MessageManager } from "../message-manager" import { validateAndFixToolResultIds } from "./validateToolResultIds" @@ -286,6 +290,10 @@ export class Task extends EventEmitter implements TaskLike { api: ApiHandler private static lastGlobalApiRequestTime?: number private autoApprovalHandler: AutoApprovalHandler + questionEvaluator: QuestionEvaluatorService | undefined + toolErrorHealer: ToolErrorHealer | undefined + resilienceService: ResilienceService | undefined + preventionEngine: PreventionEngine | undefined /** * Reset the global API request timestamp. This should only be used for testing. 
@@ -1217,7 +1225,13 @@ export class Task extends EventEmitter implements TaskLike { // Automatically approve if the ask according to the user's settings. const provider = this.providerRef.deref() const state = provider ? await provider.getState() : undefined - const approval = await checkAutoApproval({ state, ask: type, text, isProtected }) + const approval = await checkAutoApproval({ + state, + ask: type, + text, + isProtected, + trustService: provider?.trustService, + }) if (approval.decision === "approve") { this.approveAsk() @@ -1225,9 +1239,45 @@ export class Task extends EventEmitter implements TaskLike { this.denyAsk() } else if (approval.decision === "timeout") { // Store the auto-approval timeout so it can be cancelled if user interacts - this.autoApprovalTimeoutRef = setTimeout(() => { - const { askResponse, text, images } = approval.fn() - this.handleWebviewAskResponse(askResponse, text, images) + this.autoApprovalTimeoutRef = setTimeout(async () => { + // Use QuestionEvaluatorService to pick the best choice when available + if ( + this.questionEvaluator && + this.questionEvaluator.getConfig().enabled && + type === "followup" && + text + ) { + try { + const followUpData = JSON.parse(text) as { + question?: string + suggest?: Array<{ answer: string; mode?: string }> + } + if (followUpData.suggest && followUpData.suggest.length > 0) { + const evaluation = await this.questionEvaluator.evaluateBestChoice( + followUpData.question ?? "", + followUpData.suggest.map((s) => ({ text: s.answer, mode: s.mode ?? 
null })), + ) + console.error( + `[Task] Question evaluated: chose #${evaluation.selectedIndex + 1} via ${evaluation.evaluatedBy}: "${evaluation.selectedText.substring(0, 60)}..."`, + ) + // If evaluation returned empty result, fall back to default first choice + // This prevents empty responses when Full Trust + Auto-approve Question are enabled + if (evaluation.selectedText && evaluation.selectedText.trim().length > 0) { + this.handleWebviewAskResponse("messageResponse", evaluation.selectedText) + this.autoApprovalTimeoutRef = undefined + return + } + console.error( + `[Task] Question evaluation returned empty result, falling back to first choice`, + ) + } + } catch (error) { + console.error(`[Task] Question evaluation failed, falling back to first choice: ${error}`) + } + } + // Fallback: first choice + const { askResponse, text: responseText, images } = approval.fn() + this.handleWebviewAskResponse(askResponse, responseText, images) this.autoApprovalTimeoutRef = undefined }, approval.timeout) timeouts.push(this.autoApprovalTimeoutRef) @@ -1451,6 +1501,7 @@ export class Task extends EventEmitter implements TaskLike { } const provider = this.providerRef.deref() + const shouldRecordCorrection = !!this.taskAsk if (provider) { if (mode) { @@ -1468,6 +1519,26 @@ export class Task extends EventEmitter implements TaskLike { } } + if (shouldRecordCorrection) { + void provider + .getSelfImprovingManager?.() + ?.recordUserCorrection({ + taskId: this.taskId, + success: false, + corrected: true, + }) + .catch((error: unknown) => { + console.error("[Task#submitUserMessage] Failed to record user correction:", error) + }) + } + + // Reset TrustService taskCompleted latch when user sends a new message. + // This allows subsequent attempt_completion calls to be auto-approved + // after the user provides follow-up feedback on a completed task. 
+ if (provider.trustService?.taskCompleted) { + provider.trustService.taskCompleted = false + } + this.emit(RooCodeEventName.TaskUserMessage, this.taskId) // Handle the message directly instead of routing through the webview. @@ -1721,13 +1792,28 @@ export class Task extends EventEmitter implements TaskLike { } async sayAndCreateMissingParamError(toolName: ToolName, paramName: string, relPath?: string) { + // Consult ToolErrorHealer for fix suggestion first so we can include it in both say and tool result + const fix = this.toolErrorHealer?.handleToolError(toolName, paramName) + const fixSuffix = fix ? ` Suggestion: ${fix.fix}` : "" + await this.say( "error", `Roo tried to use ${toolName}${ relPath ? ` for '${relPath.toPosix()}'` : "" - } without value for required parameter '${paramName}'. Retrying...`, + } without value for required parameter '${paramName}'. Retrying...${fixSuffix}`, ) - return formatResponse.toolError(formatResponse.missingToolParameterError(paramName)) + + // Build error message with healing suggestion prominently included + const baseError = formatResponse.missingToolParameterError(paramName, toolName) + if (fix) { + const fixHint = fix.autoCorrectable ? `\n\n[Fix suggestion: ${fix.fix}]` : `\n\n[Hint: ${fix.fix}]` + // Also try getHealingSuggestion for additional context from error message parsing + const healingSuggestion = this.toolErrorHealer?.getHealingSuggestion(toolName, baseError) + const healingSuffix = healingSuggestion ? 
`\n\n${healingSuggestion}` : "" + return formatResponse.toolError(baseError + fixHint + healingSuffix) + } + + return formatResponse.toolError(baseError) } // Lifecycle @@ -2106,6 +2192,16 @@ export class Task extends EventEmitter implements TaskLike { const tokenUsage = this.getTokenUsage() this.debouncedEmitTokenUsage(tokenUsage, this.toolUsage) this.debouncedEmitTokenUsage.flush() + + // Record token usage in insights engine for session analysis + if (tokenUsage) { + const totalTokens = (tokenUsage.totalTokensIn ?? 0) + (tokenUsage.totalTokensOut ?? 0) + const cost = tokenUsage.totalCost ?? 0 + this.providerRef + .deref() + ?.getSelfImprovingManager?.() + ?.insightsEngine?.recordTokenUsage(totalTokens, cost, "session") + } } public async abortTask(isAbandoned = false) { @@ -3153,8 +3249,45 @@ export class Task extends EventEmitter implements TaskLike { `[Task#${this.taskId}.${this.instanceId}] Stream failed, will retry: ${streamingFailedMessage}`, ) - // Apply exponential backoff similar to first-chunk errors when auto-resubmit is enabled - const stateForBackoff = await this.providerRef.deref()?.getState() + // Check if this is a large response failure (not a model error) + // Large responses occur when the model tries to deliver a comprehensive result + // that exceeds API limits — don't trigger recovery, just suggest shortening + if (this.resilienceService?.isLargeResponseFailure(rawErrorMessage)) { + const suggestion = this.resilienceService.onLargeResponseFailure() + await this.say("error", `[Recovery] ${suggestion}`) + // Don't abort — let the model retry with a shorter response + // Push the same content back onto the stack to retry + stack.push({ + userContent: currentUserContent, + includeFileDetails: false, + retryAttempt: (currentItem.retryAttempt ?? 0) + 1, + }) + continue + } + + // Consult ResilienceService for backoff and recovery guidance + const backoffDelay = this.resilienceService?.onStreamingFailure() ?? 
-1 + + if (backoffDelay < 0) { + // Max retries exceeded — enter recovery mode + console.error( + `[Task#${this.taskId}.${this.instanceId}] Max streaming retries exceeded. Entering recovery mode.`, + ) + const recoverySuggestion = this.resilienceService?.getRecoverySuggestion() + if (recoverySuggestion) { + await this.say("error", `[Recovery] ${recoverySuggestion}`) + } + // Fall through to abort the task + this.abortReason = "streaming_failed" + await this.abortTask() + break + } + + // Apply exponential backoff from resilience service + await delay(backoffDelay) + + // Also apply existing backoff for auto-approval mode + const stateForBackoff = await provider?.getState() if (stateForBackoff?.autoApprovalEnabled) { await this.backoffAndAnnounce(currentItem.retryAttempt ?? 0, error) @@ -3170,9 +3303,49 @@ export class Task extends EventEmitter implements TaskLike { } } - // Push the same content back onto the stack to retry, incrementing the retry attempt counter + // --- Recovery context injection --- + // Classify the error and generate recovery context to enrich the retry + // Non-blocking: if enrichment fails, we retry with original content + let retryUserContent = currentUserContent + try { + const sim = this.providerRef.deref()?.getSelfImprovingManager?.() + if (sim?.preventionEngine && this.resilienceService) { + const classifiedError = sim.preventionEngine + .getErrorClassifier() + .classify(rawErrorMessage) + // Extract text from current user content for enrichment + const originalText = currentUserContent + .filter((c): c is Anthropic.TextBlockParam => c.type === "text") + .map((c) => c.text) + .join("\n") + if (originalText) { + const enriched = await this.resilienceService.generateRecoveryContext( + classifiedError, + originalText, + state?.experiments, + ) + if (enriched !== originalText) { + // Replace text blocks with enriched version + retryUserContent = [ + { type: "text" as const, text: enriched }, + ...currentUserContent.filter((c) => c.type !== 
"text"), + ] + console.log( + `[Task#${this.taskId}.${this.instanceId}] Recovery context injected for retry`, + ) + } + } + } + } catch (recoveryError) { + // Graceful fallback — retry with original content + console.error( + `[Task#${this.taskId}.${this.instanceId}] Recovery context injection failed: ${recoveryError instanceof Error ? recoveryError.message : String(recoveryError)}`, + ) + } + + // Push content back onto the stack to retry, incrementing the retry attempt counter stack.push({ - userContent: currentUserContent, + userContent: retryUserContent, includeFileDetails: false, retryAttempt: (currentItem.retryAttempt ?? 0) + 1, }) @@ -3699,6 +3872,7 @@ export class Task extends EventEmitter implements TaskLike { undefined, // todoList this.api.getModel().id, provider.getSkillsManager(), + provider.getSelfImprovingManager(), ) })() } @@ -4505,6 +4679,26 @@ export class Task extends EventEmitter implements TaskLike { } this.toolUsage[toolName].attempts++ + + // Record tool usage in insights engine for session analysis + this.providerRef.deref()?.getSelfImprovingManager?.()?.insightsEngine?.recordToolUsage(toolName) + } + + /** + * Get prevention context for a tool call before execution. + * Returns warnings and suggestions that can be injected into the model's context. 
+ */ + public getToolPreventionContext( + toolName: string, + params: Record, + ): string | null { + const sim = this.providerRef.deref()?.getSelfImprovingManager?.() + if (!sim?.preventionEngine) { + return null + } + + const context = sim.preventionEngine.getPreventionContext(toolName, params) + return sim.preventionEngine.generatePreventionMessage(context) } public recordToolError(toolName: ToolName, error?: string) { @@ -4517,6 +4711,15 @@ export class Task extends EventEmitter implements TaskLike { if (error) { this.emit(RooCodeEventName.TaskToolFailed, this.taskId, toolName, error) } + + // Record tool error in insights engine for session analysis + this.providerRef.deref()?.getSelfImprovingManager?.()?.insightsEngine?.recordError(toolName, error) + + // Record tool error in prevention engine for cascade tracking + const sim = this.providerRef.deref()?.getSelfImprovingManager?.() + if (sim?.preventionEngine) { + sim.preventionEngine.recordToolResult(toolName, error ?? "unknown error", {}) + } } // Getters diff --git a/src/core/task/__tests__/Task.spec.ts b/src/core/task/__tests__/Task.spec.ts index 6a65c858f9..e8e59734fe 100644 --- a/src/core/task/__tests__/Task.spec.ts +++ b/src/core/task/__tests__/Task.spec.ts @@ -1485,6 +1485,64 @@ describe("Cline", () => { // Restore console.error consoleErrorSpy.mockRestore() }) + + it("records a user correction when replying to an interactive ask", async () => { + const recordUserCorrection = vi.fn().mockResolvedValue(undefined) + mockProvider.getSelfImprovingManager = vi.fn().mockReturnValue({ recordUserCorrection }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "initial task", + startTask: false, + }) + + const handleResponseSpy = vi.spyOn(task, "handleWebviewAskResponse") + ;(task as any).interactiveAsk = { type: "ask", text: "Need clarification" } + + await task.submitUserMessage("here is the correction") + + expect(recordUserCorrection).toHaveBeenCalledWith({ + 
taskId: task.taskId, + success: false, + corrected: true, + }) + expect(handleResponseSpy).toHaveBeenCalledWith("messageResponse", "here is the correction", []) + }) + + it("does not wait for correction telemetry before handling the user response", async () => { + let resolveCorrection: (() => void) | undefined + const recordUserCorrection = vi.fn( + () => + new Promise((resolve) => { + resolveCorrection = resolve + }), + ) + mockProvider.getSelfImprovingManager = vi.fn().mockReturnValue({ recordUserCorrection }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "initial task", + startTask: false, + }) + + const handleResponseSpy = vi.spyOn(task, "handleWebviewAskResponse") + ;(task as any).interactiveAsk = { type: "ask", text: "Need clarification" } + + const submitPromise = task.submitUserMessage("here is the correction") + await Promise.resolve() + + expect(recordUserCorrection).toHaveBeenCalledWith({ + taskId: task.taskId, + success: false, + corrected: true, + }) + expect(handleResponseSpy).toHaveBeenCalledWith("messageResponse", "here is the correction", []) + + resolveCorrection?.() + await submitPromise + }) }) }) diff --git a/src/core/tools/AttemptCompletionTool.ts b/src/core/tools/AttemptCompletionTool.ts index a70576d75f..e53ce23043 100644 --- a/src/core/tools/AttemptCompletionTool.ts +++ b/src/core/tools/AttemptCompletionTool.ts @@ -10,6 +10,8 @@ import type { ToolUse } from "../../shared/tools" import { t } from "../../i18n" import { BaseTool, ToolCallbacks } from "./BaseTool" +import { RequirementsVerifier } from "../../services/self-improving/RequirementsVerifier" +import { VerificationEngine } from "../../services/self-improving/VerificationEngine" interface AttemptCompletionParams { result: string @@ -36,10 +38,48 @@ interface DelegationProvider { export class AttemptCompletionTool extends BaseTool<"attempt_completion"> { readonly name = "attempt_completion" as const + /** + * Tracks the last result 
text per task to guard against duplicate completions. + * Unlike a permanent boolean flag, this allows new attempt_completion calls + * with different result content (e.g., after user feedback or a new task cycle). + * Only exact duplicate result text is blocked. + */ + private static lastResults = new Map() + + /** Optional requirements verifier for checking user intent fulfillment */ + private requirementsVerifier?: RequirementsVerifier + /** Optional verification engine for code quality checks */ + private verificationEngine?: VerificationEngine + + /** + * Set the verifiers used to guard completion. + */ + setVerifiers(requirementsVerifier?: RequirementsVerifier, verificationEngine?: VerificationEngine): void { + this.requirementsVerifier = requirementsVerifier + this.verificationEngine = verificationEngine + } + + /** + * Resets the completion tracking state. Used in tests to prevent + * cross-test contamination from the static Map. + */ + static reset(): void { + AttemptCompletionTool.lastResults.clear() + } + async execute(params: AttemptCompletionParams, task: Task, callbacks: AttemptCompletionCallbacks): Promise { const { result } = params const { handleError, pushToolResult, askFinishSubTaskApproval } = callbacks + // Guard: block only duplicate result text, not ALL future completions + const lastResult = AttemptCompletionTool.lastResults.get(task.taskId) + if (lastResult !== undefined && lastResult === result) { + pushToolResult( + formatResponse.toolResult("Task already completed with the same result. 
No further action needed."), + ) + return + } + // Prevent attempt_completion if any tool failed in the current turn if (task.didToolFailInCurrentTurn) { const errorMsg = t("common:errors.attempt_completion_tool_failed") @@ -76,6 +116,39 @@ export class AttemptCompletionTool extends BaseTool<"attempt_completion"> { return } + // Accumulate requirements from ALL user messages before verification + if (this.requirementsVerifier) { + const userMessages = this.getAllUserMessages(task) + this.requirementsVerifier.processUserMessages(userMessages) + } + + // Guard 5: Requirements verification — check user intent is fulfilled + if (this.requirementsVerifier) { + const reqResult = await this.requirementsVerifier.verify() + if (!reqResult.passed && this.requirementsVerifier.getConfig().mandatory) { + const errorMsg = `Requirements verification failed:\n${reqResult.summary}\n\nFailed requirements:\n${reqResult.failed.map((r) => ` ❌ ${r.text}`).join("\n")}\n\nPending requirements:\n${reqResult.pending.map((r) => ` ⏳ ${r.text}`).join("\n")}\n\nPlease address these requirements before completing the task.` + task.consecutiveMistakeCount++ + task.recordToolError("attempt_completion") + pushToolResult(formatResponse.toolError(errorMsg)) + return + } + } + + // Guard 6: Code quality verification (VerificationEngine) + // Skip verification for research mode — research tasks don't need build/lint/types/tests + const currentMode = await task.getTaskMode() + const isResearchMode = currentMode.includes("research") + if (this.verificationEngine && !isResearchMode) { + const verResult = await this.verificationEngine.verify() + if (!verResult.passed && this.verificationEngine.getConfig().mandatory) { + const errorMsg = `Code quality verification failed:\n${verResult.summary}\n\nFailed gates:\n${verResult.gates.filter((g) => !g.passed).map((g) => ` ❌ ${g.name}: ${g.error || "failed"}`).join("\n")}\n\nPlease fix these issues before completing the task.` + task.consecutiveMistakeCount++ + 
task.recordToolError("attempt_completion") + pushToolResult(formatResponse.toolError(errorMsg)) + return + } + } + task.consecutiveMistakeCount = 0 await task.say("completion_result", result, undefined, false) @@ -105,7 +178,7 @@ export class AttemptCompletionTool extends BaseTool<"attempt_completion"> { pushToolResult, ) if (delegation === "delegated") { - this.emitTaskCompleted(task) + this.emitTaskCompleted(task, result) } if (delegation !== "continue") return } else { @@ -132,10 +205,18 @@ export class AttemptCompletionTool extends BaseTool<"attempt_completion"> { const { response, text, images } = await task.ask("completion_result", "", false) if (response === "yesButtonClicked") { - this.emitTaskCompleted(task) + this.emitTaskCompleted(task, result) return } + // User provided feedback - reset completion tracking so subsequent + // attempt_completion calls are not blocked by stale guard state. + AttemptCompletionTool.lastResults.delete(task.taskId) + const provider = task.providerRef.deref() + if (provider?.trustService) { + provider.trustService.taskCompleted = false + } + // User provided feedback - push tool result to continue the conversation await task.say("user_feedback", text ?? "", images) @@ -196,7 +277,39 @@ export class AttemptCompletionTool extends BaseTool<"attempt_completion"> { } } - private emitTaskCompleted(task: Task): void { + /** + * Extract all user messages from the task's conversation history. + * Includes the initial task prompt and all user_feedback messages. 
+ */ + private getAllUserMessages(task: Task): string[] { + const messages: string[] = [] + + // Get the initial task prompt from metadata + if (task.metadata?.task) { + messages.push(task.metadata.task) + } + + // Get user feedback messages from clineMessages + const clineMessages = task.clineMessages || [] + for (const msg of clineMessages) { + if (msg.type === "say" && msg.say === "user_feedback" && msg.text) { + messages.push(msg.text) + } + } + + return messages + } + + private emitTaskCompleted(task: Task, result: string): void { + // Store the result text to guard against duplicate completions + AttemptCompletionTool.lastResults.set(task.taskId, result) + + // Notify TrustService that task has completed to block auto-approval of subsequent attempt_completion + const provider = task.providerRef.deref() + if (provider?.trustService) { + provider.trustService.taskCompleted = true + } + // Force final token usage update before emitting TaskCompleted. // This ensures the latest stats are captured regardless of throttle timer. 
task.emitFinalTokenUsageUpdate() diff --git a/src/core/tools/CodebaseSearchTool.ts b/src/core/tools/CodebaseSearchTool.ts index f0d906fabd..9191427e6c 100644 --- a/src/core/tools/CodebaseSearchTool.ts +++ b/src/core/tools/CodebaseSearchTool.ts @@ -71,6 +71,11 @@ export class CodebaseSearchTool extends BaseTool<"codebase_search"> { } const searchResults: VectorStoreSearchResult[] = await manager.searchIndex(query, directoryPrefix) + await task.providerRef.deref()?.getSelfImprovingManager?.()?.recordCodeIndexEvent(task.taskId, { + available: true, + hits: searchResults.length, + topScore: searchResults[0]?.score, + }) if (!searchResults || searchResults.length === 0) { pushToolResult(`No relevant code snippets found for the query: "${query}"`) diff --git a/src/core/tools/__tests__/CodebaseSearchTool.spec.ts b/src/core/tools/__tests__/CodebaseSearchTool.spec.ts new file mode 100644 index 0000000000..f6aae41659 --- /dev/null +++ b/src/core/tools/__tests__/CodebaseSearchTool.spec.ts @@ -0,0 +1,68 @@ +import * as vscode from "vscode" + +import { CodebaseSearchTool } from "../CodebaseSearchTool" +import { CodeIndexManager } from "../../../services/code-index/manager" + +vi.mock("vscode", () => ({ + workspace: { + asRelativePath: vi.fn((filePath: string) => filePath.replace("/workspace/", "")), + }, +})) + +vi.mock("../../../services/code-index/manager", () => ({ + CodeIndexManager: { + getInstance: vi.fn(), + }, +})) + +describe("CodebaseSearchTool", () => { + it("records self-improving code index hit details from search results", async () => { + const recordCodeIndexEvent = vi.fn().mockResolvedValue(undefined) + const getSelfImprovingManager = vi.fn().mockReturnValue({ recordCodeIndexEvent }) + const searchIndex = vi.fn().mockResolvedValue([ + { + payload: { + filePath: "/workspace/src/example.ts", + startLine: 10, + endLine: 20, + codeChunk: "const answer = 42", + }, + score: 0.87, + }, + ]) + vi.mocked(CodeIndexManager.getInstance).mockReturnValue({ + isFeatureEnabled: 
true, + isFeatureConfigured: true, + searchIndex, + } as any) + + const task = { + cwd: "/workspace", + taskId: "task-1", + consecutiveMistakeCount: 0, + providerRef: { + deref: vi.fn().mockReturnValue({ + context: {}, + getSelfImprovingManager, + }), + }, + say: vi.fn().mockResolvedValue(undefined), + } as any + const callbacks = { + askApproval: vi.fn().mockResolvedValue(true), + handleError: vi.fn().mockResolvedValue(undefined), + pushToolResult: vi.fn(), + } + + const tool = new CodebaseSearchTool() + await tool.execute({ query: "find the answer" }, task, callbacks) + + expect(recordCodeIndexEvent).toHaveBeenCalledWith("task-1", { + available: true, + hits: 1, + topScore: 0.87, + }) + expect(callbacks.handleError).not.toHaveBeenCalled() + expect(vscode.workspace.asRelativePath).toHaveBeenCalledWith("/workspace/src/example.ts", false) + }) +}) diff --git a/src/core/tools/__tests__/attemptCompletionTool.spec.ts b/src/core/tools/__tests__/attemptCompletionTool.spec.ts index 6c9b9a2ccc..135a385149 100644 --- a/src/core/tools/__tests__/attemptCompletionTool.spec.ts +++ b/src/core/tools/__tests__/attemptCompletionTool.spec.ts @@ -38,7 +38,7 @@ vi.mock("../../../shared/package", () => ({ }, })) -import { attemptCompletionTool, AttemptCompletionCallbacks } from "../AttemptCompletionTool" +import { AttemptCompletionTool, attemptCompletionTool, AttemptCompletionCallbacks } from "../AttemptCompletionTool" import { Task } from "../../task/Task" import * as vscode from "vscode" @@ -52,6 +52,7 @@ describe("attemptCompletionTool", () => { let mockGetConfiguration: ReturnType beforeEach(() => { + AttemptCompletionTool.reset() mockCaptureTaskCompleted.mockReset() mockPushToolResult = vi.fn() mockAskApproval = vi.fn() @@ -79,10 +80,12 @@ describe("attemptCompletionTool", () => { emitFinalTokenUsageUpdate: vi.fn(), emit: vi.fn(), getTokenUsage: vi.fn().mockReturnValue({}), + getTaskMode: vi.fn().mockResolvedValue("code"), toolUsage: {}, taskId: "task_1", apiConfiguration: { 
apiProvider: "test" } as any, api: { getModel: vi.fn().mockReturnValue({ id: "test-model", info: {} }) } as any, + providerRef: { deref: vi.fn().mockReturnValue(undefined) } as any, } }) diff --git a/src/core/tools/validateToolUse.ts b/src/core/tools/validateToolUse.ts index 243a170ed9..209973ef14 100644 --- a/src/core/tools/validateToolUse.ts +++ b/src/core/tools/validateToolUse.ts @@ -160,8 +160,8 @@ export function isToolAllowedForMode( // These should be allowed if the mcp group is allowed for the mode const isDynamicMcpTool = tool.startsWith("mcp_") - if (experiments && Object.values(EXPERIMENT_IDS).includes(tool as ExperimentId)) { - if (!experiments[tool]) { + if (experiments && Object.values(EXPERIMENT_IDS).includes(tool as string as ExperimentId)) { + if (!experiments[tool as ExperimentId]) { return false } } diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 3f5af94cae..9f882ad640 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -75,6 +75,9 @@ import { ShadowCheckpointService } from "../../services/checkpoints/ShadowCheckp import { CodeIndexManager } from "../../services/code-index/manager" import type { IndexProgressUpdate } from "../../services/code-index/interfaces/manager" import { MdmService } from "../../services/mdm/MdmService" +import { SelfImprovingManager, RequirementsVerifier, VerificationEngine, LLMConflictResolver } from "../../services/self-improving" +import { TrustService } from "../../services/self-improving/TrustService" +import { QuestionEvaluatorService } from "../../services/self-improving/QuestionEvaluatorService" import { SkillsManager } from "../../services/skills/SkillsManager" import { fileExistsAtPath } from "../../utils/fs" @@ -84,6 +87,7 @@ import { getWorkspacePath } from "../../utils/path" import { OrganizationAllowListViolationError } from "../../utils/errors" import { setPanel } from "../../activate/registerCommands" +import { 
attemptCompletionTool } from "../tools/AttemptCompletionTool" import { t } from "../../i18n" @@ -163,6 +167,8 @@ export class ClineProvider public readonly latestAnnouncementId = "may-2026-v3.55.0-mimo-handoff-stability" // v3.55.0 Xiaomi MiMo, upstream handoff updates, stability fixes public readonly providerSettingsManager: ProviderSettingsManager public readonly customModesManager: CustomModesManager + public readonly selfImprovingManager: SelfImprovingManager + public readonly trustService: TrustService constructor( readonly context: vscode.ExtensionContext, @@ -208,6 +214,11 @@ export class ClineProvider this.customModesManager = new CustomModesManager(this.context, async () => { await this.postStateToWebviewWithoutClineMessages() + // Re-create auto modes when .roomodes changes — CustomModesManager reloads + // merged state and may overwrite auto-created modes. + if (this.selfImprovingManager) { + await this.selfImprovingManager.recreateModes() + } }) // Initialize MCP Hub through the singleton manager @@ -226,6 +237,65 @@ export class ClineProvider this.log(`Failed to initialize Skills Manager: ${error}`) }) + // Initialize Self-Improving Manager (experiment-gated, zero overhead when disabled) + this.selfImprovingManager = new SelfImprovingManager({ + globalStoragePath: this.contextProxy.globalStorageUri.fsPath, + logger: { + appendLine: (message: string) => this.log(message), + }, + getExperiments: () => this.getGlobalStateSafe("experiments"), + getMemoryBackend: () => this.getGlobalStateSafe("memoryBackend"), + getAgentMemoryUrl: () => this.getGlobalStateSafe("agentMemoryUrl"), + getSelfImprovingScope: () => this.getGlobalStateSafe("selfImprovingScope"), + getAutoSkillsScope: () => this.getGlobalStateSafe("selfImprovingAutoSkillsScope"), + getWorkspacePath: () => this.currentWorkspacePath, + skillsManager: this.skillsManager, + }) + + // Wire CustomModesManager into ModeFactoryService for auto mode creation + 
this.selfImprovingManager.setCustomModesManager(this.customModesManager) + + // Initialize TrustService for auto-approval (experiment-gated) + this.trustService = new TrustService( + { + appendLine: (message: string) => this.log(message), + }, + { + enabled: this.getGlobalStateSafe("experiments")?.selfImprovingFullTrust ?? false, + }, + ) + + // Wire RequirementsVerifier and VerificationEngine into AttemptCompletionTool + const experiments = this.getGlobalStateSafe("experiments") + if (experiments?.requirementsVerification) { + const apiConfiguration = this.contextProxy.getProviderSettings() + const conflictResolver = apiConfiguration?.apiProvider + ? new LLMConflictResolver(apiConfiguration) + : undefined // falls back to KeywordConflictResolver + const requirementsVerifier = new RequirementsVerifier( + { appendLine: (message: string) => this.log(message) }, + { mandatory: true, autoExtract: true, requireAllVerified: true }, + conflictResolver, + ) + const verificationEngine = new VerificationEngine( + { appendLine: (message: string) => this.log(message) }, + { + checkBuild: true, + buildCommand: "pnpm build", + checkLint: true, + lintCommand: "pnpm lint", + checkTypes: true, + typeCheckCommand: "pnpm check-types", + checkTests: true, + testCommand: "pnpm test", + mandatory: true, + gateTimeoutMs: 120_000, + }, + ) + attemptCompletionTool.setVerifiers(requirementsVerifier, verificationEngine) + this.log("[ClineProvider] Requirements verification wired into AttemptCompletionTool") + } + this.marketplaceManager = new MarketplaceManager(this.context, this.customModesManager) // Forward task events to the provider. 
@@ -233,10 +303,60 @@ export class ClineProvider this.taskCreationCallback = (instance: Task) => { this.emit(RooCodeEventName.TaskCreated, instance) + const recordTaskCompletionForLearning = (success: boolean) => { + void instance + .getTaskMode() + .catch(() => defaultModeSlug) + .then((mode) => + this.selfImprovingManager.recordTaskCompletion({ + taskId: instance.taskId, + mode, + workspacePath: this.currentWorkspacePath, + success, + toolNames: instance.toolUsage ? Object.keys(instance.toolUsage) : undefined, + errorKey: !success && instance.abortReason ? instance.abortReason : undefined, + toolIterationCount: instance.toolUsage + ? Object.values(instance.toolUsage).reduce( + (sum, toolStat) => sum + toolStat.attempts, + 0, + ) + : undefined, + }), + ) + .catch((error) => { + this.log( + `[SelfImproving] recordTaskCompletion error: ${error instanceof Error ? error.message : String(error)}`, + ) + }) + } + // Create named listener functions so we can remove them later. const onTaskStarted = () => this.emit(RooCodeEventName.TaskStarted, instance.taskId) const onTaskCompleted = (taskId: string, tokenUsage: TokenUsage, toolUsage: ToolUsage) => { this.emit(RooCodeEventName.TaskCompleted, taskId, tokenUsage, toolUsage) + + // Feed task completion into self-improving system + recordTaskCompletionForLearning(true) + this.selfImprovingManager.triggerReview().catch((error) => { + this.log( + `[SelfImproving] triggerReview error: ${error instanceof Error ? error.message : String(error)}`, + ) + }) + + // Notify resilience service of task success to reset recovery state + this.selfImprovingManager.resilienceService.onTaskSuccess() + } + const onTaskUserMessageForLearning = (_taskId: string) => { + this.selfImprovingManager.recordUserTurn().catch((error) => { + this.log( + `[SelfImproving] recordUserTurn error: ${error instanceof Error ? 
error.message : String(error)}`, + ) + }) + this.selfImprovingManager.triggerReview().catch((error) => { + this.log( + `[SelfImproving] triggerReview error: ${error instanceof Error ? error.message : String(error)}`, + ) + }) } const onTaskAborted = async () => { this.emit(RooCodeEventName.TaskAborted, instance.taskId) @@ -266,6 +386,19 @@ export class ClineProvider }`, ) } + + // Feed task abortion into self-improving system + recordTaskCompletionForLearning(false) + + // Only trigger review on genuine streaming failures, not user-initiated cancels + // (user may resume an aborted task, making the failure signal premature) + if (instance.abortReason === "streaming_failed") { + this.selfImprovingManager.triggerReview().catch((error) => { + this.log( + `[SelfImproving] triggerReview error: ${error instanceof Error ? error.message : String(error)}`, + ) + }) + } } const onTaskFocused = () => this.emit(RooCodeEventName.TaskFocused, instance.taskId) const onTaskUnfocused = () => this.emit(RooCodeEventName.TaskUnfocused, instance.taskId) @@ -283,6 +416,7 @@ export class ClineProvider // Attach the listeners. 
instance.on(RooCodeEventName.TaskStarted, onTaskStarted) instance.on(RooCodeEventName.TaskCompleted, onTaskCompleted) + instance.on(RooCodeEventName.TaskUserMessage, onTaskUserMessageForLearning) instance.on(RooCodeEventName.TaskAborted, onTaskAborted) instance.on(RooCodeEventName.TaskFocused, onTaskFocused) instance.on(RooCodeEventName.TaskUnfocused, onTaskUnfocused) @@ -300,6 +434,7 @@ export class ClineProvider this.taskEventListeners.set(instance, [ () => instance.off(RooCodeEventName.TaskStarted, onTaskStarted), () => instance.off(RooCodeEventName.TaskCompleted, onTaskCompleted), + () => instance.off(RooCodeEventName.TaskUserMessage, onTaskUserMessageForLearning), () => instance.off(RooCodeEventName.TaskAborted, onTaskAborted), () => instance.off(RooCodeEventName.TaskFocused, onTaskFocused), () => instance.off(RooCodeEventName.TaskUnfocused, onTaskUnfocused), @@ -394,6 +529,15 @@ export class ClineProvider this.log("Cloud profile synchronization is disabled in compatibility mode") } + /** + * Initialize the self-improving manager. + * Called from extension activation after provider construction. + * No-op when experiment is disabled (zero overhead guarantee). + */ + async initializeSelfImproving(): Promise { + await this.selfImprovingManager.initialize() + } + // Adds a new Task instance to clineStack, marking the start of a new task. // The instance is pushed to the top of the stack (LIFO order). 
// When the task is completed, the top instance is removed, reactivating the @@ -603,6 +747,7 @@ export class ClineProvider this.mcpHub = undefined await this.skillsManager?.dispose() this.skillsManager = undefined + await this.selfImprovingManager.dispose() this.marketplaceManager?.cleanup() this.customModesManager?.dispose() this.taskHistoryStore.dispose() @@ -986,6 +1131,9 @@ export class ClineProvider // Preserve the status from the history item to avoid overwriting it when the task saves messages initialStatus: historyItem.status, }) + task.questionEvaluator = this.selfImprovingManager.questionEvaluator + task.toolErrorHealer = this.selfImprovingManager.toolErrorHealer + task.resilienceService = this.selfImprovingManager.resilienceService if (isRehydratingCurrentTask) { // Replace the current task in-place to avoid UI flicker @@ -1864,8 +2012,18 @@ export class ClineProvider await this.postStateToWebview() } + private getGlobalStateSafe(key: K): GlobalState[K] | undefined { + return this.contextProxy.getGlobalState(key) + } + async refreshWorkspace() { + const previousWorkspacePath = this.currentWorkspacePath this.currentWorkspacePath = getWorkspacePath() + + if (previousWorkspacePath !== this.currentWorkspacePath) { + await this.selfImprovingManager.onSettingsChanged(this.getGlobalStateSafe("experiments")) + } + await this.postStateToWebview() } @@ -2094,6 +2252,10 @@ export class ClineProvider imageGenerationProvider, openRouterImageApiKey, openRouterImageGenerationSelectedModel, + memoryBackend, + agentMemoryUrl, + selfImprovingScope, + selfImprovingAutoSkillsScope, lockApiConfigAcrossModes, } = await this.getState() @@ -2264,6 +2426,7 @@ export class ClineProvider followupAutoApproveTimeoutMs: followupAutoApproveTimeoutMs ?? 60000, includeDiagnosticMessages: includeDiagnosticMessages ?? true, maxDiagnosticMessages: maxDiagnosticMessages ?? 
50, + selfImprovingStatus: await this.selfImprovingManager.getStatus(), includeTaskHistoryInEnhance: includeTaskHistoryInEnhance ?? true, includeCurrentTime: includeCurrentTime ?? true, includeCurrentCost: includeCurrentCost ?? true, @@ -2272,6 +2435,10 @@ export class ClineProvider imageGenerationProvider, openRouterImageApiKey, openRouterImageGenerationSelectedModel, + memoryBackend, + agentMemoryUrl, + selfImprovingScope, + selfImprovingAutoSkillsScope, openAiCodexIsAuthenticated: await (async () => { try { const { openAiCodexOAuthManager } = await import("../../integrations/openai-codex/oauth") @@ -2468,6 +2635,10 @@ export class ClineProvider imageGenerationProvider: stateValues.imageGenerationProvider, openRouterImageApiKey: stateValues.openRouterImageApiKey, openRouterImageGenerationSelectedModel: stateValues.openRouterImageGenerationSelectedModel, + memoryBackend: stateValues.memoryBackend, + agentMemoryUrl: stateValues.agentMemoryUrl, + selfImprovingScope: stateValues.selfImprovingScope, + selfImprovingAutoSkillsScope: stateValues.selfImprovingAutoSkillsScope, } } @@ -2646,6 +2817,10 @@ export class ClineProvider return this.skillsManager } + public getSelfImprovingManager(): SelfImprovingManager { + return this.selfImprovingManager + } + /** * Check if the current state is compliant with MDM policy * @returns true if compliant or no MDM policy exists, false if MDM policy exists and user is non-compliant @@ -2693,6 +2868,11 @@ export class ClineProvider // Update the current workspace manager reference this.codeIndexManager = currentManager + // Wire CodeIndexManager into self-improving system for semantic search + if (currentManager) { + this.selfImprovingManager.setCodeIndexManager(currentManager) + } + // Subscribe to the new manager's progress updates if it exists if (currentManager) { this.codeIndexStatusSubscription = currentManager.onProgressUpdate((update: IndexProgressUpdate) => { @@ -2864,6 +3044,9 @@ export class ClineProvider startTask: false, 
...options, }) + task.questionEvaluator = this.selfImprovingManager.questionEvaluator + task.toolErrorHealer = this.selfImprovingManager.toolErrorHealer + task.resilienceService = this.selfImprovingManager.resilienceService await this.addClineToStack(task) task.start() diff --git a/src/core/webview/__tests__/ClineProvider.apiHandlerRebuild.spec.ts b/src/core/webview/__tests__/ClineProvider.apiHandlerRebuild.spec.ts index 9d81880d4a..7b2909273b 100644 --- a/src/core/webview/__tests__/ClineProvider.apiHandlerRebuild.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.apiHandlerRebuild.spec.ts @@ -9,6 +9,8 @@ import { ContextProxy } from "../../config/ContextProxy" import { Task, TaskOptions } from "../../task/Task" import { ClineProvider } from "../ClineProvider" +const mockWorkspacePath = vi.hoisted(() => ({ value: "/test/workspace-one" })) + // Mock setup vi.mock("fs/promises", () => ({ mkdir: vi.fn().mockResolvedValue(undefined), @@ -24,6 +26,10 @@ vi.mock("../../../utils/storage", () => ({ getGlobalStoragePath: vi.fn().mockResolvedValue("/test/storage/path"), })) +vi.mock("../../../utils/path", () => ({ + getWorkspacePath: vi.fn(() => mockWorkspacePath.value), +})) + vi.mock("p-wait-for", () => ({ __esModule: true, default: vi.fn().mockResolvedValue(undefined), @@ -143,6 +149,7 @@ describe("ClineProvider - API Handler Rebuild Guard", () => { beforeEach(async () => { vi.clearAllMocks() + mockWorkspacePath.value = "/test/workspace-one" if (!TelemetryService.hasInstance()) { TelemetryService.createInstance([]) @@ -579,4 +586,29 @@ describe("ClineProvider - API Handler Rebuild Guard", () => { expect(getModelId({})).toBeUndefined() }) }) + + test("includes self-improving scope and memory backend settings in provider state", async () => { + await (provider as any).setValue("selfImprovingScope", "workspace") + await (provider as any).setValue("selfImprovingAutoSkillsScope", "global") + await (provider as any).setValue("memoryBackend", "agentmemory") + await (provider 
as any).setValue("agentMemoryUrl", "http://agentmemory.internal:4001") + + const state = await provider.getState() + + expect((state as any).selfImprovingScope).toBe("workspace") + expect((state as any).selfImprovingAutoSkillsScope).toBe("global") + expect((state as any).memoryBackend).toBe("agentmemory") + expect((state as any).agentMemoryUrl).toBe("http://agentmemory.internal:4001") + }) + + test("refreshWorkspace reconfigures self-improving when the workspace path changes", async () => { + const onSettingsChanged = vi.fn().mockResolvedValue(undefined) + ;(provider as any).selfImprovingManager.onSettingsChanged = onSettingsChanged + await (provider as any).setValue("selfImprovingScope", "workspace") + mockWorkspacePath.value = "/test/workspace-two" + + await provider.refreshWorkspace() + + expect(onSettingsChanged).toHaveBeenCalledTimes(1) + }) }) diff --git a/src/core/webview/__tests__/ClineProvider.flicker-free-cancel.spec.ts b/src/core/webview/__tests__/ClineProvider.flicker-free-cancel.spec.ts index 4bb01347a3..ec3e737cf9 100644 --- a/src/core/webview/__tests__/ClineProvider.flicker-free-cancel.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.flicker-free-cancel.spec.ts @@ -133,6 +133,7 @@ describe("ClineProvider flicker-free cancel", () => { getValue: vi.fn().mockReturnValue(undefined), setValue: vi.fn().mockResolvedValue(undefined), getProviderSettings: vi.fn().mockReturnValue(mockApiConfig), + getGlobalState: vi.fn().mockReturnValue(undefined), extensionUri: mockContext.extensionUri, globalStorageUri: mockContext.globalStorageUri, } diff --git a/src/core/webview/__tests__/webviewMessageHandler.spec.ts b/src/core/webview/__tests__/webviewMessageHandler.spec.ts index ebea3c90b6..29e797c5bf 100644 --- a/src/core/webview/__tests__/webviewMessageHandler.spec.ts +++ b/src/core/webview/__tests__/webviewMessageHandler.spec.ts @@ -61,6 +61,9 @@ const mockFetchOpenAiCodexRateLimitInfo = vi.mocked(fetchOpenAiCodexRateLimitInf const mockClineProvider = { 
getState: vi.fn(), postMessageToWebview: vi.fn(), + selfImprovingManager: { + onSettingsChanged: vi.fn(), + }, customModesManager: { getCustomModes: vi.fn(), deleteCustomMode: vi.fn(), @@ -76,6 +79,7 @@ const mockClineProvider = { }, setValue: vi.fn(), getValue: vi.fn(), + getGlobalState: vi.fn(), }, log: vi.fn(), postStateToWebview: vi.fn(), @@ -863,6 +867,35 @@ describe("webviewMessageHandler - mcpEnabled", () => { }) }) +describe("webviewMessageHandler - self-improving memory settings", () => { + beforeEach(() => { + vi.clearAllMocks() + vi.mocked(mockClineProvider.contextProxy.getGlobalState).mockReturnValue(undefined) + }) + + it("persists self-improving scope and memory backend settings and refreshes self-improving runtime", async () => { + await webviewMessageHandler(mockClineProvider, { + type: "updateSettings", + updatedSettings: { + selfImprovingScope: "workspace", + selfImprovingAutoSkillsScope: "global", + memoryBackend: "agentmemory", + agentMemoryUrl: "http://agentmemory.internal:4001", + } as any, + }) + + expect(mockClineProvider.contextProxy.setValue).toHaveBeenCalledWith("selfImprovingScope", "workspace") + expect(mockClineProvider.contextProxy.setValue).toHaveBeenCalledWith("selfImprovingAutoSkillsScope", "global") + expect(mockClineProvider.contextProxy.setValue).toHaveBeenCalledWith("memoryBackend", "agentmemory") + expect(mockClineProvider.contextProxy.setValue).toHaveBeenCalledWith( + "agentMemoryUrl", + "http://agentmemory.internal:4001", + ) + expect(mockClineProvider.selfImprovingManager.onSettingsChanged).toHaveBeenCalledWith(undefined) + expect(mockClineProvider.postStateToWebview).toHaveBeenCalledTimes(1) + }) +}) + describe("webviewMessageHandler - requestCommands", () => { beforeEach(() => { vi.clearAllMocks() diff --git a/src/core/webview/generateSystemPrompt.ts b/src/core/webview/generateSystemPrompt.ts index 8af2f5ff5d..433563e672 100644 --- a/src/core/webview/generateSystemPrompt.ts +++ b/src/core/webview/generateSystemPrompt.ts 
@@ -64,6 +64,7 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web undefined, // todoList undefined, // modelId provider.getSkillsManager(), + provider.getSelfImprovingManager(), ) return systemPrompt diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index d0af86423b..22ea0b64f3 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -662,6 +662,9 @@ export const webviewMessageHandler = async ( case "updateSettings": if (message.updatedSettings) { + let experimentsUpdated = false + let selfImprovingSettingsUpdated = false + for (const [key, value] of Object.entries(message.updatedSettings)) { let newValue = value @@ -740,17 +743,35 @@ export const webviewMessageHandler = async ( continue } + experimentsUpdated = true newValue = { ...(getGlobalState("experiments") ?? experimentDefault), ...(value as Record), } + } else if ( + key === "memoryBackend" || + key === "agentMemoryUrl" || + key === "selfImprovingScope" || + key === "selfImprovingAutoSkillsScope" + ) { + selfImprovingSettingsUpdated = true } else if (key === "customSupportPrompts") { if (!value) { continue } } - await provider.contextProxy.setValue(key as keyof RooCodeSettings, newValue) + try { + await provider.contextProxy.setValue(key as keyof RooCodeSettings, newValue) + } catch (error) { + console.error(`[Settings] Failed to save ${key}:`, error) + } + } + + if (experimentsUpdated || selfImprovingSettingsUpdated) { + await provider.selfImprovingManager.onSettingsChanged( + provider.contextProxy.getGlobalState("experiments"), + ) } await provider.postStateToWebview() diff --git a/src/extension.ts b/src/extension.ts index 44c1243528..246f31a376 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -198,6 +198,13 @@ export async function activate(context: vscode.ExtensionContext) { // Initialize the provider *before* the Roo Code Cloud service. 
const provider = new ClineProvider(context, outputChannel, "sidebar", contextProxy, mdmService) + // Initialize self-improving manager (experiment-gated, zero overhead when disabled) + provider.initializeSelfImproving().catch((error) => { + outputChannel.appendLine( + `[SelfImproving] Initialization error: ${error instanceof Error ? error.message : String(error)}`, + ) + }) + // Initialize Roo Code Cloud service. const postStateListener = () => ClineProvider.getVisibleInstance()?.postStateToWebviewWithoutClineMessages() diff --git a/src/i18n/locales/en/skills.json b/src/i18n/locales/en/skills.json index 307b59d365..447947e4af 100644 --- a/src/i18n/locales/en/skills.json +++ b/src/i18n/locales/en/skills.json @@ -3,6 +3,7 @@ "name_length": "Skill name must be 1-{{maxLength}} characters (got {{length}})", "name_format": "Skill name must be lowercase letters/numbers/hyphens only (no leading/trailing hyphen, no consecutive hyphens)", "description_length": "Skill description must be 1-1024 characters (got {{length}})", + "invalid_structure": "Invalid SKILL.md structure: {{reason}}", "no_workspace": "Cannot create project skill: no workspace folder is open", "already_exists": "Skill \"{{name}}\" already exists at {{path}}", "not_found": "Skill \"{{name}}\" not found in {{source}}{{modeInfo}}", diff --git a/src/services/checkpoints/ShadowCheckpointService.ts b/src/services/checkpoints/ShadowCheckpointService.ts index 3e3d3d0653..7ad2364b2f 100644 --- a/src/services/checkpoints/ShadowCheckpointService.ts +++ b/src/services/checkpoints/ShadowCheckpointService.ts @@ -95,9 +95,6 @@ function createSanitizedGit(baseDir: string): SimpleGit { config: [], // --template="" stops git copying hooks/templates into the shadow repo (axis 1). // GIT_TEMPLATE_DIR is stripped from the env above to block the env-var path (axis 2). - // allowUnsafeTemplateDir opts out of simple-git ≥3.36's blockUnsafeOperationsPlugin - // so the --template arg is not rejected before reaching git. 
/**
 * The subset of the skills manager that ActionExecutor depends on.
 *
 * The results of the mutating calls are never read by this class, so they are
 * typed as `Promise<unknown>`; any concrete manager whose methods return a
 * promise satisfies the contract.
 */
interface SkillMutationManager {
	createSkillFromContent(
		name: string,
		source: "global" | "project",
		description: string,
		content: string,
		modeSlugs?: string[],
	): Promise<unknown>
	updateSkillContent(name: string, source: "global" | "project", content: string, mode?: string): Promise<unknown>
	getSkillContent?(name: string, currentMode?: string): Promise<{ instructions: string } | null>
}

/**
 * Bundled asset reference for SKILL_CREATE_FROM_SCRATCH.
 */
interface BundledAsset {
	/** Relative path within the skill directory (e.g., "scripts/validate.sh") */
	relativePath: string
	/** File content as string */
	content: string
}

/**
 * ActionExecutor - consumes the pending action queue and executes
 * improvement actions.
 *
 * Each action type maps to a specific executor:
 * - PROMPT_ENRICHMENT: writes to the memory backend (tags: learned, prompt)
 * - ERROR_AVOIDANCE: writes to the memory backend (with `error:<key>` tags)
 * - TOOL_PREFERENCE: writes to the memory backend (with `tool:<name>` tags)
 * - SKILL_SUGGESTION: records the suggestion in SkillUsageStore
 * - SKILL_CREATE / SKILL_UPDATE: mutate skills through the skills manager
 * - SKILL_MERGE: writes an umbrella skill and archives the absorbed skills
 * - SKILL_CREATE_FROM_SCRATCH: builds a full SKILL.md plus optional assets
 *
 * `execute` resolves to `true` when the action needs no further work and to
 * `false` when it should stay pending for a later retry.
 */
export class ActionExecutor {
	private readonly memoryStore: MemoryBackend
	private readonly skillUsageStore: SkillUsageStore
	private readonly logger: Logger
	private readonly skillsManager?: SkillMutationManager

	constructor(
		memoryStore: MemoryBackend,
		skillUsageStore: SkillUsageStore,
		logger: Logger,
		skillsManager?: SkillMutationManager,
	) {
		this.memoryStore = memoryStore
		this.skillUsageStore = skillUsageStore
		this.logger = logger
		this.skillsManager = skillsManager
	}

	/**
	 * Execute a single improvement action.
	 *
	 * Never rejects: any error thrown by an executor is logged and reported as
	 * `false`, except errors whose message contains "already exists", which are
	 * treated as success.
	 *
	 * @returns true if the action was executed (or is already satisfied).
	 */
	async execute(action: ImprovementAction): Promise<boolean> {
		try {
			let executed = false

			switch (action.actionType) {
				case "PROMPT_ENRICHMENT":
					executed = await this.executePromptEnrichment(action)
					break
				case "ERROR_AVOIDANCE":
					executed = await this.executeErrorAvoidance(action)
					break
				case "TOOL_PREFERENCE":
					executed = await this.executeToolPreference(action)
					break
				case "SKILL_SUGGESTION":
					executed = await this.executeSkillSuggestion(action)
					break
				case "SKILL_CREATE":
					executed = await this.executeSkillCreate(action)
					break
				case "SKILL_UPDATE":
					executed = await this.executeSkillUpdate(action)
					break
				case "SKILL_MERGE":
					executed = await this.executeSkillMerge(action)
					break
				case "SKILL_CREATE_FROM_SCRATCH":
					executed = await this.executeSkillCreateFromScratch(action)
					break
				default:
					this.logger.appendLine(`[ActionExecutor] Unknown action type: ${action.actionType}`)
					return false
			}

			this.logger.appendLine(
				`[ActionExecutor] ${executed ? "OK" : "DEF"} ${action.actionType} ${action.id} | ${this.describeAction(action)}`,
			)

			return executed
		} catch (error) {
			const errorMsg = error instanceof Error ? error.message : String(error)
			// Silently skip "already exists" errors — skill was already created by a prior cycle
			if (errorMsg.toLowerCase().includes("already exists")) {
				this.logger.appendLine(`[ActionExecutor] Skill already exists, skipping: ${action.id}`)
				return true // Don't count as error — skill is already present
			}
			this.logger.appendLine(`[ActionExecutor] Execution error for ${action.id}: ${errorMsg}`)
			return false
		}
	}

	/**
	 * Execute a batch of actions one after another, in the given order.
	 *
	 * @returns the set of successfully executed action IDs.
	 */
	async executeBatch(actions: ImprovementAction[]): Promise<Set<string>> {
		const succeeded = new Set<string>()

		for (const action of actions) {
			if (await this.execute(action)) {
				succeeded.add(action.id)
			}
		}

		return succeeded
	}

	/**
	 * Short, log-safe description of an action (at most 100 characters).
	 *
	 * The description is only used for logging, so a missing or non-string
	 * value must yield "" rather than throw: a throw here would land in
	 * `execute`'s catch block and report an already-executed action as failed.
	 */
	private describeAction(action: ImprovementAction): string {
		const description: unknown = (action as { description?: unknown }).description
		return typeof description === "string" ? description.substring(0, 100) : ""
	}

	/** Store the action's summary as a learned prompt memory. */
	private async executePromptEnrichment(action: ImprovementAction): Promise<boolean> {
		const summary = this.readStringPayload(action.payload.summary)
		if (!summary) {
			return false
		}

		await this.memoryStore.store({
			content: summary,
			source: "learning",
			tags: ["learned", "prompt"],
		})

		return true
	}

	/** Store the action's summary tagged with every error key it refers to. */
	private async executeErrorAvoidance(action: ImprovementAction): Promise<boolean> {
		const summary = this.readStringPayload(action.payload.summary)
		if (!summary) {
			return false
		}

		const errorKeys = this.readStringArrayPayload(action.payload.errorKeys)

		await this.memoryStore.store({
			content: summary,
			source: "learning",
			tags: ["error-avoidance", ...errorKeys.map((key) => `error:${key}`)],
		})

		return true
	}

	/** Store the action's summary tagged with every tool it refers to. */
	private async executeToolPreference(action: ImprovementAction): Promise<boolean> {
		const summary = this.readStringPayload(action.payload.summary)
		if (!summary) {
			return false
		}

		const toolNames = this.readStringArrayPayload(action.payload.toolNames)

		await this.memoryStore.store({
			content: summary,
			source: "learning",
			tags: ["tool-preference", ...toolNames.map((toolName) => `tool:${toolName}`)],
		})

		return true
	}

	/**
	 * Record a skill suggestion in the usage store.
	 *
	 * When the payload carries no `skillId`, a stable one is derived from the
	 * lower-cased skill name (first 16 hex characters of its SHA-256).
	 */
	private async executeSkillSuggestion(action: ImprovementAction): Promise<boolean> {
		const summary = this.readStringPayload(action.payload.summary)
		if (!summary) {
			return false
		}

		const skillName = this.readStringPayload(action.payload.skillName) ?? summary
		const skillId =
			this.readStringPayload(action.payload.skillId) ??
			`suggested:${crypto.createHash("sha256").update(skillName.toLowerCase()).digest("hex").slice(0, 16)}`
		const createdBy = this.readSkillProvenance(action.payload.createdBy) ?? "agent"

		this.skillUsageStore.getOrCreate(skillId, skillName, createdBy)
		this.logger.appendLine(`[ActionExecutor] Skill suggestion recorded: ${summary}`)

		return true
	}

	/** Create a skill from payload content; deferred when no skills manager is attached. */
	private async executeSkillCreate(action: ImprovementAction): Promise<boolean> {
		if (!this.skillsManager) {
			this.logger.appendLine("[ActionExecutor] skillsManager not available — deferring SKILL_CREATE")
			return false
		}

		const skillName = this.readStringPayload(action.payload.skillName)
		const description = this.readStringPayload(action.payload.description)
		const content = this.readStringPayload(action.payload.content)
		const source = this.readSkillSource(action.payload.source)
		const modeSlugs = this.readStringArrayPayload(action.payload.modeSlugs)
		const skillId = this.readStringPayload(action.payload.skillId) ?? this.buildSkillId(skillName, source)
		const createdBy = this.readSkillProvenance(action.payload.createdBy) ?? "agent"

		if (!skillName || !description || !content || !source || !skillId) {
			return false
		}

		await this.skillsManager.createSkillFromContent(skillName, source, description, content, modeSlugs)
		this.skillUsageStore.getOrCreate(skillId, skillName, createdBy)
		this.logger.appendLine(`[ActionExecutor] Skill created: ${skillName}`)
		return true
	}

	/**
	 * Replace the content of an existing skill and bump its patch version.
	 *
	 * When the manager can read the current content and it already equals the
	 * requested content (ignoring surrounding whitespace), nothing is written
	 * and the action is reported as done.
	 */
	private async executeSkillUpdate(action: ImprovementAction): Promise<boolean> {
		if (!this.skillsManager) {
			this.logger.appendLine("[ActionExecutor] skillsManager not available — deferring SKILL_UPDATE")
			return false
		}

		const skillName = this.readStringPayload(action.payload.skillName)
		const content = this.readStringPayload(action.payload.content)
		const source = this.readSkillSource(action.payload.source)
		const mode = this.readStringPayload(action.payload.mode)
		const skillId = this.readStringPayload(action.payload.skillId) ?? this.buildSkillId(skillName, source)

		if (!skillName || !content || !source || !skillId) {
			return false
		}

		// Deduplication check: skip update if skill content hasn't changed
		if (this.skillsManager.getSkillContent) {
			try {
				const existing = await this.skillsManager.getSkillContent(skillName, mode)
				if (existing && existing.instructions.trim() === content.trim()) {
					this.logger.appendLine(`[ActionExecutor] Skill content unchanged for ${skillName}, skipping update`)
					// The desired state is already in place. Reporting failure here
					// would keep the action pending and retry it forever with the
					// same outcome.
					return true
				}
			} catch {
				// If we can't read the existing content, proceed with update
				this.logger.appendLine(
					`[ActionExecutor] Could not read existing skill content for ${skillName}, proceeding with update`,
				)
			}
		}

		await this.skillsManager.updateSkillContent(skillName, source, content, mode)
		this.skillUsageStore.getOrCreate(skillId, skillName, "agent")
		await this.skillUsageStore.bumpPatch(skillId)
		this.logger.appendLine(`[ActionExecutor] Skill updated: ${skillName}`)
		return true
	}

	/**
	 * Merge several skills into one umbrella skill.
	 *
	 * If the payload carries `content`, the umbrella skill is created (or
	 * updated when the usage store already knows it). Every absorbed skill is
	 * then linked to the umbrella and moved to the "archived" state.
	 *
	 * The absorb list is trimmed and de-duplicated, and the umbrella's own name
	 * is removed from it so a merge can never archive the skill it just wrote.
	 */
	private async executeSkillMerge(action: ImprovementAction): Promise<boolean> {
		const umbrellaName = this.readStringPayload(action.payload.umbrellaName)
		if (!umbrellaName) {
			return false
		}

		const absorbNames = this.readStringArrayPayload(action.payload.absorbNames).filter(
			(candidate) => candidate !== umbrellaName,
		)
		if (absorbNames.length === 0) {
			return false
		}

		const source = this.readSkillSource(action.payload.source) ?? "global"
		const newContent = this.readStringPayload(action.payload.content)

		// 1. Create or update the umbrella skill
		if (newContent) {
			const skillId = `skill:${source}:${umbrellaName}`
			const description =
				this.readStringPayload(action.payload.description) ?? `Umbrella skill merging ${absorbNames.join(", ")}`
			const modeSlugs = this.readStringArrayPayload(action.payload.modeSlugs)

			if (this.skillsManager) {
				const existing = this.skillUsageStore.get(skillId)
				if (existing) {
					await this.skillsManager.updateSkillContent(umbrellaName, source, newContent, modeSlugs[0])
					await this.skillUsageStore.bumpPatch(skillId)
				} else {
					await this.skillsManager.createSkillFromContent(
						umbrellaName,
						source,
						description,
						newContent,
						modeSlugs,
					)
					this.skillUsageStore.getOrCreate(skillId, umbrellaName, "agent")
				}
			} else {
				// NOTE(review): without a skills manager the umbrella is only
				// registered in the usage store and its content is not written,
				// yet the absorbed skills below are still archived. Confirm this
				// is intended rather than deferring the whole merge.
				this.skillUsageStore.getOrCreate(skillId, umbrellaName, "agent")
			}
		}

		// 2. Mark each absorbed skill
		for (const absorbName of absorbNames) {
			const absorbId = `skill:${source}:${absorbName}`
			this.skillUsageStore.setAbsorbedInto(absorbId, umbrellaName)
			await this.skillUsageStore.transitionState(absorbId, "archived")
			this.logger.appendLine(`[ActionExecutor] Merged ${absorbName} into ${umbrellaName}`)
		}

		this.logger.appendLine(`[ActionExecutor] Merge complete: ${absorbNames.length} skills → ${umbrellaName}`)
		return true
	}

	/**
	 * Execute SKILL_CREATE_FROM_SCRATCH — creates a full specialized skill
	 * with frontmatter, instructions, and optional bundled assets.
	 *
	 * Payload fields:
	 * - name: skill name (checked with `validateSkillName`)
	 * - description: skill description
	 * - instructions: full markdown body (without frontmatter)
	 * - modeSlugs?: string[] — mode restrictions
	 * - tools?: string[] — tool references for the skill
	 * - assets?: BundledAsset[] — referenced scripts/assets to bundle
	 * - source: "global" | "project"
	 */
	private async executeSkillCreateFromScratch(action: ImprovementAction): Promise<boolean> {
		if (!this.skillsManager) {
			this.logger.appendLine("[ActionExecutor] skillsManager not available — deferring SKILL_CREATE_FROM_SCRATCH")
			return false
		}

		const name = this.readStringPayload(action.payload.name)
		const description = this.readStringPayload(action.payload.description)
		const instructions = this.readStringPayload(action.payload.instructions)
		const source = this.readSkillSource(action.payload.source)
		const modeSlugs = this.readStringArrayPayload(action.payload.modeSlugs)
		const tools = this.readStringArrayPayload(action.payload.tools)
		const assets = this.readAssetsPayload(action.payload.assets)
		const skillId = this.readStringPayload(action.payload.skillId) ?? this.buildSkillId(name, source)
		const createdBy = this.readSkillProvenance(action.payload.createdBy) ?? "agent"

		if (!name || !description || !instructions || !source || !skillId) {
			this.logger.appendLine(
				`[ActionExecutor] SKILL_CREATE_FROM_SCRATCH missing required fields: name=${!!name} desc=${!!description} instr=${!!instructions} source=${!!source}`,
			)
			return false
		}

		// Validate skill name format
		const { validateSkillName } = await import("@roo-code/types")
		const validation = validateSkillName(name)
		if (!validation.valid) {
			this.logger.appendLine(
				`[ActionExecutor] SKILL_CREATE_FROM_SCRATCH invalid skill name "${name}": ${validation.error}`,
			)
			return false
		}

		// Build full SKILL.md content with frontmatter. Free-text values come from
		// generated payloads, so they are emitted as quoted YAML scalars; the
		// name has just been validated and is written as-is.
		const frontmatterLines = [`name: ${name}`, `description: ${this.toYamlScalar(description)}`]
		if (modeSlugs.length > 0) {
			frontmatterLines.push("modeSlugs:", ...modeSlugs.map((slug) => `  - ${this.toYamlScalar(slug)}`))
		}
		if (tools.length > 0) {
			frontmatterLines.push("tools:", ...tools.map((tool) => `  - ${this.toYamlScalar(tool)}`))
		}

		const fullContent = `---
${frontmatterLines.join("\n")}
---

${instructions.trim()}
`

		// Create the skill via SkillsManager
		await this.skillsManager.createSkillFromContent(name, source, description, fullContent, modeSlugs)

		// Bundle referenced assets if provided
		if (assets.length > 0) {
			await this.bundleAssets(name, source, assets)
		}

		this.skillUsageStore.getOrCreate(skillId, name, createdBy)
		this.logger.appendLine(`[ActionExecutor] Specialized skill created from scratch: ${name}`)
		return true
	}

	/**
	 * Bundle referenced assets into the skill directory.
	 *
	 * Best-effort: failures are logged and never propagate, so a skill that was
	 * already created is not reported as failed because an asset could not be
	 * written. Assets whose path would land outside the skill directory are
	 * skipped.
	 */
	private async bundleAssets(
		skillName: string,
		source: "global" | "project",
		assets: BundledAsset[],
	): Promise<void> {
		try {
			// Determine skill directory path
			let baseDir: string
			if (source === "global") {
				const { getGlobalRooDirectory } = await import("../roo-config")
				baseDir = getGlobalRooDirectory()
			} else {
				// For project skills, we need the cwd — best-effort
				// NOTE(review): process.cwd() may not be the workspace root — confirm.
				baseDir = process.cwd()
			}

			const skillDir = path.resolve(baseDir, "skills", skillName)
			await fs.mkdir(skillDir, { recursive: true })

			for (const asset of assets) {
				const assetPath = this.resolveAssetPath(skillDir, asset.relativePath)
				if (!assetPath) {
					this.logger.appendLine(
						`[ActionExecutor] Skipped asset outside skill directory: ${asset.relativePath}`,
					)
					continue
				}

				await fs.mkdir(path.dirname(assetPath), { recursive: true })
				await fs.writeFile(assetPath, asset.content, "utf-8")
				this.logger.appendLine(`[ActionExecutor] Bundled asset: ${asset.relativePath}`)
			}
		} catch (error) {
			this.logger.appendLine(
				`[ActionExecutor] Asset bundling error for "${skillName}": ${error instanceof Error ? error.message : String(error)}`,
			)
		}
	}

	/**
	 * Resolve an asset path inside `skillDir`.
	 *
	 * @returns the absolute target path, or undefined when `relativePath` is
	 * absolute, climbs out of `skillDir` via "..", or names the directory itself.
	 */
	private resolveAssetPath(skillDir: string, relativePath: string): string | undefined {
		const root = path.resolve(skillDir)
		const target = path.resolve(root, relativePath)
		const offset = path.relative(root, target)

		const escapesRoot =
			offset === "" || offset === ".." || offset.startsWith(`..${path.sep}`) || path.isAbsolute(offset)

		return escapesRoot ? undefined : target
	}

	/**
	 * Render a string as a double-quoted YAML scalar.
	 * A JSON string literal is a valid YAML double-quoted scalar, so colons,
	 * quotes, "#" and line breaks in the value cannot alter the document structure.
	 */
	private toYamlScalar(value: string): string {
		return JSON.stringify(value)
	}

	/**
	 * Parse bundled assets from payload, keeping only well-formed entries.
	 */
	private readAssetsPayload(value: unknown): BundledAsset[] {
		if (!Array.isArray(value)) {
			return []
		}

		return value.filter((item): item is BundledAsset => {
			if (typeof item !== "object" || item === null) {
				return false
			}
			const candidate = item as Partial<Record<keyof BundledAsset, unknown>>
			return typeof candidate.relativePath === "string" && typeof candidate.content === "string"
		})
	}

	/** Return the trimmed string, or undefined for non-strings and blank strings. */
	private readStringPayload(value: unknown): string | undefined {
		if (typeof value !== "string") {
			return undefined
		}
		const trimmed = value.trim()
		return trimmed.length > 0 ? trimmed : undefined
	}

	/**
	 * Return the trimmed, non-blank string items of an array payload,
	 * de-duplicated in first-seen order. Non-array payloads yield [].
	 */
	private readStringArrayPayload(value: unknown): string[] {
		if (!Array.isArray(value)) {
			return []
		}

		const cleaned = value
			.filter((item): item is string => typeof item === "string")
			.map((item) => item.trim())
			.filter((item) => item.length > 0)

		return Array.from(new Set(cleaned))
	}

	/** Accept only the known provenance literals. */
	private readSkillProvenance(value: unknown): SkillProvenance | undefined {
		return value === "agent" || value === "user" || value === "bundled" || value === "hub" || value === "unknown"
			? value
			: undefined
	}

	/** Accept only "global" or "project". */
	private readSkillSource(value: unknown): "global" | "project" | undefined {
		return value === "global" || value === "project" ? value : undefined
	}

	/** Build the canonical `skill:<source>:<name>` id, or undefined if either part is missing. */
	private buildSkillId(skillName: string | undefined, source: "global" | "project" | undefined): string | undefined {
		return skillName && source ? `skill:${source}:${skillName}` : undefined
	}
}
/**
 * Default agentmemory server URL
 */
const DEFAULT_AGENTMEMORY_URL = "http://localhost:3111"

/**
 * Raw shape from agentmemory search API response.
 * Search returns { results: [{ observation: { id, narrative, ... }, score }] }
 * NOT the same shape as the remember/write endpoints.
 */
type AgentMemorySearchObservation = {
	id: string
	narrative?: string
	content?: string
	title?: string
	timestamp?: string
	confidence?: number
	concepts?: string[]
	metadata?: Record<string, unknown>
}

/**
 * AgentMemoryAdapter — implements MemoryBackend via agentmemory REST API.
 *
 * agentmemory (https://github.com/rohitg00/agentmemory) is a service-first
 * memory system. This adapter connects to its REST API when the server is
 * running, and gracefully degrades to no-op when it's not: every request
 * short-circuits to an empty result while the last health check failed.
 *
 * REST endpoints used:
 *   POST /agentmemory/observe                 — store an observation
 *   POST /agentmemory/search                  — search (also used by recall)
 *   POST /agentmemory/forget                  — remove a memory
 *   POST /agentmemory/governance/bulk-delete  — clear all memories
 *   GET  /agentmemory/livez                   — health check
 */
export class AgentMemoryAdapter implements MemoryBackend {
	/** How often the server's health is re-checked after initialization. */
	private static readonly HEALTH_CHECK_INTERVAL_MS = 30000
	/** Abort timeout for the health probe. */
	private static readonly HEALTH_CHECK_TIMEOUT_MS = 2000
	/** Abort timeout for regular API requests. */
	private static readonly REQUEST_TIMEOUT_MS = 5000

	private readonly baseUrl: string
	private readonly logger: Logger
	private available = false
	private healthCheckInterval: ReturnType<typeof setInterval> | null = null
	private initialized = false
	/** In-flight initialization, shared by overlapping initialize() calls. */
	private initializing: Promise<void> | null = null

	constructor(logger: Logger, baseUrl?: string) {
		// Endpoint paths start with "/", so trailing slashes on the configured
		// URL are dropped to avoid requests such as "http://host//agentmemory/...".
		this.baseUrl = (baseUrl?.trim() || DEFAULT_AGENTMEMORY_URL).replace(/\/+$/, "")
		this.logger = logger
	}

	get backendType(): MemoryBackendType {
		return "agentmemory"
	}

	/**
	 * Initialize the adapter — check if agentmemory server is available and
	 * start the periodic health check. Safe to call repeatedly and
	 * concurrently: only one probe and one interval are created.
	 */
	async initialize(): Promise<void> {
		if (this.initialized) return

		if (!this.initializing) {
			this.initializing = this.connect().finally(() => {
				this.initializing = null
			})
		}

		return this.initializing
	}

	/** Run the first health probe and schedule the recurring one. */
	private async connect(): Promise<void> {
		this.available = await this.checkHealth()

		if (this.available) {
			this.logger.appendLine(`[AgentMemoryAdapter] Connected to agentmemory at ${this.baseUrl}`)
		} else {
			this.logger.appendLine(
				`[AgentMemoryAdapter] agentmemory server not available at ${this.baseUrl} — will degrade gracefully`,
			)
		}

		if (this.healthCheckInterval) {
			clearInterval(this.healthCheckInterval)
		}

		const timer = setInterval(() => {
			// checkHealth never rejects; it resolves to false on any failure.
			void this.checkHealth().then((healthy) => {
				this.available = healthy
			})
		}, AgentMemoryAdapter.HEALTH_CHECK_INTERVAL_MS)

		// In Node the timer handle exposes unref(); a background health probe
		// should not be the only thing keeping the process alive.
		const unref = (timer as { unref?: () => void }).unref
		if (typeof unref === "function") {
			unref.call(timer)
		}

		this.healthCheckInterval = timer
		this.initialized = true
	}

	/**
	 * Check if agentmemory server is healthy.
	 * Resolves to false on a non-2xx status, a network error, or a timeout.
	 */
	private async checkHealth(): Promise<boolean> {
		const controller = new AbortController()
		const timeout = setTimeout(() => controller.abort(), AgentMemoryAdapter.HEALTH_CHECK_TIMEOUT_MS)

		try {
			const response = await fetch(`${this.baseUrl}/agentmemory/livez`, {
				signal: controller.signal,
			})

			return response.ok
		} catch {
			return false
		} finally {
			clearTimeout(timeout)
		}
	}

	/**
	 * Make a POST request to agentmemory API.
	 *
	 * @returns the parsed JSON body, or null when the server is marked
	 * unavailable, answers with a non-2xx status, or the request throws
	 * (network error, timeout, invalid JSON). A thrown error also marks the
	 * server unavailable until the next successful health check.
	 */
	private async post<T>(path: string, body: unknown): Promise<T | null> {
		if (!this.available) return null

		const controller = new AbortController()
		const timeout = setTimeout(() => controller.abort(), AgentMemoryAdapter.REQUEST_TIMEOUT_MS)

		try {
			const response = await fetch(`${this.baseUrl}${path}`, {
				method: "POST",
				headers: { "Content-Type": "application/json" },
				body: JSON.stringify(body),
				signal: controller.signal,
			})

			if (!response.ok) {
				this.logger.appendLine(`[AgentMemoryAdapter] POST ${path} failed: ${response.status}`)
				return null
			}

			return (await response.json()) as T
		} catch (error) {
			this.logger.appendLine(
				`[AgentMemoryAdapter] POST ${path} error: ${error instanceof Error ? error.message : String(error)}`,
			)
			this.available = false
			return null
		} finally {
			clearTimeout(timeout)
		}
	}

	/**
	 * Store a memory entry via agentmemory observe endpoint.
	 * agentmemory v3 requires: hookType, sessionId, project, cwd, timestamp, narrative.
	 *
	 * @returns the stored entry, or null when the request failed or the server
	 * did not return an observation id.
	 */
	async store(entry: Omit<MemoryEntry, "id" | "createdAt" | "updatedAt">): Promise<MemoryEntry | null> {
		const now = Date.now()
		const sessionId = `zoo-code-${now.toString(36)}`

		const result = await this.post<{ observationId?: unknown }>("/agentmemory/observe", {
			hookType: "conversation",
			sessionId,
			project: "zoo-code",
			cwd: typeof process !== "undefined" && process.cwd ? process.cwd() : "/",
			timestamp: new Date(now).toISOString(),
			narrative: entry.content,
			title: (entry.content ?? "").slice(0, 120),
			metadata: {
				source: entry.source,
				tags: entry.tags,
				relevanceScore: entry.relevanceScore,
				expiresAt: entry.expiresAt,
			},
		})

		// Without an id the entry could never be forgotten again, so treat a
		// response that lacks one as a failed write.
		if (!result || typeof result.observationId !== "string") return null

		return {
			id: result.observationId,
			content: entry.content,
			source: entry.source,
			createdAt: now,
			updatedAt: now,
			relevanceScore: entry.relevanceScore,
			tags: entry.tags,
			expiresAt: entry.expiresAt,
		}
	}

	/**
	 * Search memory entries via agentmemory search endpoint.
	 * Search API returns: { results: [{ observation: { id, narrative, ... }, score }] }
	 */
	async search(query: string, maxResults: number = 10): Promise<MemoryEntry[]> {
		return this.runSearch(query, maxResults)
	}

	/**
	 * Recall recent memory entries via agentmemory search with broad query.
	 * Uses a generic query to retrieve recent entries instead of the
	 * /remember endpoint which requires a specific content string.
	 */
	async recall(maxResults: number = 20): Promise<MemoryEntry[]> {
		return this.runSearch("recent activity task user system", maxResults)
	}

	/** Shared implementation of search/recall: query the server and map valid observations. */
	private async runSearch(query: string, limit: number): Promise<MemoryEntry[]> {
		const result = await this.post<{
			results?: Array<{ observation?: AgentMemorySearchObservation }>
		}>("/agentmemory/search", { query, limit })

		if (!result || !Array.isArray(result.results)) return []

		return result.results
			.map((hit) => this.mapSearchObservationToMemoryEntry(hit?.observation))
			.filter((entry): entry is MemoryEntry => entry !== undefined)
	}

	/**
	 * Remove a memory entry by ID via agentmemory forget endpoint.
	 */
	async forget(id: string): Promise<boolean> {
		const result = await this.post<{ success: boolean }>("/agentmemory/forget", { id })
		return result?.success === true
	}

	/**
	 * Remove entries matching content substring (case-insensitive).
	 * Uses agentmemory search + forget pattern over at most 50 search hits.
	 *
	 * @returns the number of entries the server confirmed as removed.
	 */
	async forgetByContent(substring: string): Promise<number> {
		const needle = substring.trim()
		const normalized = needle.toLowerCase()
		if (!normalized) {
			return 0
		}

		const entries = await this.search(needle, 50)
		let removed = 0

		for (const entry of entries) {
			if (entry.content.toLowerCase().includes(normalized) && (await this.forget(entry.id))) {
				removed += 1
			}
		}

		return removed
	}

	/**
	 * Get backend statistics.
	 * The count comes from a recall of up to 1000 entries, so it is capped at
	 * whatever that query returns.
	 */
	async getStats(): Promise<{ entryCount: number; backend: string }> {
		if (!this.available) {
			return { entryCount: 0, backend: "agentmemory (unavailable)" }
		}

		const memories = await this.recall(1000)
		return {
			entryCount: memories.length,
			backend: "agentmemory",
		}
	}

	/**
	 * Clear all entries via agentmemory governance delete.
	 */
	async clear(): Promise<void> {
		await this.post("/agentmemory/governance/bulk-delete", { all: true })
	}

	/**
	 * Dispose the adapter — stop health check interval.
	 */
	async dispose(): Promise<void> {
		if (this.healthCheckInterval) {
			clearInterval(this.healthCheckInterval)
			this.healthCheckInterval = null
		}

		this.available = false
		this.initialized = false
	}

	/**
	 * Convert a search observation into a MemoryEntry.
	 *
	 * @returns undefined when the observation is missing or has no string id.
	 * A missing or unparseable timestamp falls back to the current time, and
	 * metadata values are only used when they have the expected runtime type.
	 */
	private mapSearchObservationToMemoryEntry(obs: AgentMemorySearchObservation | undefined): MemoryEntry | undefined {
		if (!obs || typeof obs.id !== "string") return undefined

		const metadata = obs.metadata ?? {}

		const parsed = obs.timestamp ? new Date(obs.timestamp).getTime() : Number.NaN
		const timestamp = Number.isFinite(parsed) ? parsed : Date.now()

		const rawTags: unknown = obs.concepts ?? metadata.tags
		const tags = Array.isArray(rawTags) ? rawTags.filter((tag): tag is string => typeof tag === "string") : []

		const relevanceScore =
			typeof obs.confidence === "number"
				? obs.confidence
				: typeof metadata.relevanceScore === "number"
					? metadata.relevanceScore
					: undefined

		return {
			id: obs.id,
			content: obs.narrative ?? obs.content ?? "",
			source: (metadata.source as MemoryEntry["source"]) || "learning",
			createdAt: timestamp,
			updatedAt: timestamp,
			relevanceScore,
			tags,
			expiresAt: typeof metadata.expiresAt === "number" ? metadata.expiresAt : undefined,
		}
	}
}
+ this.config = { ...DEFAULT_CONFIG, ...config } + this.toolErrorHealer = healer ?? null + this.resilienceService = resilience ?? null + } + + setModeFactory(factory: ModeFactoryService): void { + this.modeFactory = factory + } + + setPatternAnalyzer(analyzer: PatternAnalyzer): void { + this.patternAnalyzer = analyzer + } + + setToolErrorHealer(healer: ToolErrorHealer): void { + this.toolErrorHealer = healer + } + + setResilienceService(resilience: ResilienceService): void { + this.resilienceService = resilience + } + + /** + * Register a callback to retrieve patterns from the learning store. + * This avoids coupling to SelfImprovingManager directly. + */ + setPatternProvider(provider: () => LearnedPattern[]): void { + this.getPatterns = provider + } + + getConfig(): AutoModeConfig { + return { ...this.config } + } + + updateConfig(updates: Partial): void { + this.config = { ...this.config, ...updates } + this.logger.appendLine(`[AutoMode] Config updated: ${JSON.stringify(updates)}`) + } + + async start(): Promise { + if (!this.config.enabled) { + this.logger.appendLine("[AutoMode] Auto mode is disabled, not starting") + return + } + + this.logger.appendLine("[AutoMode] Starting auto mode orchestrator") + + if (this.autoReviewTimer) { + clearInterval(this.autoReviewTimer) + } + this.autoReviewTimer = setInterval(() => { + this.onAutoReviewTick().catch((error) => { + this.logger.appendLine( + `[AutoMode] Auto review tick error: ${error instanceof Error ? error.message : String(error)}`, + ) + }) + }, this.config.reviewIntervalMs) + + this.logger.appendLine(`[AutoMode] Auto review timer started (interval: ${this.config.reviewIntervalMs}ms)`) + } + + stop(): void { + if (this.autoReviewTimer) { + clearInterval(this.autoReviewTimer) + this.autoReviewTimer = null + } + this.logger.appendLine("[AutoMode] Auto mode orchestrator stopped") + } + + /** + * Record a failure with tool and message context for auto-heal recovery decisions. 
/**
 * Record a failure with tool and message context for auto-heal recovery decisions.
 * An omitted argument leaves the previously recorded value in place.
 */
recordFailure(toolName?: string, errorMessage?: string): void {
	this.lastFailureTool = toolName ?? this.lastFailureTool
	this.lastFailureMessage = errorMessage ?? this.lastFailureMessage
}

/**
 * Called after each task completion to trigger auto mode processing.
 *
 * @param success Whether the task finished successfully. A success ends the
 *   current failure streak; a failure runs auto-heal when it is enabled.
 */
async onTaskCompleted(success: boolean): Promise<void> {
	if (!this.config.enabled) return

	if (success) {
		// The recovery thresholds and log messages talk about *consecutive*
		// failures, so a success must break the streak and drop stale context.
		this.resetFailureState()
	} else if (this.config.autoHeal) {
		await this.autoHeal()
	}

	if (this.config.autoCreateModes) {
		const now = Date.now()
		if (now - this.lastModeCreationTime > 5 * 60 * 1000) {
			// Stamp the cooldown before awaiting so an overlapping timer tick or
			// task completion cannot start a second creation pass meanwhile.
			this.lastModeCreationTime = now
			await this.autoCreateModes()
		}
	}
}

/** Forget the current failure streak and the tool/message recorded for it. */
private resetFailureState(): void {
	this.failureCount = 0
	this.lastFailureTool = null
	this.lastFailureMessage = null
}

/**
 * Auto-heal: count the failure and pick the first recovery strategy that applies.
 * Strategies, in evaluation order:
 * 1. "Missing required parameter" error with a known tool → ask ToolErrorHealer for a fix
 * 2. Fewer than 3 failures and a ResilienceService is set → follow its retry delay
 * 3. 3+ failures with a known tool → log a "different approach" suggestion
 * 4. 5+ failures → log a strategy-change signal
 * Anything else is only logged as queued for pattern analysis.
 */
private async autoHeal(): Promise<void> {
	if (!this.patternAnalyzer) {
		this.logger.appendLine("[AutoMode] Cannot auto-heal: PatternAnalyzer not set")
		return
	}

	this.failureCount++
	this.logger.appendLine(`[AutoMode] Auto-heal: failure #${this.failureCount} detected, attempting recovery`)

	// Strategy 1: Tool parameter error → use ToolErrorHealer for known fixes
	if (
		this.lastFailureMessage?.includes("Missing required parameter") &&
		this.lastFailureTool &&
		this.toolErrorHealer
	) {
		// Pull the parameter name out of "Missing required parameter: NAME".
		const paramMatch = this.lastFailureMessage.match(/Missing required parameter[:\s]+(\S+)/i)
		const missingParam = paramMatch?.[1] ?? "unknown"

		const fix = this.toolErrorHealer.handleToolError(this.lastFailureTool, missingParam)
		if (fix) {
			this.logger.appendLine(
				`[AutoMode] Auto-heal: applied tool error fix for ${this.lastFailureTool}.${missingParam} — ${fix.fix}`,
			)
			this.resetFailureState()
			return
		}
	}

	// Strategy 2: Transient/streaming failure → use ResilienceService for retry scheduling
	if (this.resilienceService && this.failureCount < 3) {
		const delay = this.resilienceService.onStreamingFailure()
		if (delay > 0) {
			this.logger.appendLine(
				`[AutoMode] Auto-heal: scheduled retry in ${delay}ms (failure #${this.failureCount})`,
			)
			return
		}
		if (delay === -1) {
			this.logger.appendLine(`[AutoMode] Auto-heal: max retries exceeded, entering recovery mode`)
			// Only the counter is reset; the recorded tool/message stay available
			// for the next failure's strategy selection.
			this.failureCount = 0
			return
		}
	}

	// Strategy 3: Same tool failed 3+ times → suggest different approach
	if (this.failureCount >= 3 && this.lastFailureTool) {
		this.logger.appendLine(
			`[AutoMode] Recovery: Tool "${this.lastFailureTool}" failed ${this.failureCount}+ times. Suggesting alternative approach.`,
		)
		this.resetFailureState()
		return
	}

	// Strategy 4: Model stuck in loop → inject strategy change.
	// Only reachable when no tool was recorded, otherwise strategy 3 fires at 3.
	if (this.failureCount >= 5) {
		this.logger.appendLine(
			`[AutoMode] Recovery: ${this.failureCount} consecutive failures detected. Injecting strategy change signal.`,
		)
		this.resetFailureState()
		return
	}

	// Fallback: queue for pattern analysis
	this.logger.appendLine("[AutoMode] Auto-heal: failure queued for pattern analysis")
}
/**
 * Auto-create custom modes from high-confidence patterns.
 * Requires both a mode factory and a pattern provider; errors are logged, never thrown.
 */
private async autoCreateModes(): Promise<void> {
	if (!this.modeFactory) {
		this.logger.appendLine("[AutoMode] Cannot create modes: ModeFactory not set")
		return
	}

	if (!this.getPatterns) {
		this.logger.appendLine("[AutoMode] Cannot create modes: pattern provider not set")
		return
	}

	try {
		const allPatterns = this.getPatterns()
		const candidatePatterns = this.getCandidatePatterns(allPatterns)

		if (candidatePatterns.length === 0) {
			this.logger.appendLine("[AutoMode] No candidate patterns for mode creation")
			return
		}

		this.logger.appendLine(`[AutoMode] Found ${candidatePatterns.length} candidate patterns for mode creation`)

		const created = await this.modeFactory.createModesFromPatterns(candidatePatterns)

		// Remember the slugs so the same pattern is not offered to the factory again.
		for (const slug of created) {
			this.createdModeSlugs.add(slug)
		}

		if (created.length > 0) {
			this.logger.appendLine(`[AutoMode] Created ${created.length} custom modes: ${created.join(", ")}`)
		}
	} catch (error) {
		this.logger.appendLine(
			`[AutoMode] Auto-create modes error: ${error instanceof Error ? error.message : String(error)}`,
		)
	}
}

/**
 * Keep patterns that name at least one tool, meet the configured confidence
 * and frequency minimums, and whose derived slug has not been created yet.
 */
private getCandidatePatterns(patterns: LearnedPattern[]): LearnedPattern[] {
	return patterns.filter((p) => {
		if (!p.context?.toolNames || p.context.toolNames.length === 0) return false
		if ((p.confidenceScore ?? 0) < this.config.minPatternConfidence) return false
		if ((p.frequency ?? 0) < this.config.minPatternFrequency) return false
		const slug = this.deriveSlugFromPattern(p)
		if (slug && this.createdModeSlugs.has(slug)) return false
		return true
	})
}

/**
 * Build a slug from the pattern's first two tool names: joined with "-",
 * non-alphanumerics replaced by "-", lower-cased, cut to 64 characters and
 * stripped of leading/trailing hyphens. Returns null when nothing remains.
 */
private deriveSlugFromPattern(pattern: LearnedPattern): string | null {
	const toolNames = pattern.context?.toolNames
	if (!toolNames || toolNames.length === 0) return null
	const base = toolNames.slice(0, 2).join("-")
	const sanitized = base.replace(/[^a-zA-Z0-9-]/g, "-").toLowerCase()
	const truncated = sanitized.slice(0, 64)
	return truncated.replace(/^-+|-+$/g, "") || null
}

/**
 * Called on the auto review timer tick.
 * Triggers auto-mode-specific processing after reviews.
 */
private async onAutoReviewTick(): Promise<void> {
	// The timer may outlive a config change; honour `enabled` like onTaskCompleted does.
	if (!this.config.enabled) return

	if (this.config.autoCreateModes) {
		const now = Date.now()
		if (now - this.lastModeCreationTime > 5 * 60 * 1000) {
			// Stamp the cooldown before awaiting so an overlapping task completion
			// cannot start a second creation pass while this one is in flight.
			this.lastModeCreationTime = now
			await this.autoCreateModes()
		}
	}
}

/**
 * Get the current auto mode status for display.
 */
getStatus(): Record<string, unknown> {
	return {
		autoModeEnabled: this.config.enabled,
		autoCreateModes: this.config.autoCreateModes,
		autoHeal: this.config.autoHeal,
		reviewIntervalMs: this.config.reviewIntervalMs,
		createdModes: this.createdModeSlugs.size,
		createdModeSlugs: Array.from(this.createdModeSlugs),
		// A zero timestamp means no creation pass has been stamped yet.
		lastModeCreation: this.lastModeCreationTime
			? new Date(this.lastModeCreationTime).toLocaleTimeString()
			: "never",
	}
}
/** One recorded tool error. */
interface ErrorEvent {
	timestamp: number
	toolName: string
	category: ErrorCategory
	message: string
}

/** A run of errors that started with `rootError` and is still inside the cascade window. */
interface CascadeChain {
	rootError: ErrorEvent
	chain: ErrorEvent[]
	isActive: boolean
}

/**
 * Tracks recent tool errors and groups errors that follow each other closely
 * into a "cascade" so callers can surface a targeted suggestion.
 */
export class CascadeTracker {
	private recentErrors: ErrorEvent[] = []
	private activeCascade: CascadeChain | null = null
	private readonly CASCADE_WINDOW_MS = 30_000 // 30 seconds
	private readonly MAX_CHAIN_LENGTH = 10

	/** Record an error, drop entries older than five minutes, and update the active cascade. */
	recordError(toolName: string, category: ErrorCategory, message: string): void {
		const event: ErrorEvent = {
			timestamp: Date.now(),
			toolName,
			category,
			message,
		}

		this.recentErrors.push(event)
		this.pruneOldErrors()
		this.detectCascade(event)
	}

	/**
	 * Append `event` to the active cascade while it is within the window
	 * (measured from the root error) and below the length cap; otherwise
	 * start a fresh cascade rooted at `event`.
	 */
	private detectCascade(event: ErrorEvent): void {
		const current = this.activeCascade
		if (current) {
			const timeSinceRoot = event.timestamp - current.rootError.timestamp
			if (timeSinceRoot < this.CASCADE_WINDOW_MS && current.chain.length < this.MAX_CHAIN_LENGTH) {
				current.chain.push(event)
				return
			}
			// Expired or full: mark it inactive (it is not kept anywhere) and fall through.
			current.isActive = false
		}

		this.activeCascade = {
			rootError: event,
			chain: [event],
			isActive: true,
		}
	}

	/** Returns the active cascade, or null once its root error is older than the window. */
	getActiveCascade(): CascadeChain | null {
		if (this.activeCascade && this.activeCascade.isActive) {
			const age = Date.now() - this.activeCascade.rootError.timestamp
			if (age < this.CASCADE_WINDOW_MS) {
				return this.activeCascade
			}
			this.activeCascade.isActive = false
		}
		return null
	}

	/**
	 * Returns up to `count` of the most recent errors, oldest first.
	 *
	 * @param toolName When given, only errors from this tool are considered.
	 * @param count Maximum number of entries; zero, negative or NaN yields an empty array.
	 */
	getRecentErrors(toolName?: string, count: number = 5): ErrorEvent[] {
		// slice(-0) is slice(0) and would return *everything*, and a negative
		// count would drop the oldest entries instead of limiting the newest.
		const limit = Math.floor(count)
		if (!(limit > 0)) {
			return []
		}

		const filtered = toolName ? this.recentErrors.filter((e) => e.toolName === toolName) : this.recentErrors
		return filtered.slice(-limit)
	}

	/**
	 * Returns a human-readable hint for the active cascade, or null when there
	 * is no active cascade or it holds fewer than two errors. Categories are
	 * checked in priority order; the first one present in the chain wins.
	 */
	getCascadeSuggestion(): string | null {
		const cascade = this.getActiveCascade()
		if (!cascade || cascade.chain.length < 2) {
			return null
		}

		const uniqueTools = [...new Set(cascade.chain.map((e) => e.toolName))]
		const includes = (category: ErrorCategory): boolean => cascade.chain.some((e) => e.category === category)

		if (includes(ErrorCategory.TOOL_NOT_FOUND)) {
			return `⚠️ Cascade failure detected: ${cascade.chain.length} errors in ${uniqueTools.join(", ")}. Tool not available. Use alternative approach.`
		}

		if (includes(ErrorCategory.DIRECTORY_CONFUSION)) {
			return `⚠️ Cascade failure detected: repeatedly trying to read directories as files. Use list_files first.`
		}

		if (includes(ErrorCategory.MODEL_THOUGHT_FAILURE)) {
			return `⚠️ Cascade failure detected: model struggling. Break down the task into smaller steps.`
		}

		if (includes(ErrorCategory.FILE_NOT_FOUND)) {
			return `⚠️ Cascade failure detected: ${cascade.chain.length} file-not-found errors. Verify paths before reading.`
		}

		return `⚠️ ${cascade.chain.length} consecutive errors detected. Consider changing approach.`
	}

	/** Drop recorded errors older than five minutes. */
	private pruneOldErrors(): void {
		const cutoff = Date.now() - 300_000 // 5 minutes
		this.recentErrors = this.recentErrors.filter((e) => e.timestamp > cutoff)
	}

	/** Forget all recorded errors and the active cascade. */
	reset(): void {
		this.recentErrors = []
		this.activeCascade = null
	}
}
/** Flattened view of one code-index hit. */
export interface CodeSearchResult {
	filePath: string
	score: number
	snippet?: string
	line?: number
}

/**
 * CodeIndexAdapter - real adapter bridging CodeIndexManager into the self-improving system.
 *
 * Provides semantic search, file indexing, and availability checks
 * backed by the full CodeIndexManager (Qdrant + embedders).
 * Gracefully degrades when the manager is not initialized: every method
 * returns an empty/negative result instead of throwing.
 */
export class CodeIndexAdapter {
	private codeIndexManager: CodeIndexManager | undefined

	constructor(
		private readonly logger?: Logger,
		codeIndexManager?: CodeIndexManager,
	) {
		this.codeIndexManager = codeIndexManager
	}

	/** Attach (or replace) the manager after construction. */
	setCodeIndexManager(manager: CodeIndexManager): void {
		this.codeIndexManager = manager
	}

	/**
	 * Reports availability based on the manager's current status: the index
	 * counts as available while its system status is "Indexed" or "Indexing".
	 * `hits` is 1 when available and 0 otherwise.
	 */
	getInfo(): CodeIndexInfo {
		if (!this.codeIndexManager) {
			return { available: false, hits: 0 }
		}

		try {
			const status = this.codeIndexManager.getCurrentStatus()
			const isIndexed = status.systemStatus === "Indexed" || status.systemStatus === "Indexing"
			return {
				available: isIndexed,
				hits: isIndexed ? 1 : 0,
			}
		} catch (error) {
			this.logger?.appendLine(`[CodeIndexAdapter] Error getting code index info: ${this.describe(error)}`)
			return { available: false, hits: 0 }
		}
	}

	isAvailable(): boolean {
		return this.getInfo().available
	}

	/**
	 * Search the index and map the first `limit` hits to CodeSearchResult.
	 *
	 * @param query Search text passed to the manager.
	 * @param limit Maximum number of results; zero, negative or NaN yields an empty array.
	 * @returns Mapped results, or an empty array when no manager is set or the search fails.
	 */
	async search(query: string, limit: number = 10): Promise<CodeSearchResult[]> {
		// slice(0, -n) would silently drop the *last* n hits, so reject non-positive limits up front.
		const cap = Math.floor(limit)
		if (!this.codeIndexManager || !(cap > 0)) {
			return []
		}

		try {
			const results = await this.codeIndexManager.searchIndex(query)
			return results.slice(0, cap).map((r) => ({
				// Fall back to the point id when the payload carries no path.
				filePath: r.payload?.filePath ?? String(r.id),
				score: r.score,
				snippet: r.payload?.codeChunk,
				line: r.payload?.startLine,
			}))
		} catch (error) {
			this.logger?.appendLine(`[CodeIndexAdapter] Search error: ${this.describe(error)}`)
			return []
		}
	}

	/**
	 * Searches the vector store directly, returning raw VectorStoreSearchResult[].
	 * Supports optional directory prefix filtering.
	 * Returns an empty array when the manager is not set or the search fails.
	 */
	async searchVectorStore(query: string, directoryPrefix?: string): Promise<VectorStoreSearchResult[]> {
		if (!this.codeIndexManager) {
			return []
		}

		try {
			return await this.codeIndexManager.searchIndex(query, directoryPrefix)
		} catch (error) {
			this.logger?.appendLine(`[CodeIndexAdapter] Vector store search error: ${this.describe(error)}`)
			return []
		}
	}

	/** Ask the manager to start indexing; failures are logged, not thrown. */
	async startIndexing(): Promise<void> {
		if (!this.codeIndexManager) {
			return
		}

		try {
			await this.codeIndexManager.startIndexing()
		} catch (error) {
			this.logger?.appendLine(`[CodeIndexAdapter] Start indexing error: ${this.describe(error)}`)
		}
	}

	/** Render an unknown thrown value as log text. */
	private describe(error: unknown): string {
		return error instanceof Error ? error.message : String(error)
	}
}
/**
 * Settings that control when the curator runs and how it ages skills.
 */
export interface CuratorConfig {
	/** Minimum interval between curator runs (ms) */
	intervalMs: number
	/** Minimum idle time since last user activity before curator runs (ms) */
	minIdleMs: number
	/** Whether to defer the first curator run */
	firstRunDeferred: boolean
	/** Days of inactivity before a skill is marked stale */
	staleAfterDays: number
	/** Days of inactivity before a stale skill is archived */
	archiveAfterDays: number
	/** Whether to create pre-run backups */
	backupsEnabled: boolean
	/** Maximum number of backup snapshots to retain */
	maxBackups: number
	/** Absolute path to the skills directory for tar.gz snapshots */
	skillsDir?: string
	/** Whether LLM review is enabled (requires LLMReviewProvider impl) */
	llmReviewEnabled: boolean
}

/**
 * Defaults applied underneath any caller-supplied configuration:
 * hourly runs, five minutes of idle time, 14/60-day stale/archive thresholds.
 */
export const DEFAULT_CURATOR_CONFIG: CuratorConfig = {
	intervalMs: 60 * 60 * 1000,
	minIdleMs: 5 * 60 * 1000,
	firstRunDeferred: true,
	staleAfterDays: 14,
	archiveAfterDays: 60,
	backupsEnabled: true,
	maxBackups: 5,
	llmReviewEnabled: true,
}

/**
 * One action parsed from the LLM review output, discriminated by `action`.
 */
export type CuratorAction =
	| { action: "merge"; target: string; absorb: string[] }
	| { action: "archive"; name: string }
	| { action: "pin"; name: string }
	| { action: "unpin"; name: string }
	| { action: "restore"; name: string }
	| {
			action: "demote"
			name: string
			demoteTarget: "reference" | "template" | "script"
			umbrellaSkillId: string
			umbrellaSkillName: string
	  }

/**
 * Points at a cron job file that references a skill.
 */
type CronJobReference = {
	jobId: string
	jobName: string
	skillId: string
	skillName: string
	filePath: string
}

/**
 * Outcome of a single curator run.
 */
export interface CuratorReport {
	runId: string
	timestamp: number
	durationMs: number
	transitions: {
		skillId: string
		skillName: string
		fromState: string
		toState: string
		reason: string
	}[]
	stats: {
		totalSkills: number
		activeSkills: number
		staleSkills: number
		archivedSkills: number
		pinnedSkills: number
		transitionsApplied: number
	}
	backupPath?: string
	error?: string
	/** LLM-generated actions that were applied */
	llmActions?: CuratorAction[]
	/** Pre-consolidation skill count (before LLM actions) */
	preConsolidationCount?: number
	/** Skills that were absorbed into an umbrella */
	absorbedSkills?: {
		skillName: string
		absorbedInto: string
	}[]
	/** Three-tier classifications for archived skills */
	classifications?: {
		skillId: string
		skillName: string
		tier: ClassificationTier
		absorbedInto: string | null
		summary: string
		confidence: "high" | "medium" | "low"
	}[]
	/** Cron job files that had skill references rewritten after consolidation */
	cronReferencesUpdated?: {
		jobName: string
		oldSkillId: string
		newSkillId: string
	}[]
}
/**
 * Three-tier classification for archived skills.
 * Mirrors Hermes-agent's classification system:
 * 1. declared — model-declared absorbed_into at delete time
 * 2. yaml_block — model's structured YAML summary block in SKILL.md
 * 3. heuristic — tool-call heuristic audit as fallback
 *
 * "unclassified" marks skills for which none of the three tiers produced a result.
 */
type ClassificationTier = "declared" | "yaml_block" | "heuristic" | "unclassified"

/** Result of classifying one archived skill. */
type ClassificationResult = {
	tier: ClassificationTier
	absorbedInto: string | null
	summary: string
	confidence: "high" | "medium" | "low"
}

/** Snapshot of the curator's scheduling state and configuration. */
type CuratorStatus = {
	lastRunAt: number
	firstRunDone: boolean
	config: CuratorConfig
}

/**
 * LLMReviewProvider interface — pluggable LLM reviewer for curator.
 * Default implementation logs the prompt but does not call an LLM.
 */
export interface LLMReviewProvider {
	/**
	 * Submit a curator review prompt and return structured YAML actions.
	 * @param prompt The full CURATOR_REVIEW_PROMPT + candidate table
	 * @returns Parsed CuratorAction[] or empty array if no actions
	 */
	review(prompt: string): Promise<CuratorAction[]>
}

/**
 * Default no-op LLM review provider.
 * Logs the prompt length via the curator's logger but returns no actions.
 */
class NoopLLMReviewProvider implements LLMReviewProvider {
	private readonly logger: Logger

	constructor(logger: Logger) {
		this.logger = logger
	}

	async review(prompt: string): Promise<CuratorAction[]> {
		this.logger.appendLine(
			`[CuratorService] NoopLLMReviewProvider: LLM review not configured. Prompt length: ${prompt.length} chars`,
		)
		return []
	}
}
**archive** — mark a skill for archival (low usage, no recent activity) + {action: archive, name: "skill-x"} + +3. **pin** — protect a skill from auto-mutation + {action: pin, name: "skill-y"} + +4. **unpin** — allow auto-mutation on a previously pinned skill + {action: unpin, name: "skill-z"} + +5. **restore** — bring an archived skill back to active + {action: restore, name: "skill-w"} + +6. **demote** — move a narrow/specific skill under an umbrella as a reference, template, or script + {action: demote, name: "skill-v", demoteTarget: "reference", umbrellaSkillId: "umbrella-id", umbrellaSkillName: "umbrella-name"} + demoteTarget must be one of: "reference", "template", "script" + +Rules: +- Only recommend merges for skills with clear overlap in purpose or domain. +- The "target" of a merge is the umbrella skill name (existing or new). +- Skills listed in "absorb" will be marked as absorbed_into the target. +- Prefer pinning high-value skills that should not be mutated. +- Archive skills that are stale, unused, or superseded. +- Use **demote** for narrow skills that are a subset of an umbrella's domain. + The skill's content is moved into a subdirectory (references/, templates/, or scripts/) + under the umbrella skill directory, and the original skill is marked as absorbed. + +Now review the following candidate table: +` + +/** + * CuratorService — telemetry-driven skill lifecycle management. 
/**
 * @param baseDir Root storage directory; curator files live under `<baseDir>/self-improving/curator`.
 * @param skillUsageStore Telemetry store the curator reads from and transitions skills in.
 * @param logger Sink for diagnostic lines.
 * @param config Overrides merged on top of DEFAULT_CURATOR_CONFIG.
 */
constructor(baseDir: string, skillUsageStore: SkillUsageStore, logger: Logger, config?: Partial<CuratorConfig>) {
	this.baseDir = path.join(baseDir, "self-improving", "curator")
	this.statePath = path.join(this.baseDir, "state.json")
	this.backupsDir = path.join(this.baseDir, "backups")
	this.reportsDir = path.join(this.baseDir, "reports")
	this.skillUsageStore = skillUsageStore
	this.logger = logger
	this.config = { ...DEFAULT_CURATOR_CONFIG, ...config }
	// Until a real provider is injected, reviews are logged and yield no actions.
	this.llmProvider = new NoopLLMReviewProvider(logger)
}

/**
 * Set a custom LLM review provider (e.g. one that calls an actual LLM).
 */
setLLMReviewProvider(provider: LLMReviewProvider): void {
	this.llmProvider = provider
}

/**
 * Create the backup/report directories and load persisted state.
 * Idempotent: later calls return immediately. Errors are logged and the
 * service is still marked initialized, so a failed setup is not retried.
 */
async initialize(): Promise<void> {
	if (this.initialized) {
		return
	}

	try {
		await fs.mkdir(this.backupsDir, { recursive: true })
		await fs.mkdir(this.reportsDir, { recursive: true })
		await this.loadState()
		this.logger.appendLine("[CuratorService] Initialized")
	} catch (error) {
		this.logger.appendLine(
			`[CuratorService] Initialization error: ${error instanceof Error ? error.message : String(error)}`,
		)
	} finally {
		this.initialized = true
	}
}
/**
 * Decide whether a curator run is due.
 *
 * @param now Current time in ms.
 * @param lastUserActivityAt Time of the last user activity in ms; when omitted the idle gate is skipped.
 * @returns false while the first run is still deferred, while the run interval
 *   has not elapsed, or while the user has been idle for less than `minIdleMs`.
 */
shouldRun(now: number, lastUserActivityAt?: number): boolean {
	if (this.config.firstRunDeferred && !this.firstRunDone && this.lastRunAt === 0) {
		return false
	}

	if (now - this.lastRunAt < this.config.intervalMs) {
		return false
	}

	if (typeof lastUserActivityAt === "number" && now - lastUserActivityAt < this.config.minIdleMs) {
		return false
	}

	return true
}

/**
 * Execute one curator pass and return its report. Never throws for failures
 * inside the pass: they are recorded in `report.error`. A report is written
 * for skipped runs as well.
 *
 * @param now Logical time of the run in ms (stored as the report timestamp and as lastRunAt).
 * @param lastUserActivityAt Forwarded to shouldRun for the idle gate.
 */
async run(now: number, lastUserActivityAt?: number): Promise<CuratorReport> {
	await this.initialize()

	const startedAt = Date.now()
	const runId = crypto.randomUUID()
	const report: CuratorReport = {
		runId,
		timestamp: now,
		durationMs: 0,
		transitions: [],
		stats: {
			totalSkills: 0,
			activeSkills: 0,
			staleSkills: 0,
			archivedSkills: 0,
			pinnedSkills: 0,
			transitionsApplied: 0,
		},
	}

	try {
		if (this.shouldDeferFirstRun()) {
			// The very first invocation only records that it happened.
			this.firstRunDone = true
			await this.saveState()
			report.error = "Skipped: first-run deferral"
			report.durationMs = Date.now() - startedAt
			await this.writeReport(report)
			return report
		}

		if (!this.shouldRun(now, lastUserActivityAt)) {
			report.error = "Skipped: gates not satisfied"
			report.durationMs = Date.now() - startedAt
			await this.writeReport(report)
			return report
		}

		// Set lastRunAt immediately to prevent concurrent runs
		this.lastRunAt = now

		if (this.config.backupsEnabled) {
			report.backupPath = await this.createBackup(runId)
		}

		this.assignStats(report)
		report.preConsolidationCount = report.stats.totalSkills
		const { transitions, classifications } = await this.applyDeterministicTransitions()
		report.transitions = transitions
		if (classifications.length > 0) {
			report.classifications = classifications
		}
		await this.runCuratorReview(report)
		report.stats.transitionsApplied = report.transitions.length + (report.llmActions?.length ?? 0)
		// Refresh the counters after the transitions; transitionsApplied is not touched by assignStats.
		this.assignStats(report)

		this.firstRunDone = true
		await this.saveState()

		report.durationMs = Date.now() - startedAt
		await this.writeReport(report)
		this.logger.appendLine(
			`[CuratorService] Run ${runId}: ${report.transitions.length} transitions, ${report.llmActions?.length ?? 0} llm-actions in ${report.durationMs}ms`,
		)
	} catch (error) {
		report.error = error instanceof Error ? error.message : String(error)
		report.durationMs = Date.now() - startedAt
		this.logger.appendLine(`[CuratorService] Run error: ${report.error}`)
		await this.writeReport(report)
	}

	return report
}

/**
 * Load the most recently modified `<reportsDir>/<dir>/run.json`.
 * Directories without a readable, parseable run.json are skipped, so one
 * broken entry no longer hides every other report.
 *
 * @returns The newest usable report, or null when there is none.
 */
async getLatestReport(): Promise<CuratorReport | null> {
	try {
		const entries = await fs.readdir(this.reportsDir, { withFileTypes: true })
		const probed = await Promise.all(
			entries
				.filter((entry) => entry.isDirectory())
				.map(async (entry) => {
					const runPath = path.join(this.reportsDir, entry.name, "run.json")
					try {
						const stats = await fs.stat(runPath)
						return { runPath, mtimeMs: stats.mtimeMs }
					} catch {
						return null
					}
				}),
		)

		const candidates = probed.filter(
			(candidate): candidate is { runPath: string; mtimeMs: number } => candidate !== null,
		)
		candidates.sort((left, right) => right.mtimeMs - left.mtimeMs)

		for (const candidate of candidates) {
			try {
				const raw = await fs.readFile(candidate.runPath, "utf-8")
				return JSON.parse(raw) as CuratorReport
			} catch {
				// Unreadable or corrupt report: try the next newest one.
			}
		}
		return null
	} catch {
		return null
	}
}

/** Returns the live configuration object (typed read-only; not a copy). */
getConfig(): Readonly<CuratorConfig> {
	return this.config
}

/** Merge `config` into the current configuration. */
setConfig(config: Partial<CuratorConfig>): void {
	this.config = { ...this.config, ...config }
}

/** Returns scheduling state plus a shallow copy of the configuration. */
getStatus(): CuratorStatus {
	return {
		lastRunAt: this.lastRunAt,
		firstRunDone: this.firstRunDone,
		config: { ...this.config },
	}
}
/**
 * Restore a backup from a tar.gz file.
 * Snapshots the current skill directory into a new `pre-restore-*.tar.gz`
 * (so the restore can be undone), empties the directory, and extracts the
 * chosen backup into place.
 *
 * @param backupPath Absolute path to the .tar.gz backup file
 * @returns true if restore succeeded, false otherwise
 */
async restoreBackup(backupPath: string): Promise<boolean> {
	const skillsDir = this.config.skillsDir
	if (!skillsDir) {
		this.logger.appendLine("[CuratorService] restoreBackup: no skillsDir configured")
		return false
	}

	try {
		await fs.access(backupPath)
	} catch {
		this.logger.appendLine(`[CuratorService] restoreBackup: backup not found: ${backupPath}`)
		return false
	}

	try {
		const timestamp = Date.now()
		const undoBackupName = `pre-restore-${timestamp}.tar.gz`
		const undoBackupPath = path.join(this.backupsDir, undoBackupName)

		// A restore is most needed when the skills directory is gone, and it may
		// be requested before initialize() created the backups directory.
		await fs.mkdir(skillsDir, { recursive: true })
		await fs.mkdir(this.backupsDir, { recursive: true })

		// Save the current skill dir as a new tar.gz backup (undo safety net)
		this.logger.appendLine(`[CuratorService] Saving pre-restore snapshot to ${undoBackupPath}`)
		const currentFiles: Array<{ path: string; content: Buffer }> = []
		await this.collectFilesRecursive(skillsDir, skillsDir, currentFiles)
		await createTarGzip(currentFiles, undoBackupPath)

		// Remove current skill dir contents
		await this.clearDirectory(skillsDir)

		// Extract the chosen backup
		this.logger.appendLine(`[CuratorService] Restoring from ${backupPath}`)
		await extractTarGzip(backupPath, skillsDir)

		this.logger.appendLine("[CuratorService] Restore complete")
		return true
	} catch (error) {
		this.logger.appendLine(
			`[CuratorService] Restore error: ${error instanceof Error ? error.message : String(error)}`,
		)
		return false
	}
}

// ──── Private helpers ────

/**
 * Load `lastRunAt` / `firstRunDone` from the state file. A missing, unreadable
 * or malformed file resets both to their initial values.
 */
private async loadState(): Promise<void> {
	try {
		const raw = await fs.readFile(this.statePath, "utf-8")
		const parsed = JSON.parse(raw) as Partial<CuratorStatus>
		// Validate field by field: the file content is not trusted to match the type.
		this.lastRunAt = typeof parsed.lastRunAt === "number" ? parsed.lastRunAt : 0
		this.firstRunDone = parsed.firstRunDone === true
	} catch {
		this.lastRunAt = 0
		this.firstRunDone = false
	}
}
/** Persist `lastRunAt` / `firstRunDone`; write failures are logged, not thrown. */
private async saveState(): Promise<void> {
	try {
		await safeWriteJson(
			this.statePath,
			{
				lastRunAt: this.lastRunAt,
				firstRunDone: this.firstRunDone,
			},
			{ prettyPrint: true },
		)
	} catch (error) {
		this.logger.appendLine(
			`[CuratorService] Save state error: ${error instanceof Error ? error.message : String(error)}`,
		)
	}
}

/** True only before the very first run, and only when deferral is configured. */
private shouldDeferFirstRun(): boolean {
	return this.config.firstRunDeferred && !this.firstRunDone && this.lastRunAt === 0
}

/**
 * Create a tar.gz backup of skills directory.
 * Falls back to JSON snapshot if skillsDir is not configured.
 *
 * @returns Path of the created archive file or snapshot directory.
 */
private async createBackup(runId: string): Promise<string> {
	if (this.config.skillsDir) {
		return this.createTarBackup(runId)
	}
	return this.createJsonSnapshotBackup(runId)
}

/**
 * Archive every file under the skills directory plus a `manifest.json`
 * describing the backup, then prune old backups.
 */
private async createTarBackup(runId: string): Promise<string> {
	const skillsDir = this.config.skillsDir
	if (!skillsDir) {
		// Defensive: createBackup only routes here when skillsDir is set.
		return this.createJsonSnapshotBackup(runId)
	}

	const backupName = `backup-${Date.now()}-${runId}.tar.gz`
	const backupPath = path.join(this.backupsDir, backupName)
	const files: Array<{ path: string; content: Buffer }> = []

	try {
		await this.collectFilesRecursive(skillsDir, skillsDir, files)
	} catch (error) {
		// An unreadable skills dir still yields a manifest-only backup.
		this.logger.appendLine(
			`[CuratorService] Warning: could not read skills dir ${skillsDir}: ${error instanceof Error ? error.message : String(error)}`,
		)
	}

	// Build manifest. Note: skillCount is the number of collected files.
	const manifest = {
		createdAt: Date.now(),
		runId,
		type: "curator-backup",
		skillCount: files.length,
		files: files.map((f) => f.path),
		curatorState: {
			lastRunAt: this.lastRunAt,
			firstRunDone: this.firstRunDone,
		},
		skillUsage: this.skillUsageStore.getAll(),
	}

	const manifestJson = Buffer.from(JSON.stringify(manifest, null, "\t"), "utf-8")

	// Put the manifest first so it is the first entry in the archive.
	files.unshift({
		path: "manifest.json",
		content: manifestJson,
	})

	await createTarGzip(files, backupPath)

	await this.cleanupOldBackups()
	return backupPath
}

/**
 * Write a `snapshot.json` holding curator state and skill usage into a new
 * `backup-*` directory, then prune old backups.
 */
private async createJsonSnapshotBackup(runId: string): Promise<string> {
	const backupDir = path.join(this.backupsDir, `backup-${Date.now()}-${runId}`)
	await fs.mkdir(backupDir, { recursive: true })
	await safeWriteJson(
		path.join(backupDir, "snapshot.json"),
		{
			createdAt: Date.now(),
			curatorState: {
				lastRunAt: this.lastRunAt,
				firstRunDone: this.firstRunDone,
			},
			skillUsage: this.skillUsageStore.getAll(),
		},
		{ prettyPrint: true },
	)
	await this.cleanupOldBackups()
	return backupDir
}

/**
 * Depth-first walk that appends every regular file below `currentDir` to `acc`.
 * Entries that are neither directories nor regular files are ignored.
 *
 * @param baseDir Directory the stored paths are made relative to.
 * @param currentDir Directory being listed in this call.
 * @param acc Output list; paths use "/" separators regardless of platform.
 */
private async collectFilesRecursive(
	baseDir: string,
	currentDir: string,
	acc: Array<{ path: string; content: Buffer }>,
): Promise<void> {
	const entries = await fs.readdir(currentDir, { withFileTypes: true })
	for (const entry of entries) {
		const fullPath = path.join(currentDir, entry.name)
		if (entry.isDirectory()) {
			await this.collectFilesRecursive(baseDir, fullPath, acc)
		} else if (entry.isFile()) {
			// Archive member names are "/"-separated; path.relative yields "\\" on Windows.
			const relativePath = path.relative(baseDir, fullPath).split(path.sep).join("/")
			const content = await fs.readFile(fullPath)
			acc.push({ path: relativePath, content })
		}
	}
}

/** Remove everything inside `dir` (not `dir` itself). Best effort: errors are swallowed. */
private async clearDirectory(dir: string): Promise<void> {
	try {
		const entries = await fs.readdir(dir, { withFileTypes: true })
		for (const entry of entries) {
			const fullPath = path.join(dir, entry.name)
			if (entry.isDirectory()) {
				await fs.rm(fullPath, { recursive: true, force: true })
			} else {
				await fs.unlink(fullPath)
			}
		}
	} catch {
		// Best effort
	}
}
/**
 * Cleanup old backups: keep the `maxBackups` most recently modified and delete the rest.
 * Supports both directory-based (JSON snapshot, `backup-*`) and file-based
 * (`*.tar.gz`, which includes pre-restore snapshots) backups.
 */
private async cleanupOldBackups(): Promise<void> {
	try {
		const entries = await fs.readdir(this.backupsDir, { withFileTypes: true })
		const backups: Array<{ path: string; mtimeMs: number }> = []

		for (const entry of entries) {
			const isSnapshotDir = entry.isDirectory() && entry.name.startsWith("backup-")
			const isArchive = entry.isFile() && entry.name.endsWith(".tar.gz")
			if (!isSnapshotDir && !isArchive) {
				continue
			}

			const backupPath = path.join(this.backupsDir, entry.name)
			const stats = await fs.stat(backupPath)
			backups.push({ path: backupPath, mtimeMs: stats.mtimeMs })
		}

		// Newest first, so everything past maxBackups is the stale tail.
		backups.sort((left, right) => right.mtimeMs - left.mtimeMs)
		for (const staleBackup of backups.slice(this.config.maxBackups)) {
			// fs.rm removes files and directories alike; no second stat needed.
			await fs.rm(staleBackup.path, { recursive: true, force: true })
		}
	} catch {
		// Best-effort retention cleanup.
	}
}

/** Copy the store's current skill counters into `report.stats` (transitionsApplied is left alone). */
private assignStats(report: CuratorReport): void {
	const stats = this.skillUsageStore.getStats()
	report.stats.totalSkills = stats.total
	report.stats.activeSkills = stats.active
	report.stats.staleSkills = stats.stale
	report.stats.archivedSkills = stats.archived
	report.stats.pinnedSkills = stats.pinned
}

/**
 * Apply the age-based lifecycle rules: stale candidates become "stale" and
 * archive candidates become "archived" (and are classified). Protected
 * skills are skipped in both passes.
 *
 * @returns The transitions performed and one classification per archived skill.
 */
private async applyDeterministicTransitions(): Promise<{
	transitions: CuratorReport["transitions"]
	classifications: NonNullable<CuratorReport["classifications"]>
}> {
	const transitions: CuratorReport["transitions"] = []
	const classifications: NonNullable<CuratorReport["classifications"]> = []

	for (const candidate of this.skillUsageStore.getStaleCandidates(this.config.staleAfterDays)) {
		if (this.isProtected(candidate)) {
			continue
		}

		await this.skillUsageStore.transitionState(candidate.skillId, "stale")
		transitions.push({
			skillId: candidate.skillId,
			skillName: candidate.skillName,
			fromState: "active",
			toState: "stale",
			reason: `No activity for ${this.config.staleAfterDays} days`,
		})
	}

	for (const candidate of this.skillUsageStore.getArchiveCandidates(this.config.archiveAfterDays)) {
		if (this.isProtected(candidate)) {
			continue
		}

		await this.skillUsageStore.transitionState(candidate.skillId, "archived")

		// Classify the archived skill using three-tier classification.
		// Without a configured skillsDir the skill is looked up under the curator's own directory.
		const skillDir = this.config.skillsDir
			? path.join(this.config.skillsDir, candidate.skillId)
			: path.join(this.baseDir, "skills", candidate.skillId)
		const classification = await this.classifyArchivedSkill(candidate.skillId, candidate.skillName, skillDir)
		classifications.push({
			skillId: candidate.skillId,
			skillName: candidate.skillName,
			...classification,
		})

		transitions.push({
			skillId: candidate.skillId,
			skillName: candidate.skillName,
			fromState: "stale",
			toState: "archived",
			reason: `No activity for ${this.config.archiveAfterDays} days`,
		})
	}

	return { transitions, classifications }
}
path.join(this.config.skillsDir, candidate.skillId) + : path.join(this.baseDir, "skills", candidate.skillId) + const classification = await this.classifyArchivedSkill(candidate.skillId, candidate.skillName, skillDir) + classifications.push({ + skillId: candidate.skillId, + skillName: candidate.skillName, + ...classification, + }) + + transitions.push({ + skillId: candidate.skillId, + skillName: candidate.skillName, + fromState: "stale", + toState: "archived", + reason: `No activity for ${this.config.archiveAfterDays} days`, + }) + } + + return { transitions, classifications } + } + + private isProtected(record: SkillTelemetryRecord): boolean { + return record.pinned || record.createdBy !== "agent" + } + + /** + * Three-tier classification for an archived skill. + * + * Tier 1 (declared): Check for absorbed_into declaration in the skill usage store + * (set by skill_manage delete or curator merge/demote actions). + * + * Tier 2 (yaml_block): Parse YAML front matter from the skill's SKILL.md for + * an absorbed_into field. + * + * Tier 3 (heuristic): Fallback heuristic audit — check if the skill name + * suggests it may be an umbrella candidate. 
/**
 * Three-tier classification for an archived skill.
 *
 * Tier 1 (declared):   the usage-store record carries `absorbedInto`.
 * Tier 2 (yaml_block): the front matter of `<skillDir>/SKILL.md` has a
 *                      top-level `absorbed_into:` key.
 * Tier 3 (heuristic):  the skill name starts with `umb`, `parent` or
 *                      `umbrella` followed by `-` or `_`.
 *
 * If none of the tiers applies the skill is reported as "unclassified".
 *
 * The return type is derived from `CuratorReport["classifications"]` so the
 * result can be spread straight into a report entry next to
 * `skillId` / `skillName`, which is how the callers in this class use it.
 *
 * @param skillId   id used to look the record up in the usage store
 * @param skillName display name, used by the heuristic tier and in summaries
 * @param skillDir  directory expected to contain the skill's SKILL.md
 */
private async classifyArchivedSkill(
	skillId: string,
	skillName: string,
	skillDir: string,
): Promise<Omit<NonNullable<CuratorReport["classifications"]>[number], "skillId" | "skillName">> {
	// Tier 1: explicit declaration stored on the telemetry record.
	const record = this.skillUsageStore.get(skillId)
	if (record?.absorbedInto) {
		return {
			tier: "declared",
			absorbedInto: record.absorbedInto,
			summary: `Explicitly absorbed into "${record.absorbedInto}"`,
			confidence: "high",
		}
	}

	// Tier 2: `absorbed_into` key in the SKILL.md front matter.
	let content: string | undefined
	try {
		content = await fs.readFile(path.join(skillDir, "SKILL.md"), "utf-8")
	} catch {
		// SKILL.md doesn't exist or can't be read — fall through to the heuristic tier.
	}

	if (content !== undefined) {
		// `\r?` keeps files saved with CRLF line endings working.
		const frontMatter = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content)?.[1]
		// The `m` flag lets `^`/`$` match at every line, so the key is found
		// wherever it sits in the block (without it only the last line could
		// match). `[ \t]*` instead of `\s*` stops an empty value from
		// swallowing the following line.
		const declared = frontMatter
			? /^absorbed_into:[ \t]*["']?(.+?)["']?[ \t]*\r?$/m.exec(frontMatter)?.[1]?.trim()
			: undefined
		if (declared) {
			return {
				tier: "yaml_block",
				absorbedInto: declared,
				summary: `YAML front matter declares absorption into "${declared}"`,
				confidence: "medium",
			}
		}
	}

	// Tier 3: name-based hint only; no absorption target can be derived from it.
	if (/^(umb|parent|umbrella)[-_]/i.test(skillName)) {
		return {
			tier: "heuristic",
			absorbedInto: null,
			summary: `Skill name suggests it may be an umbrella candidate: "${skillName}"`,
			confidence: "low",
		}
	}

	return {
		tier: "unclassified",
		absorbedInto: null,
		summary: "No classification data available",
		confidence: "low",
	}
}
/**
 * Run the LLM-based curator review.
 *
 * Renders the agent-created candidates (and the pinned list) into a prompt,
 * submits it to `this.llmProvider`, applies every returned action to the
 * skill usage store and records the outcome on `report`
 * (`llmActions`, `transitions`, `absorbedSkills`, `classifications`,
 * `cronReferencesUpdated`).
 *
 * Errors never propagate to the caller: they are logged, and whatever was
 * applied before the failure is still written to the report.
 */
private async runCuratorReview(report: CuratorReport): Promise<void> {
	try {
		const candidates = this.skillUsageStore.getAgentCreatedForReview()
		if (candidates.length === 0) {
			return
		}

		const pinned = this.skillUsageStore.getAgentCreatedPinned()
		const candidateTable = this.renderCandidateList(candidates, pinned)
		this.logger.appendLine(
			`[CuratorService] LLM review: ${candidates.length} agent-created candidates, ${pinned.length} pinned`,
		)

		const prompt =
			CURATOR_REVIEW_PROMPT + "\n" + candidateTable + "\n\nReturn ONLY valid YAML with an 'actions' key."

		// An empty action list means there is nothing to apply.
		const actions = await this.llmProvider.review(prompt)
		if (actions.length === 0) {
			this.logger.appendLine("[CuratorService] No LLM actions returned")
			return
		}

		report.llmActions = actions
		const absorbedSkills: NonNullable<CuratorReport["absorbedSkills"]> = []
		const llmClassifications: NonNullable<CuratorReport["classifications"]> = []

		try {
			for (const action of actions) {
				switch (action.action) {
					case "merge": {
						const cronRefs: NonNullable<CuratorReport["cronReferencesUpdated"]> = []

						for (const skillName of action.absorb) {
							// Guard against an umbrella being told to absorb itself.
							if (skillName === action.target) {
								this.logger.appendLine(
									`[CuratorService] Merge: cannot absorb "${skillName}" into itself`,
								)
								continue
							}

							const record = this.findRecordBySkillName(skillName)
							if (!record || record.pinned) {
								this.logger.appendLine(
									`[CuratorService] Merge: cannot absorb "${skillName}" — not found or pinned`,
								)
								continue
							}

							// Read the state before the store is asked to change it.
							const prevState = record.state
							await this.skillUsageStore.archive(record.skillId)
							await this.skillUsageStore.setAbsorbedInto(record.skillId, action.target)

							absorbedSkills.push({
								skillName: record.skillName,
								absorbedInto: action.target,
							})
							llmClassifications.push(await this.classifyForReport(record))

							report.transitions.push({
								skillId: record.skillId,
								skillName: record.skillName,
								fromState: prevState,
								toState: "archived",
								reason: `Absorbed into umbrella skill "${action.target}"`,
							})

							// Cron references are rewritten only for skills that were
							// really absorbed; skipped (pinned / unknown) skills keep theirs.
							cronRefs.push(
								...(await this.rewriteSkillRefs(record.skillId, action.target, action.target)),
							)
						}

						if (cronRefs.length > 0) {
							report.cronReferencesUpdated = [...(report.cronReferencesUpdated ?? []), ...cronRefs]
						}
						break
					}

					case "archive": {
						const record = this.findRecordBySkillName(action.name)
						if (!record || record.pinned) {
							this.logger.appendLine(
								`[CuratorService] Archive: cannot archive "${action.name}" — not found or pinned`,
							)
							continue
						}

						// Captured first so the report shows the state the skill came from
						// even if the store updates the record in place.
						const prevState = record.state
						await this.skillUsageStore.transitionState(record.skillId, "archived")
						llmClassifications.push(await this.classifyForReport(record))

						report.transitions.push({
							skillId: record.skillId,
							skillName: record.skillName,
							fromState: prevState,
							toState: "archived",
							reason: "LLM review: low-value / superseded skill",
						})
						break
					}

					case "pin": {
						const record = this.findRecordBySkillName(action.name)
						if (!record) {
							this.logger.appendLine(`[CuratorService] Pin: skill "${action.name}" not found`)
							continue
						}
						await this.skillUsageStore.pin(record.skillId)
						break
					}

					case "unpin": {
						const record = this.findRecordBySkillName(action.name)
						if (!record) {
							this.logger.appendLine(`[CuratorService] Unpin: skill "${action.name}" not found`)
							continue
						}
						await this.skillUsageStore.unpin(record.skillId)
						break
					}

					case "restore": {
						const record = this.findRecordBySkillName(action.name)
						if (!record) {
							this.logger.appendLine(`[CuratorService] Restore: skill "${action.name}" not found`)
							continue
						}

						const prevState = record.state
						await this.skillUsageStore.restore(record.skillId)
						report.transitions.push({
							skillId: record.skillId,
							skillName: record.skillName,
							fromState: prevState,
							toState: "active",
							reason: "LLM review: restored from archival",
						})
						break
					}

					case "demote": {
						const record = this.findRecordBySkillName(action.name)
						if (!record || record.pinned) {
							this.logger.appendLine(
								`[CuratorService] Demote: cannot demote "${action.name}" — not found or pinned`,
							)
							continue
						}

						const prevState = record.state
						// NOTE(review): executeDemotion logs its own failures and returns
						// nothing, so the skill is archived below even when the file move
						// did not happen — confirm that this is intended.
						await this.executeDemotion(action)
						await this.skillUsageStore.archive(record.skillId)
						await this.skillUsageStore.setAbsorbedInto(record.skillId, action.umbrellaSkillName)

						absorbedSkills.push({
							skillName: record.skillName,
							absorbedInto: action.umbrellaSkillName,
						})
						llmClassifications.push(await this.classifyForReport(record))

						report.transitions.push({
							skillId: record.skillId,
							skillName: record.skillName,
							fromState: prevState,
							toState: "archived",
							reason: `Demoted to ${action.demoteTarget} under umbrella "${action.umbrellaSkillName}"`,
						})

						const demoteRefs = await this.rewriteSkillRefs(
							record.skillId,
							action.umbrellaSkillId,
							action.umbrellaSkillName,
						)
						if (demoteRefs.length > 0) {
							report.cronReferencesUpdated = [...(report.cronReferencesUpdated ?? []), ...demoteRefs]
						}
						break
					}
				}
			}
		} finally {
			// Runs even when an action throws, so work that was already persisted
			// in the store is not missing from the report.
			if (absorbedSkills.length > 0) {
				report.absorbedSkills = absorbedSkills
			}
			if (llmClassifications.length > 0) {
				if (report.classifications) {
					report.classifications.push(...llmClassifications)
				} else {
					report.classifications = llmClassifications
				}
			}
		}

		this.logger.appendLine(
			`[CuratorService] LLM review applied: ${actions.length} actions (${absorbedSkills.length} absorbed, ${llmClassifications.length} classified)`,
		)
	} catch (error) {
		this.logger.appendLine(
			`[CuratorService] Review error: ${error instanceof Error ? error.message : String(error)}`,
		)
	}
}

/**
 * Classify a just-archived record and shape the result as a report entry.
 *
 * The skill directory is `<config.skillsDir>/<skillId>`, or
 * `<baseDir>/skills/<skillId>` when no skills directory is configured.
 */
private async classifyForReport(
	record: SkillTelemetryRecord,
): Promise<NonNullable<CuratorReport["classifications"]>[number]> {
	const skillDir = this.config.skillsDir
		? path.join(this.config.skillsDir, record.skillId)
		: path.join(this.baseDir, "skills", record.skillId)
	const classification = await this.classifyArchivedSkill(record.skillId, record.skillName, skillDir)
	return {
		skillId: record.skillId,
		skillName: record.skillName,
		...classification,
	}
}
/**
 * Find a telemetry record by skill name.
 *
 * The comparison is an exact, case-sensitive match; the first matching
 * record returned by the store wins.
 */
private findRecordBySkillName(skillName: string): SkillTelemetryRecord | undefined {
	return this.skillUsageStore.getAll().find((record) => record.skillName === skillName)
}

/**
 * Find cron job files that mention a given skill.
 *
 * Looks in the `cron` directory that sits next to the skills directory
 * (`<skillsDir or baseDir>/../cron`) and inspects every regular `.json`,
 * `.yaml` or `.yml` file. A file counts as a reference when its text contains
 * the skill id, or the id with `_` / `-` replaced by spaces.
 *
 * A missing cron directory yields an empty list. Any other directory error
 * is logged and also yields an empty list, so a broken cron folder cannot
 * abort the caller. Files that cannot be read are skipped.
 *
 * @returns one entry per matching file; `skillName` is filled with the id
 *          because only the id is known here.
 */
private async findCronReferences(skillId: string): Promise<CronJobReference[]> {
	const references: CronJobReference[] = []
	const cronDir = path.join(this.config.skillsDir ?? this.baseDir, "..", "cron")
	const spacedId = skillId.replace(/[_-]/g, " ")
	const cronExtensions = [".json", ".yaml", ".yml"]

	try {
		const entries = await fs.readdir(cronDir, { withFileTypes: true })

		for (const entry of entries) {
			if (!entry.isFile() || !cronExtensions.some((ext) => entry.name.endsWith(ext))) {
				continue
			}

			const filePath = path.join(cronDir, entry.name)
			try {
				const content = await fs.readFile(filePath, "utf-8")
				if (content.includes(skillId) || content.includes(spacedId)) {
					references.push({
						jobId: entry.name.replace(/\.(json|yaml|yml)$/, ""),
						jobName: entry.name,
						skillId,
						skillName: skillId,
						filePath,
					})
				}
			} catch {
				// Unreadable file: ignore it and keep scanning the others.
				continue
			}
		}
	} catch (error) {
		// ENOENT simply means there is no cron directory; anything else is worth a log line.
		if ((error as NodeJS.ErrnoException).code !== "ENOENT") {
			this.logger.appendLine(
				`[CuratorService] Could not scan cron directory "${cronDir}": ${error instanceof Error ? error.message : String(error)}`,
			)
		}
	}

	return references
}
/**
 * Rewrite skill references in cron job files after a consolidation.
 *
 * Every file returned by `findCronReferences(oldSkillId)` is rewritten in a
 * single pass:
 *   - an exact occurrence of `oldSkillId` becomes `newSkillId`;
 *   - a spelling variant (each `_` / `-` of the id replaced by an optional
 *     space, `_` or `-`) becomes `newSkillName`.
 *
 * Doing both in one pass matters: a second pass over already-rewritten text
 * would match the old id again inside the new one (e.g. "deploy" inside
 * "deploy-tools") and corrupt it.
 *
 * NOTE(review): matches are plain substrings, so an id that is a fragment of
 * a longer word is rewritten too — confirm whether delimiters should be required.
 *
 * @returns one entry per file whose content was changed and written back.
 *          Failures are logged per file and do not stop the remaining files.
 */
private async rewriteSkillRefs(
	oldSkillId: string,
	newSkillId: string,
	newSkillName: string,
): Promise<Array<{ jobName: string; oldSkillId: string; newSkillId: string }>> {
	const references = await this.findCronReferences(oldSkillId)
	if (references.length === 0) return []

	this.logger.appendLine(
		`[CuratorService] Rewriting ${references.length} cron reference(s) for skill "${oldSkillId}" → "${newSkillId}"`,
	)

	// Escape regex metacharacters first, then relax the separators. `_` and `-`
	// are not touched by the escaping step, so the second replace is safe.
	const escapedOldId = oldSkillId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
	const matcher = new RegExp(escapedOldId.replace(/[_-]/g, "[ _-]?"), "g")

	const updatedRefs: Array<{ jobName: string; oldSkillId: string; newSkillId: string }> = []

	for (const ref of references) {
		try {
			const original = await fs.readFile(ref.filePath, "utf-8")

			// A replacer function is used so that `$&`, `$1`, … inside the new
			// id/name are inserted literally instead of being interpreted.
			const updated = original.replace(matcher, (match) =>
				match === oldSkillId ? newSkillId : newSkillName,
			)

			if (updated === original) {
				// Nothing changed (e.g. old and new id are identical): leave the file alone.
				continue
			}

			await fs.writeFile(ref.filePath, updated, "utf-8")
			this.logger.appendLine(` ✓ Updated cron reference in "${ref.jobName}"`)

			updatedRefs.push({
				jobName: ref.jobName,
				oldSkillId,
				newSkillId,
			})
		} catch (err) {
			this.logger.appendLine(
				` ✗ Failed to update cron reference in "${ref.jobName}": ${err instanceof Error ? err.message : String(err)}`,
			)
		}
	}

	return updatedRefs
}
/**
 * Execute a demotion action on disk.
 *
 * Moves every entry of the demoted skill's directory into
 * `<umbrella>/<demoteTarget>s/`, prefixing each entry with the skill name,
 * and then removes the now-empty source directory.
 *
 * The method is a no-op (with a log line where useful) when:
 *   - the action is not a "demote" action, or lacks `demoteTarget` / `umbrellaSkillId`;
 *   - no `skillsDir` is configured;
 *   - the skill record cannot be found;
 *   - source or umbrella would resolve outside `skillsDir`, are the same
 *     directory, or the umbrella lies inside the source (removing the source
 *     would then destroy what was just moved).
 *
 * File-system errors are logged, never thrown. If a move fails the source
 * directory is left in place, so nothing is deleted that was not moved.
 * Store updates (archiving, absorbed-into) are the caller's job.
 */
private async executeDemotion(action: CuratorAction): Promise<void> {
	if (action.action !== "demote") return

	const { name, demoteTarget, umbrellaSkillId, umbrellaSkillName } = action
	if (!demoteTarget || !umbrellaSkillId) return

	const skillsDir = this.config.skillsDir
	if (!skillsDir) {
		this.logger.appendLine("[CuratorService] executeDemotion: no skillsDir configured")
		return
	}

	// The record supplies the skill id, which names the source directory.
	const record = this.findRecordBySkillName(name)
	if (!record) {
		this.logger.appendLine(`[CuratorService] executeDemotion: skill "${name}" not found`)
		return
	}

	const skillsRoot = path.resolve(skillsDir)
	const sourcePath = path.resolve(skillsRoot, record.skillId)
	const umbrellaPath = path.resolve(skillsRoot, umbrellaSkillId)
	const isInsideRoot = (candidate: string): boolean => candidate.startsWith(skillsRoot + path.sep)

	// The ids come from an LLM response, so they are checked before any file is touched.
	if (
		!isInsideRoot(sourcePath) ||
		!isInsideRoot(umbrellaPath) ||
		sourcePath === umbrellaPath ||
		umbrellaPath.startsWith(sourcePath + path.sep)
	) {
		this.logger.appendLine(
			`[CuratorService] executeDemotion: refusing to demote "${name}" — invalid source or umbrella path`,
		)
		return
	}

	const targetDir = path.join(umbrellaPath, `${demoteTarget}s`)

	try {
		await fs.mkdir(targetDir, { recursive: true })

		// Move files AND sub-directories; anything left behind would be
		// deleted by the recursive removal below.
		const entries = await fs.readdir(sourcePath)
		for (const entry of entries) {
			await fs.rename(path.join(sourcePath, entry), path.join(targetDir, `${name}-${entry}`))
		}

		await fs.rm(sourcePath, { recursive: true, force: true })

		this.logger.appendLine(
			`Demoted skill "${name}" to ${demoteTarget} under "${umbrellaSkillName || umbrellaSkillId}"`,
		)
	} catch (error) {
		this.logger.appendLine(
			`[CuratorService] Demotion error for "${name}": ${error instanceof Error ? error.message : String(error)}`,
		)
	}
}
/**
 * Render the candidate list that is appended to the LLM review prompt.
 *
 * Output is Markdown: a table with one row per candidate (name, state,
 * pinned flag, absorbed_into, counters, age of last activity in whole days
 * or "never"), followed by a bullet list of pinned skills, or "(none)".
 *
 * NOTE(review): the "frequency" and "use" columns both print `useCount` —
 * confirm whether a separate frequency value was intended.
 */
private renderCandidateList(candidates: SkillTelemetryRecord[], pinned: SkillTelemetryRecord[]): string {
	const dayMs = 24 * 60 * 60 * 1000
	const now = Date.now()

	const rows = candidates.map((skill) => {
		const lastActivity =
			skill.lastActivityAt > 0 ? `${Math.round((now - skill.lastActivityAt) / dayMs)}d ago` : "never"
		// Narrow structural cast instead of `any`: only this one optional field is read.
		const absorbedInto = (skill as { absorbedInto?: string | null }).absorbedInto ?? ""
		return (
			`| ${skill.skillName} | ${skill.state} | ${skill.pinned ? "yes" : "no"} | ${absorbedInto} | ` +
			`${skill.useCount} | ${skill.useCount} | ${skill.viewCount} | ${skill.patchCount} | ${lastActivity} |`
		)
	})

	const pinnedLines =
		pinned.length > 0 ? pinned.map((skill) => `- ${skill.skillName} (${skill.state})`) : ["(none)"]

	return [
		"## Agent-Created Skills (candidates for review)",
		"",
		"| name | state | pinned | absorbed_into | frequency | use | view | patch | last_activity |",
		"|------|-------|--------|---------------|-----------|-----|------|-------|---------------|",
		...rows,
		"",
		`## Pinned Agent-Created Skills (excluded from mutations)`,
		"",
		...pinnedLines,
	].join("\n")
}

/**
 * Persist a run report under `<reportsDir>/<runId>/`.
 *
 * Writes `run.json` (pretty-printed, via `safeWriteJson`) and `REPORT.md`
 * (from `buildReportMarkdown`). Failures are logged and swallowed so that a
 * reporting problem never fails the curator run itself.
 */
private async writeReport(report: CuratorReport): Promise<void> {
	try {
		const runDir = path.join(this.reportsDir, report.runId)
		await fs.mkdir(runDir, { recursive: true })

		const jsonPath = path.join(runDir, "run.json")
		await safeWriteJson(jsonPath, report, { prettyPrint: true })

		const markdownPath = path.join(runDir, "REPORT.md")
		await fs.writeFile(markdownPath, this.buildReportMarkdown(report), "utf-8")
	} catch (error) {
		this.logger.appendLine(
			`[CuratorService] Report write error: ${error instanceof Error ? error.message : String(error)}`,
		)
	}
}
/**
 * Build the Markdown run report.
 *
 * Sections, each emitted only when it has content: summary table,
 * transitions, LLM consolidation decisions, absorbed skills, archived-skill
 * classifications, updated cron references, backup path and error text.
 *
 * Free-text table cells are escaped so that a `|` or a line break inside a
 * name, reason or summary cannot break the table layout.
 *
 * "Net Change" is the final total minus the pre-consolidation count:
 * `-N` when skills were removed, `+N` when added, `0` when unchanged.
 */
private buildReportMarkdown(report: CuratorReport): string {
	// Escape characters that would end a Markdown table cell or row.
	const cell = (value: unknown): string => String(value).replace(/\|/g, "\\|").replace(/\r?\n/g, " ")

	const { stats } = report
	const lines = [
		`# Curator Run Report: ${report.runId}`,
		"",
		`**Timestamp**: ${new Date(report.timestamp).toISOString()}`,
		`**Duration**: ${report.durationMs}ms`,
		"",
		"## Summary",
		"",
		"| Metric | Value |",
		"|--------|-------|",
		`| Total Skills | ${stats.totalSkills} |`,
		`| Active | ${stats.activeSkills} |`,
		`| Stale | ${stats.staleSkills} |`,
		`| Archived | ${stats.archivedSkills} |`,
		`| Pinned | ${stats.pinnedSkills} |`,
		`| Transitions Applied | ${stats.transitionsApplied} |`,
	]

	if (typeof report.preConsolidationCount === "number") {
		lines.push(`| Pre-Consolidation Count | ${report.preConsolidationCount} |`)
		// Negative numbers already carry their "-"; only growth needs an explicit "+",
		// and an unchanged count is printed as plain "0" (not "-0").
		const netChange = stats.totalSkills - report.preConsolidationCount
		lines.push(`| Net Change | ${netChange > 0 ? "+" : ""}${netChange} |`)
	}

	lines.push("")

	if (report.transitions.length > 0) {
		lines.push("## Transitions", "", "| Skill | From | To | Reason |", "|-------|------|----|--------|")
		for (const transition of report.transitions) {
			lines.push(
				`| ${cell(transition.skillName)} | ${transition.fromState} | ${transition.toState} | ${cell(transition.reason)} |`,
			)
		}
		lines.push("")
	}

	// ── Consolidation Decisions ──
	if (report.llmActions && report.llmActions.length > 0) {
		lines.push("## Consolidation Decisions (LLM)", "")

		for (const action of report.llmActions) {
			switch (action.action) {
				case "merge":
					lines.push(
						`- **Merge**: \`${action.target}\` absorbs ${action.absorb.map((a) => `\`${a}\``).join(", ")}`,
					)
					break
				case "archive":
					lines.push(`- **Archive**: \`${action.name}\``)
					break
				case "pin":
					lines.push(`- **Pin**: \`${action.name}\``)
					break
				case "unpin":
					lines.push(`- **Unpin**: \`${action.name}\``)
					break
				case "restore":
					lines.push(`- **Restore**: \`${action.name}\``)
					break
				case "demote":
					lines.push(
						`- **Demote**: \`${action.name}\` → ${action.demoteTarget} under \`${action.umbrellaSkillName}\``,
					)
					break
			}
		}
		lines.push("")
	}

	// ── Absorbed Skills ──
	if (report.absorbedSkills && report.absorbedSkills.length > 0) {
		lines.push("## Absorbed Skills", "", "| Skill | Absorbed Into |", "|-------|---------------|")
		for (const absorbed of report.absorbedSkills) {
			lines.push(`| ${cell(absorbed.skillName)} | ${cell(absorbed.absorbedInto)} |`)
		}
		lines.push("")

		// Generic rationale: the LLM does not supply per-skill reasoning here.
		lines.push(
			"These skills were identified by the LLM curator as overlapping, duplicate, or subsets " +
				"of an umbrella skill. They have been archived and marked as absorbed into their respective " +
				"umbrella target.",
		)
		lines.push("")
	}

	// ── Archived Skill Classifications ──
	if (report.classifications && report.classifications.length > 0) {
		lines.push(
			"## Archived Skill Classifications",
			"",
			"| Skill | Tier | Absorbed Into | Confidence | Summary |",
			"|-------|------|---------------|------------|---------|",
		)
		for (const classification of report.classifications) {
			const absorbedDisplay = classification.absorbedInto ?? "—"
			lines.push(
				`| ${cell(classification.skillName)} | ${classification.tier} | ${cell(absorbedDisplay)} | ${classification.confidence} | ${cell(classification.summary)} |`,
			)
		}
		lines.push("")
	}

	// ── Cron References Updated ──
	if (report.cronReferencesUpdated && report.cronReferencesUpdated.length > 0) {
		lines.push(
			"## Cron References Updated",
			"",
			"| Job File | Old Skill | New Skill |",
			"|----------|-----------|-----------|",
		)
		for (const ref of report.cronReferencesUpdated) {
			lines.push(`| ${cell(ref.jobName)} | ${cell(ref.oldSkillId)} | ${cell(ref.newSkillId)} |`)
		}
		lines.push("")
	}

	if (report.backupPath) {
		lines.push(`**Backup**: ${report.backupPath}`, "")
	}

	if (report.error) {
		lines.push("## Error", "", report.error, "")
	}

	return lines.join("\n")
}
/**
 * Create a specialized skill from observed task patterns.
 *
 * Writes `<skill dir>/SKILL.md` (YAML front matter followed by the
 * instructions), writes any bundled assets next to it and registers the
 * skill in the usage store as agent-created.
 *
 * Front-matter values that are not plain text (for example a description
 * containing `:`, `#`, quotes or a line break) are written as double-quoted
 * scalars so the front matter stays parseable.
 *
 * @param name - Skill name (validated with `validateSkillName`)
 * @param description - Skill description, 1-1024 characters after trimming
 * @param instructions - Markdown instructions body (without front matter)
 * @param options - Optional configuration
 * @param options.source - "global" or "project" (default: "project"); used in the registered skill id
 * @param options.modeSlugs - Mode restrictions written to the front matter when non-empty
 * @param options.tools - Tool references written to the front matter when non-empty
 * @param options.assets - Extra files to write, each path relative to the skill directory
 * @returns Path to the created SKILL.md file
 * @throws Error when the name, description or instructions are invalid, when
 *         the skill already exists, or when an asset path points outside the
 *         skill directory or at SKILL.md itself. All validation happens
 *         before anything is written.
 */
async createSpecializedSkill(
	name: string,
	description: string,
	instructions: string,
	options?: {
		source?: "global" | "project"
		modeSlugs?: string[]
		tools?: string[]
		assets?: Array<{ relativePath: string; content: string }>
	},
): Promise<string> {
	const { validateSkillName } = await import("@roo-code/types")
	const validation = validateSkillName(name)
	if (!validation.valid) {
		throw new Error(`Invalid skill name "${name}": ${validation.error}`)
	}

	const trimmedDescription = description.trim()
	if (trimmedDescription.length < 1 || trimmedDescription.length > 1024) {
		throw new Error(`Description must be 1-1024 characters (got ${trimmedDescription.length})`)
	}

	const body = instructions.trim()
	if (!body) {
		throw new Error("Instructions body cannot be empty")
	}

	const source = options?.source ?? "project"
	const { modeSlugs, tools, assets } = options ?? {}

	// Plain, unambiguous text is written as-is; everything else is emitted as a
	// JSON string, which is also a valid YAML double-quoted scalar.
	const toYamlScalar = (value: string): string =>
		/^[A-Za-z][A-Za-z0-9 _.,;()/+-]*$/.test(value) && !/^(true|false|null|yes|no|on|off)$/i.test(value)
			? value
			: JSON.stringify(value)

	const frontmatterLines = [`name: ${toYamlScalar(name)}`, `description: ${toYamlScalar(trimmedDescription)}`]
	if (modeSlugs && modeSlugs.length > 0) {
		frontmatterLines.push("modeSlugs:", ...modeSlugs.map((slug) => `  - ${toYamlScalar(slug)}`))
	}
	if (tools && tools.length > 0) {
		frontmatterLines.push("tools:", ...tools.map((tool) => `  - ${toYamlScalar(tool)}`))
	}

	const fullContent = `---\n${frontmatterLines.join("\n")}\n---\n\n${body}\n`

	// The skill lives under the configured skills root; without one, the
	// location is derived from the curator's own state directory.
	const skillDir = this.config.skillsDir
		? path.join(this.config.skillsDir, name)
		: path.join(this.baseDir, "..", "..", "..", "skills", name)
	const skillMdPath = path.join(skillDir, "SKILL.md")

	// Resolve every asset target up front so a bad path is rejected before
	// the skill directory is created.
	const resolvedSkillDir = path.resolve(skillDir)
	const resolvedSkillMd = path.resolve(skillMdPath)
	const assetTargets = (assets ?? []).map((asset) => {
		const target = path.resolve(resolvedSkillDir, asset.relativePath)
		if (!target.startsWith(resolvedSkillDir + path.sep) || target === resolvedSkillMd) {
			throw new Error(`Invalid asset path "${asset.relativePath}" for skill "${name}"`)
		}
		return { target, content: asset.content }
	})

	// Refuse to overwrite an existing skill.
	let alreadyExists = true
	try {
		await fs.access(skillMdPath)
	} catch {
		alreadyExists = false
	}
	if (alreadyExists) {
		throw new Error(`Skill "${name}" already exists at ${skillMdPath}`)
	}

	await fs.mkdir(skillDir, { recursive: true })
	await fs.writeFile(skillMdPath, fullContent, "utf-8")

	for (const { target, content } of assetTargets) {
		await fs.mkdir(path.dirname(target), { recursive: true })
		await fs.writeFile(target, content, "utf-8")
	}

	// Register in the skill usage store as an agent-created skill.
	const skillId = `skill:${source}:${name}`
	this.skillUsageStore.getOrCreate(skillId, name, "agent")

	this.logger.appendLine(`[CuratorService] Specialized skill created: ${name} at ${skillMdPath}`)
	return skillMdPath
}
/** Categories an error message can be sorted into by {@link ErrorClassifier}. */
export enum ErrorCategory {
	FILE_NOT_FOUND = "FILE_NOT_FOUND",
	DIRECTORY_CONFUSION = "DIRECTORY_CONFUSION",
	TOOL_NOT_FOUND = "TOOL_NOT_FOUND",
	PERMISSION_DENIED = "PERMISSION_DENIED",
	RATE_LIMITED = "RATE_LIMITED",
	AUTH_FAILED = "AUTH_FAILED",
	NETWORK_ERROR = "NETWORK_ERROR",
	TIMEOUT = "TIMEOUT",
	INVALID_PARAMS = "INVALID_PARAMS",
	MODEL_THOUGHT_FAILURE = "MODEL_THOUGHT_FAILURE",
	EMPTY_RESPONSE = "EMPTY_RESPONSE",
	CASCADE_FAILURE = "CASCADE_FAILURE",
	UNKNOWN = "UNKNOWN",
}

/** Result of classifying one error message. */
export interface ClassifiedError {
	category: ErrorCategory
	severity: 1 | 2 | 3 | 4 | 5
	/** Tool that produced the error, passed through from the caller. */
	toolName?: string
	/** Quoted path (file-not-found) or parameter name (invalid-params) found in the message. */
	paramName?: string
	suggestion: string
	isRecoverable: boolean
	recoveryAction?: string
}

/**
 * Heuristic, text-based error classifier.
 *
 * Rules are evaluated top to bottom and the first match wins, so the order
 * of the checks in `classify` is part of the behaviour.
 *
 * Human-readable phrases are matched case-insensitively ("Rate limit",
 * "Authentication failed", …); errno-style codes (ENOENT, EACCES, …) and HTTP
 * status numbers are matched exactly as written.
 */
export class ErrorClassifier {
	/**
	 * Classify an error message.
	 *
	 * @param errorMessage raw error text; empty, whitespace-only and the
	 *                     literal "Running" count as an empty response
	 * @param toolName     optional tool name, copied into the result
	 * @returns category, severity, suggestion and recovery hint; messages
	 *          that match no rule are reported as UNKNOWN
	 */
	classify(errorMessage: string, toolName?: string): ClassifiedError {
		// Untyped callers may hand in null/undefined; treat that as "no output"
		// instead of crashing on the first string method.
		const message = errorMessage ?? ""
		const lowered = message.toLowerCase()
		const mentions = (...phrases: string[]): boolean => phrases.some((phrase) => lowered.includes(phrase))
		const hasCode = (...codes: string[]): boolean => codes.some((code) => message.includes(code))

		// Empty response — tool returned nothing (Situation D).
		// None of the rules below can match such a message, so checking it
		// first does not change any other classification.
		if (message.trim().length === 0 || message === "Running") {
			return {
				category: ErrorCategory.EMPTY_RESPONSE,
				severity: 3,
				toolName,
				suggestion:
					"The tool returned no output. Verify the command parameters and try again with explicit flags.",
				isRecoverable: true,
				recoveryAction: "retry_with_explicit_params",
			}
		}

		// Directory confusion — trying to read a directory as file (Situations C, E).
		// EISDIR is the code Node.js reports for this.
		if (mentions("is a directory", "cannot read") || hasCode("EISDIR")) {
			return {
				category: ErrorCategory.DIRECTORY_CONFUSION,
				severity: 3,
				toolName,
				suggestion: "Use list_files tool instead of read_file for directories",
				isRecoverable: true,
				recoveryAction: "switch_to_list_files",
			}
		}

		// ripgrep / rg ENOENT (Situation B).
		// "rg" must be a word of its own: a bare substring test would also hit
		// ordinary paths such as ".../org/args.json" and hide a plain missing file.
		if (mentions("ripgrep") || (hasCode("ENOENT") && /\brg(?:\.exe)?\b/.test(message))) {
			return {
				category: ErrorCategory.TOOL_NOT_FOUND,
				severity: 4,
				toolName,
				suggestion:
					"ripgrep is not available. Use 'find' or 'ls' commands via execute_command instead of list_files",
				isRecoverable: true,
				recoveryAction: "use_find_instead_of_list_files",
			}
		}

		// Model thought process failure (Situation A)
		if (mentions("zoo is having trouble", "failure in the model's thought process")) {
			return {
				category: ErrorCategory.MODEL_THOUGHT_FAILURE,
				severity: 4,
				toolName,
				suggestion:
					"[Context Recovery] The previous attempt failed. Break down the task into smaller steps. Try using a simpler approach or different tool. Relevant code context will be injected automatically.",
				isRecoverable: true,
				recoveryAction: "break_down_task",
			}
		}

		// File not found
		if (mentions("no such file", "not found") || hasCode("ENOENT")) {
			return {
				category: ErrorCategory.FILE_NOT_FOUND,
				severity: 3,
				toolName,
				paramName: this.extractPath(message),
				suggestion:
					"Verify the file path exists before reading. Use list_files to check the directory contents first.",
				isRecoverable: true,
				recoveryAction: "verify_path_first",
			}
		}

		// Permission denied
		if (mentions("permission denied") || hasCode("EACCES")) {
			return {
				category: ErrorCategory.PERMISSION_DENIED,
				severity: 4,
				toolName,
				suggestion: "The operation requires elevated permissions. Try using sudo or a different path.",
				isRecoverable: false,
			}
		}

		// Rate limited
		if (mentions("rate limit", "too many requests") || hasCode("429")) {
			return {
				category: ErrorCategory.RATE_LIMITED,
				severity: 3,
				toolName,
				suggestion: "Rate limit exceeded. Wait before retrying or reduce request frequency.",
				isRecoverable: true,
				recoveryAction: "backoff_and_retry",
			}
		}

		// Auth failure
		if (mentions("auth", "unauthorized") || hasCode("401", "403")) {
			return {
				category: ErrorCategory.AUTH_FAILED,
				severity: 5,
				toolName,
				suggestion: "Authentication failed. Check credentials and try again.",
				isRecoverable: false,
			}
		}

		// Network error (checked before the generic timeout rule, so ETIMEDOUT lands here)
		if (mentions("network") || hasCode("ECONNREFUSED", "ECONNRESET", "ETIMEDOUT", "ENOTFOUND")) {
			return {
				category: ErrorCategory.NETWORK_ERROR,
				severity: 4,
				toolName,
				suggestion: "Network error occurred. Check connectivity and try again.",
				isRecoverable: true,
				recoveryAction: "retry_after_connectivity_check",
			}
		}

		// Timeout
		if (mentions("timeout", "timed out")) {
			return {
				category: ErrorCategory.TIMEOUT,
				severity: 3,
				toolName,
				suggestion: "The operation timed out. Try with a longer timeout or smaller scope.",
				isRecoverable: true,
				recoveryAction: "retry_with_longer_timeout",
			}
		}

		// Invalid params
		if (mentions("parameter", "missing", "required")) {
			return {
				category: ErrorCategory.INVALID_PARAMS,
				severity: 3,
				toolName,
				paramName: this.extractParamName(message),
				suggestion: "Check the tool parameters. A required parameter may be missing or invalid.",
				isRecoverable: true,
				recoveryAction: "fix_parameters",
			}
		}

		return {
			category: ErrorCategory.UNKNOWN,
			severity: 3,
			toolName,
			suggestion: "An unexpected error occurred. Check the error details and try a different approach.",
			isRecoverable: false,
		}
	}

	/** Return the first single- or double-quoted substring of the message, if any. */
	private extractPath(errorMessage: string): string | undefined {
		const match = errorMessage.match(/'([^']+)'|"([^"]+)"/)
		return match?.[1] || match?.[2] || undefined
	}

	/** Return the name quoted after the word "parameter" (e.g. `parameter 'path'`), if any. */
	private extractParamName(errorMessage: string): string | undefined {
		const match = errorMessage.match(/parameter\s+'([^']+)'/i)
		return match?.[1] || undefined
	}
}
import crypto from "crypto"

import type { CodeIndexInfo, Experiments, FeedbackSignal, LearningEvent, TaskEventInfo } from "./types"

/** Construction options for {@link FeedbackCollector}. */
interface FeedbackCollectorOptions {
	/** Supplies the current experiment flags; when omitted, no flags are available. */
	getExperiments?: () => Experiments | undefined
}

/**
 * FeedbackCollector - turns raw task / user / tool / code-index signals into
 * structured LearningEvent objects.
 *
 * The collector keeps no event state: every method simply builds and returns
 * a new event (with a fresh UUID and the current timestamp). Storing the
 * events is left to the caller.
 */
export class FeedbackCollector {
	private readonly getExperiments: () => Experiments | undefined

	constructor(options: FeedbackCollectorOptions = {}) {
		const noExperiments = (): Experiments | undefined => undefined
		this.getExperiments = options.getExperiments ?? noExperiments
	}

	/** Fields shared by every event: a new id, the signal and the creation time. */
	private envelope(signal: FeedbackSignal): Pick<LearningEvent, "id" | "signal" | "timestamp"> {
		return {
			id: crypto.randomUUID(),
			signal,
			timestamp: Date.now(),
		}
	}

	/**
	 * Build the event for a finished task.
	 * Success yields TASK_SUCCESS with a +0.05 confidence delta,
	 * failure yields TASK_FAILURE with -0.1.
	 */
	createTaskEvent(info: TaskEventInfo): LearningEvent {
		const signal: FeedbackSignal = info.success ? "TASK_SUCCESS" : "TASK_FAILURE"
		const confidenceDelta = info.success ? 0.05 : -0.1

		return {
			...this.envelope(signal),
			taskId: info.taskId,
			workspacePath: info.workspacePath,
			mode: info.mode,
			context: {
				userTurnCount: info.userTurnCount,
				toolIterationCount: info.toolIterationCount,
				toolNames: info.toolNames,
				promptFingerprint: info.promptFingerprint,
				errorKey: info.errorKey,
			},
			outcome: {
				success: info.success,
				corrected: info.corrected,
				confidenceDelta,
			},
		}
	}

	/**
	 * Build the event for a user correction.
	 * Always marked as corrected, with a -0.15 confidence delta.
	 */
	createCorrectionEvent(info: TaskEventInfo): LearningEvent {
		const { taskId, workspacePath, mode, toolNames, errorKey, promptFingerprint } = info

		return {
			...this.envelope("USER_CORRECTION"),
			taskId,
			workspacePath,
			mode,
			context: {
				toolNames,
				errorKey,
				promptFingerprint,
			},
			outcome: {
				corrected: true,
				confidenceDelta: -0.15,
			},
		}
	}

	/**
	 * Build the event for a repeated pattern.
	 * The pattern id is carried in `context.promptFingerprint`; delta is +0.02.
	 */
	createPatternRepeatEvent(patternId: string, taskId?: string, mode?: string): LearningEvent {
		return {
			...this.envelope("PATTERN_REPEAT"),
			taskId,
			mode,
			context: {
				promptFingerprint: patternId,
			},
			outcome: {
				confidenceDelta: 0.02,
			},
		}
	}

	/**
	 * Build the event for a code-index lookup.
	 * The delta is +0.03 when the lookup produced at least one hit, otherwise 0.
	 */
	createCodeIndexEvent(codeIndex: CodeIndexInfo, taskId?: string): LearningEvent {
		const { available, hits, topScore } = codeIndex

		return {
			...this.envelope("CODE_INDEX_HIT"),
			taskId,
			context: {
				codeIndex: { available, hits, topScore },
			},
			outcome: {
				confidenceDelta: codeIndex.hits > 0 ? 0.03 : 0,
			},
		}
	}

	/**
	 * Build the event for a prompt-quality score.
	 * Scores above 0.5 give +0.01, everything else -0.01.
	 */
	createPromptQualityEvent(quality: number, promptFingerprint?: string): LearningEvent {
		const isGood = quality > 0.5

		return {
			...this.envelope("PROMPT_QUALITY"),
			context: {
				promptFingerprint,
			},
			outcome: {
				confidenceDelta: isGood ? 0.01 : -0.01,
			},
		}
	}

	/**
	 * Build the event for a tool-preference signal.
	 *
	 * Returns `undefined` only when the experiments object explicitly sets
	 * `selfImprovingToolPreference` to `false`; a missing experiments object
	 * or an unset flag still produces an event.
	 * A success rate above 0.5 counts as success (+0.02), otherwise -0.02.
	 */
	createToolPreferenceEvent(toolName: string, successRate: number, taskId?: string): LearningEvent | undefined {
		const flags = this.getExperiments()
		const explicitlyDisabled = flags?.selfImprovingToolPreference === false
		if (explicitlyDisabled) {
			return undefined
		}

		const preferred = successRate > 0.5

		return {
			...this.envelope("TOOL_PREFERENCE"),
			taskId,
			context: {
				toolNames: [toolName],
			},
			outcome: {
				success: preferred,
				confidenceDelta: preferred ? 0.02 : -0.02,
			},
		}
	}
}
+ */ + createPromptQualityEvent(quality: number, promptFingerprint?: string): LearningEvent { + return { + id: crypto.randomUUID(), + signal: "PROMPT_QUALITY", + timestamp: Date.now(), + context: { + promptFingerprint, + }, + outcome: { + confidenceDelta: quality > 0.5 ? 0.01 : -0.01, + }, + } + } + + /** + * Create a learning event from a tool preference signal. + * Gate behind SELF_IMPROVING_TOOL_PREFERENCE experiment flag. + */ + createToolPreferenceEvent(toolName: string, successRate: number, taskId?: string): LearningEvent | undefined { + const experiments = this.getExperiments() + if (experiments?.selfImprovingToolPreference === false) { + return undefined + } + + return { + id: crypto.randomUUID(), + signal: "TOOL_PREFERENCE", + timestamp: Date.now(), + taskId, + context: { + toolNames: [toolName], + }, + outcome: { + success: successRate > 0.5, + confidenceDelta: successRate > 0.5 ? 0.02 : -0.02, + }, + } + } +} diff --git a/src/services/self-improving/ImprovementApplier.ts b/src/services/self-improving/ImprovementApplier.ts new file mode 100644 index 0000000000..da65e3dfd6 --- /dev/null +++ b/src/services/self-improving/ImprovementApplier.ts @@ -0,0 +1,592 @@ +import crypto from "crypto" + +import type { SkillProvenance } from "./SkillUsageStore" +import type { Experiments, ImprovementAction, LearnedPattern, PromptContext } from "./types" + +interface ImprovementApplierOptions { + getSkillNames?: () => string[] + getSkillProvenance?: (name: string) => SkillProvenance | string + getSkillProvenanceForSource?: (name: string, source: "global" | "project") => SkillProvenance | string + hasSkill?: (name: string, source: "global" | "project") => boolean + isAutoSkillsEnabled?: () => boolean + getAutoSkillsScope?: () => "workspace" | "global" + getExperiments?: () => Experiments | undefined +} + +/** + * ImprovementApplier - converts learned patterns into actionable improvements. 
/**
 * ImprovementApplier - converts learned patterns into actionable improvements.
 *
 * Generates:
 * - Prompt enrichment context (bounded, ordered by confidence)
 * - Tool preference adjustments
 * - Error avoidance hints
 * - Skill suggestions / mutations for reusable workflows
 *
 * All collaborators (skill lookup, auto-skill settings, experiment flags) are
 * injected as callbacks; every callback has a conservative default so the
 * class can be constructed with no options.
 */
export class ImprovementApplier {
	/**
	 * Keyword rules for specialized-domain detection. Rules are tested in
	 * order and the first match wins, so a keyword that appears in two rules
	 * (e.g. "audit") is always claimed by the earlier one.
	 */
	private static readonly DOMAIN_RULES: ReadonlyArray<{ pattern: RegExp; domain: string }> = [
		// React/UI component building
		{ pattern: /\b(react|component|jsx|tsx|ui)\b/, domain: "react-component" },
		// API endpoint creation
		{ pattern: /\b(api|endpoint|route|rest|graphql|express|fastify)\b/, domain: "api-endpoint" },
		// Test writing
		{ pattern: /\b(test|spec|vitest|jest|mocha|tdd|assert)\b/, domain: "test-suite" },
		// Database operations
		{ pattern: /\b(db|database|sql|query|schema|migration|postgres|mongodb)\b/, domain: "database-operation" },
		// Deployment/CI
		{ pattern: /\b(deploy|ci|cd|pipeline|docker|kubernetes|k8s)\b/, domain: "deployment-pipeline" },
		// Code review
		{ pattern: /\b(review|audit|lint|quality|refactor)\b/, domain: "code-review" },
		// Documentation
		{ pattern: /\b(doc|readme|markdown|documentation|api-doc)\b/, domain: "documentation" },
		// Security
		{ pattern: /\b(security|auth|oauth|jwt|vulnerability|audit)\b/, domain: "security-audit" },
	]

	private readonly getSkillNames: () => string[]
	private readonly getSkillProvenance: (name: string) => SkillProvenance | string
	private readonly getSkillProvenanceForSource: (
		name: string,
		source: "global" | "project",
	) => SkillProvenance | string
	private readonly hasSkill: (name: string, source: "global" | "project") => boolean
	private readonly isAutoSkillsEnabled: () => boolean
	private readonly getAutoSkillsScope: () => "workspace" | "global"
	private readonly getExperiments: () => Experiments | undefined

	constructor(options: ImprovementApplierOptions = {}) {
		this.getSkillNames = options.getSkillNames ?? (() => [])
		this.getSkillProvenance = options.getSkillProvenance ?? (() => "unknown")
		this.getSkillProvenanceForSource =
			options.getSkillProvenanceForSource ?? ((name: string) => this.getSkillProvenance(name))
		// Without an explicit lookup, only project-scoped skills can be detected (by name).
		this.hasSkill =
			options.hasSkill ??
			((name: string, source: "global" | "project") =>
				source === "project" && this.getSkillNames().includes(name))
		this.isAutoSkillsEnabled = options.isAutoSkillsEnabled ?? (() => false)
		this.getAutoSkillsScope = options.getAutoSkillsScope ?? (() => "workspace")
		this.getExperiments = options.getExperiments ?? (() => undefined)
	}

	/**
	 * Generate improvement actions from learned patterns.
	 * Each active pattern maps to one or more actions; merge and
	 * specialized-skill actions are appended afterwards unless their
	 * experiment flag is explicitly set to false.
	 *
	 * @param patterns Learned patterns; entries whose state is not "active" are ignored.
	 * @returns The generated actions, in pattern order followed by merge and specialized actions.
	 */
	generateActions(patterns: LearnedPattern[]): ImprovementAction[] {
		const actions: ImprovementAction[] = []
		const now = Date.now()

		for (const pattern of patterns) {
			if (pattern.state !== "active") {
				continue
			}

			switch (pattern.patternType) {
				case "error":
					actions.push(this.createErrorAvoidanceAction(pattern, now))
					break
				case "tool":
					actions.push(this.createToolPreferenceAction(pattern, now))
					if (this.isAutoSkillsEnabled()) {
						const skillAction = this.createSkillMutationAction(pattern, now)
						if (skillAction) {
							actions.push(skillAction)
						}
					}
					break
				case "prompt":
					actions.push(this.createPromptEnrichmentAction(pattern, now))
					break
				case "skill":
					actions.push(this.createSkillSuggestionAction(pattern, now))
					break
			}
		}

		// SKILL_MERGE: detect similar skills and generate merge actions
		const experiments = this.getExperiments()
		if (experiments?.selfImprovingSkillMerge !== false) {
			actions.push(...this.generateSkillMergeActions(patterns, now))
		}

		// SPECIALIZED_SKILL: generate SKILL_CREATE_FROM_SCRATCH actions for
		// high-confidence, domain-specific patterns that warrant dedicated skills
		if (experiments?.selfImprovingSpecializedSkills !== false) {
			actions.push(...this.generateSpecializedSkillActions(patterns, now))
		}

		return actions
	}

	/**
	 * Build a bounded prompt context from learned patterns.
	 * Returns the top-N active patterns by confidence, ordered descending.
	 * Patterns without a confidence score are excluded. The input array is
	 * not mutated (sorting happens on the filtered copy).
	 *
	 * @param patterns Learned patterns to draw from.
	 * @param maxEntries Maximum number of entries to include (default 5).
	 * @returns The prompt context, stamped with the current time as its revision.
	 */
	buildPromptContext(patterns: LearnedPattern[], maxEntries: number = 5): PromptContext {
		const top = patterns
			.filter((p) => p.state === "active" && p.confidenceScore != null)
			.sort((a, b) => (b.confidenceScore ?? 0) - (a.confidenceScore ?? 0))
			.slice(0, maxEntries)

		return {
			entries: top.map((p) => ({
				type: p.patternType,
				summary: p.summary,
				confidence: p.confidenceScore ?? 0,
			})),
			revision: Date.now(),
		}
	}

	/**
	 * Variant of buildPromptContext used by SelfImprovingManager.getPromptContext().
	 *
	 * @param patterns Learned patterns to draw from.
	 * @param maxEntries Maximum number of entries to include.
	 * @param revision Optional revision to stamp on the result. When omitted the
	 *   context keeps the timestamp-based revision from buildPromptContext.
	 */
	getPromptContext(patterns: LearnedPattern[], maxEntries: number, revision?: number): PromptContext {
		const context = this.buildPromptContext(patterns, maxEntries)
		// Previously the caller-supplied revision was accepted but silently dropped.
		return revision === undefined ? context : { ...context, revision }
	}

	/** Map the auto-skills scope setting onto the skill source used in action payloads. */
	private resolveSkillSource(): "global" | "project" {
		return this.getAutoSkillsScope() === "global" ? "global" : "project"
	}

	/** Turn a dash-separated skill name into a space-separated, capitalized title. */
	private toTitle(name: string): string {
		return name
			.split("-")
			.map((segment) => segment.charAt(0).toUpperCase() + segment.slice(1))
			.join(" ")
	}

	/** Lowercase a tool name and replace every character outside [a-z0-9] with a dash. */
	private toToolSlug(toolName: string): string {
		return toolName.toLowerCase().replace(/[^a-z0-9]/g, "-")
	}

	/** Union of the tool names of all given patterns, in first-seen order. */
	private collectToolNames(patterns: LearnedPattern[]): string[] {
		const allTools = new Set<string>()
		for (const pattern of patterns) {
			for (const toolName of pattern.context.toolNames ?? []) {
				allTools.add(toolName)
			}
		}
		return [...allTools]
	}

	private createErrorAvoidanceAction(pattern: LearnedPattern, now: number): ImprovementAction {
		const errorKeys = pattern.context.errorKeys ?? []
		const primaryErrorKey = errorKeys.length > 0 ? errorKeys[0] : "unknown"

		return {
			id: crypto.randomUUID(),
			actionType: "ERROR_AVOIDANCE",
			target: "task-execution",
			payload: {
				summary: pattern.summary,
				errorKeys,
				confidence: pattern.confidenceScore ?? 0.5,
				patternId: pattern.id,
				primaryErrorKey,
			},
			timestamp: now,
		}
	}

	private createToolPreferenceAction(pattern: LearnedPattern, now: number): ImprovementAction {
		const toolNames = pattern.context.toolNames ?? []
		return {
			id: crypto.randomUUID(),
			actionType: "TOOL_PREFERENCE",
			target: "task-execution",
			payload: {
				summary: pattern.summary,
				toolNames,
				confidence: pattern.confidenceScore ?? 0.5,
				patternId: pattern.id,
			},
			timestamp: now,
		}
	}

	private createPromptEnrichmentAction(pattern: LearnedPattern, now: number): ImprovementAction {
		return {
			id: crypto.randomUUID(),
			actionType: "PROMPT_ENRICHMENT",
			target: "system-prompt",
			payload: {
				summary: pattern.summary,
				confidence: pattern.confidenceScore ?? 0.5,
				patternId: pattern.id,
			},
			timestamp: now,
		}
	}

	private createSkillSuggestionAction(pattern: LearnedPattern, now: number): ImprovementAction {
		const toolNames = pattern.context.toolNames ?? []
		const skillName = this.buildWorkflowSkillName(toolNames)
		const summary = `Capture reusable workflow for ${toolNames.join(", ")}`

		return {
			id: crypto.randomUUID(),
			actionType: "SKILL_SUGGESTION",
			target: "review-queue",
			payload: {
				summary,
				skillName,
				confidence: pattern.confidenceScore ?? 0.5,
				patternId: pattern.id,
			},
			timestamp: now,
		}
	}

	/**
	 * Build a SKILL_UPDATE (skill already exists in the target source) or
	 * SKILL_CREATE action for a tool pattern. Returns undefined when the
	 * pattern carries no tool names.
	 */
	private createSkillMutationAction(pattern: LearnedPattern, now: number): ImprovementAction | undefined {
		const toolNames = pattern.context.toolNames ?? []
		if (toolNames.length === 0) {
			return undefined
		}

		const skillName = this.buildWorkflowSkillName(toolNames)
		const summary = `Auto-created workflow for ${toolNames.join(", ")}`
		const source = this.resolveSkillSource()
		const skillId = this.buildSkillId(skillName, source)

		if (this.hasSkill(skillName, source)) {
			return {
				id: crypto.randomUUID(),
				actionType: "SKILL_UPDATE",
				target: "skills-manager",
				payload: {
					skillName,
					skillId,
					content: this.buildSkillContent(skillName, summary, toolNames),
					source,
					confidence: pattern.confidenceScore ?? 0.5,
					patternId: pattern.id,
				},
				timestamp: now,
			}
		}

		return {
			id: crypto.randomUUID(),
			actionType: "SKILL_CREATE",
			target: "skills-manager",
			payload: {
				skillName,
				skillId,
				description: summary,
				content: this.buildSkillContent(skillName, summary, toolNames),
				source,
				confidence: pattern.confidenceScore ?? 0.5,
				patternId: pattern.id,
			},
			timestamp: now,
		}
	}

	/** Workflow skill name: "workflow-" followed by the sorted tool slugs (duplicates are kept). */
	private buildWorkflowSkillName(toolNames: string[]): string {
		return `workflow-${toolNames
			.map((t) => this.toToolSlug(t))
			.sort()
			.join("-")}`
	}

	private buildSkillId(skillName: string, source: "global" | "project"): string {
		return `skill:${source}:${skillName}`
	}

	/** Render the SKILL.md document (frontmatter + body) for a workflow skill. */
	private buildSkillContent(skillName: string, description: string, toolNames: string[]): string {
		const title = this.toTitle(skillName)
		const bulletList = toolNames.map((toolName) => "- `" + toolName + "`").join("\n")
		const inlineTools = toolNames.map((toolName) => "`" + toolName + "`").join(" then ")

		return `---
name: ${skillName}
description: ${description}
---

# ${title}

## When to use

${description}

## Preferred tools

${bulletList}

## Workflow

1. Start with ${inlineTools}.
2. Keep the sequence focused on the same reusable workflow.
3. Update this skill when the workflow changes materially.
`
	}

	/**
	 * Generate SKILL_MERGE actions when similar skills are detected.
	 *
	 * Candidates are active "skill" patterns seen at least twice. Starting
	 * from each not-yet-grouped pattern (the seed), every later pattern whose
	 * tools overlap the seed's by at least 50% of the smaller tool set joins
	 * the seed's group. A group of two or more yields one merge action whose
	 * umbrella skill is named after the union of the group's tools.
	 */
	private generateSkillMergeActions(patterns: LearnedPattern[], now: number): ImprovementAction[] {
		const actions: ImprovementAction[] = []
		const skillPatterns = patterns.filter(
			(p) => p.patternType === "skill" && p.state === "active" && p.frequency >= 2,
		)

		if (skillPatterns.length < 2) {
			return actions
		}

		// Group patterns by tool overlap
		const processed = new Set<string>()
		for (let i = 0; i < skillPatterns.length; i++) {
			const seed = skillPatterns[i]
			if (processed.has(seed.id)) {
				continue
			}

			const toolsA = new Set<string>(seed.context.toolNames ?? [])
			const mergeGroup: LearnedPattern[] = [seed]
			processed.add(seed.id)

			for (let j = i + 1; j < skillPatterns.length; j++) {
				const candidate = skillPatterns[j]
				if (processed.has(candidate.id)) {
					continue
				}

				const toolsB = new Set<string>(candidate.context.toolNames ?? [])
				const overlap = [...toolsA].filter((t) => toolsB.has(t))

				// Merge if at least 50% tool overlap
				const minSize = Math.min(toolsA.size, toolsB.size)
				if (minSize > 0 && overlap.length / minSize >= 0.5) {
					mergeGroup.push(candidate)
					processed.add(candidate.id)
				}
			}

			if (mergeGroup.length < 2) {
				continue
			}

			const umbrellaName = this.buildMergeSkillName(mergeGroup)
			// Absorb every member's workflow skill except the umbrella itself.
			// (Taking only slice(1) skipped the seed's skill and could list the
			// umbrella as absorbing itself when a later member already covered
			// the full tool union.)
			const absorbNames = [
				...new Set(mergeGroup.map((p) => this.buildWorkflowSkillName(p.context.toolNames ?? []))),
			].filter((name) => name !== umbrellaName)

			// Every member already maps to the umbrella skill: nothing to merge.
			if (absorbNames.length === 0) {
				continue
			}

			const mergedDescription = `Merged skill combining ${mergeGroup.map((p) => p.summary).join("; ")}`
			const mergedContent = this.buildMergeSkillContent(umbrellaName, mergedDescription, mergeGroup)

			actions.push({
				id: crypto.randomUUID(),
				actionType: "SKILL_MERGE",
				target: "skills-manager",
				payload: {
					umbrellaName,
					absorbNames,
					description: mergedDescription,
					content: mergedContent,
					source: this.resolveSkillSource(),
					patternIds: mergeGroup.map((p) => p.id),
					// Mean confidence of the group, capped at 1.
					confidence: Math.min(
						1,
						mergeGroup.reduce((sum, p) => sum + (p.confidenceScore ?? 0), 0) / mergeGroup.length,
					),
				},
				timestamp: now,
			})
		}

		return actions
	}

	/** Name of the umbrella skill: the workflow name of the union of all tools in the group. */
	private buildMergeSkillName(patterns: LearnedPattern[]): string {
		return this.buildWorkflowSkillName(this.collectToolNames(patterns))
	}

	/** Render the SKILL.md document (frontmatter + body) for a merged umbrella skill. */
	private buildMergeSkillContent(name: string, description: string, patterns: LearnedPattern[]): string {
		const title = this.toTitle(name)
		const allTools = this.collectToolNames(patterns)

		const bulletList = allTools.map((toolName) => "- `" + toolName + "`").join("\n")
		const inlineTools = allTools.map((toolName) => "`" + toolName + "`").join(" then ")

		const patternSummaries = patterns
			.map((p) => `- ${p.summary} (confidence: ${((p.confidenceScore ?? 0) * 100).toFixed(0)}%)`)
			.join("\n")

		return `---
name: ${name}
description: ${description}
---

# ${title}

## Description

${description}

## Merged From

${patternSummaries}

## Preferred Tools

${bulletList}

## Workflow

1. Start with ${inlineTools}.
2. Keep the sequence focused on the same reusable workflow.
3. This skill was automatically merged from similar patterns.
`
	}

	/**
	 * Generate SKILL_CREATE_FROM_SCRATCH actions for high-confidence,
	 * domain-specific patterns that warrant dedicated specialized skills.
	 *
	 * A pattern qualifies for specialization when:
	 * - auto-skills are enabled and the pattern is active
	 * - it has high confidence (>= 0.7) and frequency (>= 3)
	 * - it names at least one tool
	 * - its summary or tool names match a known domain keyword
	 * - no skill with the resulting name exists yet in the target source
	 */
	private generateSpecializedSkillActions(patterns: LearnedPattern[], now: number): ImprovementAction[] {
		const actions: ImprovementAction[] = []

		// Only generate when auto-skills are enabled
		if (!this.isAutoSkillsEnabled()) {
			return actions
		}

		const source = this.resolveSkillSource()

		for (const pattern of patterns) {
			if (pattern.state !== "active") {
				continue
			}

			// Require high confidence and sufficient frequency for specialization
			if ((pattern.confidenceScore ?? 0) < 0.7 || (pattern.frequency ?? 0) < 3) {
				continue
			}

			const toolNames = pattern.context.toolNames ?? []
			if (toolNames.length === 0) {
				continue
			}

			// Detect domain from pattern summary and tool names
			const domain = this.detectSpecializedDomain(pattern.summary, toolNames)
			if (!domain) {
				continue
			}

			const skillName = this.buildSpecializedSkillName(domain, toolNames)
			const skillId = this.buildSkillId(skillName, source)

			// Skip if skill already exists
			if (this.hasSkill(skillName, source)) {
				continue
			}

			const description = `Specialized skill for ${domain}: ${pattern.summary}`
			const instructions = this.buildSpecializedSkillContent(skillName, description, domain, toolNames, pattern)

			actions.push({
				id: crypto.randomUUID(),
				actionType: "SKILL_CREATE_FROM_SCRATCH",
				target: "skills-manager",
				payload: {
					name: skillName,
					skillId,
					description,
					instructions,
					source,
					modeSlugs: pattern.context.modes ?? [],
					tools: toolNames,
					confidence: pattern.confidenceScore ?? 0.7,
					patternId: pattern.id,
				},
				timestamp: now,
			})
		}

		return actions
	}

	/**
	 * Detect a specialized domain from pattern summary and tool names.
	 * Returns a domain string (e.g., "react-component", "api-endpoint", "test-suite")
	 * or undefined if no specialized domain is detected.
	 *
	 * Matching is done on the lowercased summary plus lowercased tool names,
	 * using whole-word keyword patterns. Note that "_" counts as a word
	 * character for `\b`, so a tool named e.g. "run_tests" does not match "test".
	 */
	private detectSpecializedDomain(summary: string, toolNames: string[]): string | undefined {
		const lowerSummary = summary.toLowerCase()
		const allTools = toolNames.map((t) => t.toLowerCase()).join(" ")
		const searchText = `${lowerSummary} ${allTools}`

		return ImprovementApplier.DOMAIN_RULES.find(({ pattern }) => pattern.test(searchText))?.domain
	}

	/**
	 * Build a skill name for a specialized domain.
	 * Format: {domain}-{tool1}-{tool2} (sorted, deduplicated)
	 */
	private buildSpecializedSkillName(domain: string, toolNames: string[]): string {
		// Deduplicate after slugging so repeated tools do not repeat in the name.
		const toolPart = [...new Set(toolNames.map((t) => this.toToolSlug(t)))].sort().join("-")
		return `${domain}-${toolPart}`
	}

	/**
	 * Build full SKILL.md instructions body for a specialized skill.
	 * Returns only the markdown body (without frontmatter — frontmatter is
	 * added by ActionExecutor).
	 */
	private buildSpecializedSkillContent(
		skillName: string,
		description: string,
		domain: string,
		toolNames: string[],
		pattern: LearnedPattern,
	): string {
		const title = this.toTitle(skillName)
		const toolList = toolNames.map((t) => `- \`${t}\``).join("\n")
		const confidencePct = ((pattern.confidenceScore ?? 0) * 100).toFixed(0)

		return `# ${title}

## Description

${description}

## Domain

${domain}

## When to Use

This specialized skill is recommended when the task involves **${domain}** patterns with the following tools:

${toolList}

## Instructions

1. Analyze the task context to determine if this ${domain} skill applies.
2. Use the preferred tools in the recommended sequence.
3. Follow domain-specific best practices for ${domain}.
4. Validate output against the expected ${domain} patterns.

## Preferred Tools

${toolList}

## Confidence

This skill was auto-generated from observed patterns with **${confidencePct}%** confidence (frequency: ${pattern.frequency}).
`
	}
}
type: "error_pattern", + timestamp: Date.now(), + data: { errorType, details }, + summary: `Error: ${errorType}${details ? ` - ${details}` : ""}`, + }) + } + + recordTokenUsage(tokens: number, cost: number, context: string): void { + this.totalTokens += tokens + this.totalCost += cost + this.insights.push({ + id: randomUUID(), + sessionId: this.sessionId, + type: "token_usage", + timestamp: Date.now(), + data: { tokens, cost, context }, + summary: `Used ${tokens} tokens ($${cost.toFixed(4)}) for ${context}`, + }) + } + + recordPerformance(operation: string, durationMs: number): void { + this.insights.push({ + id: randomUUID(), + sessionId: this.sessionId, + type: "performance", + timestamp: Date.now(), + data: { operation, durationMs }, + summary: `${operation} took ${durationMs}ms`, + }) + } + + getTopTools(limit = 5): Array<{ name: string; count: number }> { + return [...this.toolUsageCounts.entries()] + .map(([name, count]) => ({ name, count })) + .sort((a, b) => b.count - a.count) + .slice(0, limit) + } + + generateReport(): InsightsReport { + return { + sessionId: this.sessionId, + startTime: this.startTime, + endTime: Date.now(), + totalTokens: this.totalTokens, + totalCost: this.totalCost, + toolUsageCount: [...this.toolUsageCounts.values()].reduce((a, b) => a + b, 0), + errorCount: this.errorCount, + topTools: this.getTopTools(), + insights: [...this.insights], + } + } + + async persistReport(): Promise { + const report = this.generateReport() + const fileName = `session-${this.sessionId}-${Date.now()}.json` + const filePath = path.join(this.storagePath, fileName) + await fs.writeFile(filePath, JSON.stringify(report, null, 2), "utf-8") + return filePath + } + + async getRecentReports(limit = 10): Promise { + try { + const files = await fs.readdir(this.storagePath) + const jsonFiles = files + .filter((f) => f.endsWith(".json")) + .sort() + .reverse() + .slice(0, limit) + + const reports: InsightsReport[] = [] + for (const file of jsonFiles) { + try { + const 
content = await fs.readFile(path.join(this.storagePath, file), "utf-8") + reports.push(JSON.parse(content)) + } catch { + continue + } + } + return reports + } catch { + return [] + } + } + + getSessionId(): string { + return this.sessionId + } + + dispose(): void { + this.insights = [] + this.toolUsageCounts.clear() + } +} diff --git a/src/services/self-improving/KeywordConflictResolver.ts b/src/services/self-improving/KeywordConflictResolver.ts new file mode 100644 index 0000000000..ad57244340 --- /dev/null +++ b/src/services/self-improving/KeywordConflictResolver.ts @@ -0,0 +1,80 @@ +import type { Requirement, ConflictResolution, ConflictResolver } from "./types" + +/** + * Keyword-based conflict resolver using Jaccard similarity. + * Falls back to this when LLM-based resolution is unavailable. + */ +export class KeywordConflictResolver implements ConflictResolver { + readonly name = "keyword" + + async resolve( + newRequirement: Requirement, + existingRequirements: Requirement[], + _newMessageIndex: number, + _allMessages: string[], + ): Promise { + const supersedes: string[] = [] + const newWords = this.getSignificantWords(newRequirement.text) + + for (const existing of existingRequirements) { + const existingWords = this.getSignificantWords(existing.text) + const overlap = this.calculateOverlap(newWords, existingWords) + if (overlap >= 0.4) { + supersedes.push(existing.id) + } + } + + return { + supersedes, + confidence: supersedes.length > 0 ? 0.6 : 0.9, + reason: + supersedes.length > 0 + ? 
`Keyword overlap detected (Jaccard similarity >= 0.4)` + : "No significant keyword overlap with existing requirements", + } + } + + /** + * Extract significant words from text (lowercase, remove common words) + */ + getSignificantWords(text: string): string[] { + const stopWords = new Set([ + "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", + "of", "with", "by", "from", "as", "is", "was", "are", "were", "be", + "been", "being", "have", "has", "had", "do", "does", "did", "will", + "would", "could", "should", "may", "might", "shall", "can", "need", + "must", "this", "that", "these", "those", "it", "its", "they", "them", + "their", "we", "us", "our", "you", "your", "he", "she", "him", "her", + "his", "not", "no", "nor", "so", "if", "then", "than", "too", "very", + "just", "about", "above", "after", "again", "all", "also", "any", + "because", "before", "between", "both", "each", "few", "more", "most", + "other", "some", "such", "only", "own", "same", "into", "over", "under", + "up", "out", "off", "down", "here", "there", "when", "where", "why", + "how", "what", "which", "who", "whom", "please", "make", "like", + ]) + + return text + .toLowerCase() + .replace(/[^a-z0-9\s]/g, "") + .split(/\s+/) + .filter((w) => w.length > 2 && !stopWords.has(w)) + } + + /** + * Calculate overlap ratio between two word sets (Jaccard similarity) + */ + calculateOverlap(words1: string[], words2: string[]): number { + if (words1.length === 0 || words2.length === 0) return 0 + + const set1 = new Set(words1) + const set2 = new Set(words2) + + let intersection = 0 + for (const w of set1) { + if (set2.has(w)) intersection++ + } + + const union = new Set([...set1, ...set2]).size + return union === 0 ? 
describe("LLMConflictResolver", () => {
	let resolver: LLMConflictResolver

	/** Build a pending functional requirement; only id, text and message index vary between tests. */
	const requirement = (id: string, text: string, messageIndex: number): Requirement => ({
		id,
		text,
		category: "functional",
		status: "pending",
		messageIndex,
	})

	/** The existing/new pair that lands in the ambiguous Jaccard range (2/4 = 0.5). */
	const jwtRequirement = (): Requirement => requirement("existing-1", "Build authentication with JWT", 0)
	const oauthRequirement = (): Requirement => requirement("new-1", "Build authentication with OAuth", 1)

	/** Resolve the completion handler from the mocked module as a typed mock. */
	const getLlmMock = async () => {
		const { singleCompletionHandler } = await import("../../utils/single-completion-handler")
		return vi.mocked(singleCompletionHandler)
	}

	beforeEach(() => {
		// Reset recorded calls so call-count assertions do not depend on test order.
		vi.clearAllMocks()
		resolver = new LLMConflictResolver({ apiProvider: "openai" } as any)
	})

	it("should have name 'llm'", () => {
		expect(resolver.name).toBe("llm")
	})

	it("should return empty supersedes when no existing requirements", async () => {
		const newReq = requirement("1", "Build authentication", 0)
		const result = await resolver.resolve(newReq, [], 0, [])
		expect(result.supersedes).toEqual([])
		expect(result.confidence).toBe(1.0)
		expect(result.reason).toBe("No existing requirements to compare")
	})

	describe("keyword fast path (no LLM call)", () => {
		it("should use keyword result directly when Jaccard >= 0.7 (clear match)", async () => {
			const llmSpy = await getLlmMock()

			const existing = [requirement("existing-1", "Build authentication with JWT for login", 0)]
			const newReq = requirement("new-1", "Build authentication with JWT", 1)
			const result = await resolver.resolve(newReq, existing, 1, [])
			// Jaccard(["build","authentication","jwt","login"], ["build","authentication","jwt"]) = 3/4 = 0.75 >= 0.7
			expect(result.supersedes).toEqual(["existing-1"])
			expect(result.confidence).toBe(0.9)
			expect(result.reason).toContain("Keyword overlap detected")
			// LLM should NOT be called
			expect(llmSpy).not.toHaveBeenCalled()
		})

		it("should use keyword result directly when Jaccard <= 0.3 (clear non-match)", async () => {
			const llmSpy = await getLlmMock()

			const existing = [jwtRequirement()]
			const newReq = requirement("new-1", "Add PostgreSQL database schema", 1)
			const result = await resolver.resolve(newReq, existing, 1, [])
			// Jaccard(["build","authentication","jwt"], ["add","postgresql","database","schema"]) = 0/7 = 0 <= 0.3
			expect(result.supersedes).toEqual([])
			expect(result.confidence).toBe(0.95)
			expect(result.reason).toContain("No significant keyword overlap")
			// LLM should NOT be called
			expect(llmSpy).not.toHaveBeenCalled()
		})
	})

	describe("LLM fallback path (ambiguous Jaccard 0.3-0.7)", () => {
		it("should call LLM when Jaccard is in ambiguous range", async () => {
			const llmMock = await getLlmMock()
			llmMock.mockResolvedValue(
				JSON.stringify({ supersedes: ["existing-1"], reason: "Same feature, OAuth replaces JWT" }),
			)

			const result = await resolver.resolve(oauthRequirement(), [jwtRequirement()], 1, [
				"- Build authentication with JWT",
			])
			// Jaccard("build authentication jwt", "build authentication oauth") = 2/4 = 0.5 → ambiguous
			expect(result.supersedes).toEqual(["existing-1"])
			expect(result.confidence).toBe(0.8)
			expect(result.reason).toBe("Same feature, OAuth replaces JWT")
			expect(llmMock).toHaveBeenCalledTimes(1)
		})

		it("should return empty supersedes when LLM says no overlap", async () => {
			const llmMock = await getLlmMock()
			llmMock.mockResolvedValue(JSON.stringify({ supersedes: [], reason: "No semantic overlap detected" }))

			const result = await resolver.resolve(oauthRequirement(), [jwtRequirement()], 1, [
				"- Build authentication with JWT",
			])
			expect(result.supersedes).toEqual([])
			expect(result.confidence).toBe(0.8)
			expect(result.reason).toBe("No semantic overlap detected")
		})

		it("should fallback to keyword heuristic on LLM call failure", async () => {
			const llmMock = await getLlmMock()
			llmMock.mockRejectedValue(new Error("API timeout"))

			const result = await resolver.resolve(oauthRequirement(), [jwtRequirement()], 1, [
				"- Build authentication with JWT",
			])
			// Fallback: ambiguous pairs are included with lower confidence
			expect(result.supersedes).toEqual(["existing-1"])
			expect(result.confidence).toBe(0.4)
			expect(result.reason).toContain("LLM call failed")
			expect(result.reason).toContain("API timeout")
		})

		it("should parse JSON embedded in markdown response", async () => {
			const llmMock = await getLlmMock()
			llmMock.mockResolvedValue(
				'Here is my analysis:\n```json\n{"supersedes": ["existing-1"], "reason": "OAuth replaces JWT"}\n```',
			)

			const result = await resolver.resolve(oauthRequirement(), [jwtRequirement()], 1, [
				"- Build authentication with JWT",
			])
			expect(result.supersedes).toEqual(["existing-1"])
			expect(result.reason).toBe("OAuth replaces JWT")
		})
	})

	describe("mixed scenarios", () => {
		it("should combine clear keyword matches with LLM results for ambiguous pairs", async () => {
			const llmMock = await getLlmMock()
			llmMock.mockResolvedValue(JSON.stringify({ supersedes: ["existing-2"], reason: "OAuth replaces JWT" }))

			const existing = [
				requirement("existing-1", "Use React 18 for frontend", 0),
				requirement("existing-2", "Build authentication with JWT", 0),
				requirement("existing-3", "Add PostgreSQL database", 0),
			]
			const result = await resolver.resolve(oauthRequirement(), existing, 1, [])
			// existing-1: Jaccard("use react frontend", "build authentication oauth") = 0/6 = 0 → skip
			//             ("18" is dropped: words of one or two characters are not significant)
			// existing-2: Jaccard("build authentication jwt", "build authentication oauth") = 2/4 = 0.5 → ambiguous → LLM
			// existing-3: Jaccard("add postgresql database", "build authentication oauth") = 0/6 = 0 → skip
			expect(result.supersedes).toEqual(["existing-2"])
			expect(result.confidence).toBe(0.8)
		})
	})
})
LLM fallback for ambiguous similarity range (0.3 < Jaccard < 0.7)
 *
 * Clear match (>= 0.7) → keyword result directly (no LLM call)
 * Clear non-match (<= 0.3) → keyword result directly (no LLM call)
 * Ambiguous (0.3–0.7) → LLM for semantic analysis
 *
 * IDs returned by the LLM are only honoured when they belong to the ambiguous
 * set it was asked about; anything else is discarded.
 */
export class LLMConflictResolver implements ConflictResolver {
	readonly name = "llm"

	/** Jaccard threshold below which requirements are considered unrelated */
	private static readonly CLEAR_NON_MATCH_THRESHOLD = 0.3

	/** Jaccard threshold above which requirements are considered a clear match */
	private static readonly CLEAR_MATCH_THRESHOLD = 0.7

	private readonly keywordResolver: KeywordConflictResolver

	/**
	 * @param apiConfiguration - Provider settings handed to the completion handler for the LLM tier.
	 */
	constructor(private readonly apiConfiguration: ProviderSettings) {
		this.keywordResolver = new KeywordConflictResolver()
	}

	/**
	 * Decide which existing requirements the new requirement supersedes.
	 *
	 * @param newRequirement - The requirement that was just extracted.
	 * @param existingRequirements - Previously known requirements to compare against.
	 * @param newMessageIndex - Index of the message the new requirement came from (forwarded to the LLM prompt).
	 * @param allMessages - Conversation messages forwarded to the LLM tier as context.
	 * @returns The superseded IDs, an aggregate confidence and a human-readable reason.
	 *          When the LLM call throws, every ambiguous pair is treated as superseded
	 *          and the confidence drops to 0.4.
	 */
	async resolve(
		newRequirement: Requirement,
		existingRequirements: Requirement[],
		newMessageIndex: number,
		allMessages: string[],
	): Promise<ConflictResolution> {
		if (existingRequirements.length === 0) {
			return { supersedes: [], confidence: 1.0, reason: "No existing requirements to compare" }
		}

		// Phase 1: Keyword analysis for each existing requirement
		const newWords = this.keywordResolver.getSignificantWords(newRequirement.text)
		const clearSupersedes: string[] = []
		const ambiguousPairs: Array<{ existing: Requirement; similarity: number }> = []

		for (const existing of existingRequirements) {
			const existingWords = this.keywordResolver.getSignificantWords(existing.text)
			const similarity = this.keywordResolver.calculateOverlap(newWords, existingWords)

			if (similarity >= LLMConflictResolver.CLEAR_MATCH_THRESHOLD) {
				// Clear keyword match — supersede without LLM
				clearSupersedes.push(existing.id)
			} else if (similarity > LLMConflictResolver.CLEAR_NON_MATCH_THRESHOLD) {
				// Ambiguous range — needs LLM analysis
				ambiguousPairs.push({ existing, similarity })
			}
			// similarity <= 0.3: clear non-match, skip entirely
		}

		// Phase 2: LLM analysis for ambiguous pairs only
		let llmSupersedes: string[] = []
		let llmReason = ""

		if (ambiguousPairs.length > 0) {
			try {
				const result = await this.callLlm(newRequirement, ambiguousPairs, newMessageIndex, allMessages)

				// The model output is untrusted: keep only IDs that were actually part of the
				// question. This drops hallucinated IDs, non-string entries and IDs of
				// requirements that phase 1 already ruled out as clear non-matches.
				const ambiguousIds = new Set<string>(ambiguousPairs.map((pair) => pair.existing.id))
				llmSupersedes = result.supersedes.filter((id) => ambiguousIds.has(id))
				llmReason = result.reason
			} catch (error) {
				// LLM call failed — fall back to keyword heuristic for ambiguous pairs
				const fallbackIds = ambiguousPairs.map((p) => p.existing.id)
				return {
					supersedes: [...clearSupersedes, ...fallbackIds],
					confidence: 0.4,
					reason: `LLM call failed, fell back to keyword heuristic: ${error instanceof Error ? error.message : String(error)}`,
				}
			}
		}

		const allSupersedes = [...new Set([...clearSupersedes, ...llmSupersedes])]
		const confidence = this.calculateConfidence(
			clearSupersedes.length,
			ambiguousPairs.length,
			llmSupersedes.length,
		)

		return {
			supersedes: allSupersedes,
			confidence,
			// An empty LLM reason (no LLM call, or a non-string reason) falls back to the keyword explanation.
			reason: llmReason || this.buildKeywordReason(clearSupersedes),
		}
	}

	/**
	 * Calculate overall confidence based on how many decisions came from keyword vs LLM.
	 *
	 * @param clearCount - Number of requirements superseded by the keyword tier alone.
	 * @param ambiguousCount - Number of requirements that were sent to the LLM tier.
	 * @param llmSupersedeCount - Number of IDs the LLM tier superseded. Not used by the
	 *        current weighting; kept so the call site and signature stay stable.
	 * @returns 0.95 when no decision was needed, otherwise the weighted mean of
	 *          0.9 (keyword) and 0.8 (LLM), rounded to two decimals.
	 */
	private calculateConfidence(clearCount: number, ambiguousCount: number, llmSupersedeCount: number): number {
		const totalDecisions = clearCount + ambiguousCount
		if (totalDecisions === 0) return 0.95

		// Keyword decisions have high confidence (0.9), LLM decisions moderate (0.8)
		const keywordWeight = clearCount / totalDecisions
		const llmWeight = ambiguousCount / totalDecisions
		return Math.round((keywordWeight * 0.9 + llmWeight * 0.8) * 100) / 100
	}

	/**
	 * Build a reason string when only keyword analysis was used.
+ */ + private buildKeywordReason(supersedes: string[]): string { + if (supersedes.length > 0) { + return `Keyword overlap detected (Jaccard similarity >= ${LLMConflictResolver.CLEAR_MATCH_THRESHOLD})` + } + return "No significant keyword overlap with existing requirements" + } + + /** + * Call LLM to resolve ambiguous requirement pairs. + */ + private async callLlm( + newRequirement: Requirement, + ambiguousPairs: Array<{ existing: Requirement; similarity: number }>, + newMessageIndex: number, + allMessages: string[], + ): Promise<{ supersedes: string[]; reason: string }> { + const recentMessages = allMessages + .slice(Math.max(0, allMessages.length - 5)) + .map((m, i) => `[Message ${i + 1}]: ${m.slice(0, 200)}`) + .join("\n\n") + + const ambiguousList = ambiguousPairs + .map( + (p, i) => + `[${i + 1}] ID: ${p.existing.id}\n Text: "${p.existing.text}"\n Category: ${p.existing.category}\n From message: ${p.existing.messageIndex}\n Keyword similarity: ${p.similarity.toFixed(2)}`, + ) + .join("\n\n") + + const prompt = `You are a requirements conflict resolution system. Your job is to determine if a NEW user requirement SUPERSEDES (replaces, overrides, or contradicts) any EXISTING requirements. 
+ +RULES: +- A later requirement supersedes an earlier one when they address the same feature, behavior, or constraint +- The most recent message has the HIGHEST priority — later instructions override earlier ones +- If the new requirement is about a completely different topic, it does NOT supersede anything +- If the new requirement explicitly contradicts an existing one (e.g., "don't do X" vs "do X"), it supersedes +- If the new requirement is a refinement or clarification of an existing one, it supersedes +- Consider semantic meaning, not just keywords +- Return ONLY the IDs of existing requirements that are superseded + +CONTEXT (recent user messages): +${recentMessages} + +EXISTING REQUIREMENTS (ambiguous — keyword similarity was inconclusive): +${ambiguousList} + +NEW REQUIREMENT (from message ${newMessageIndex}): +"${newRequirement.text}" +Category: ${newRequirement.category} + +Does this new requirement supersede any of the existing requirements? Respond with a JSON object: +{ + "supersedes": ["id1", "id2"], + "reason": "Brief explanation of the decision" +} + +If none are superseded, return: { "supersedes": [], "reason": "No semantic overlap detected" }` + + const response = await singleCompletionHandler(this.apiConfiguration, prompt) + return this.parseResponse(response) + } + + private parseResponse(response: string): { supersedes: string[]; reason: string } { + try { + const jsonMatch = response.match(/\{[\s\S]*\}/) + if (jsonMatch) { + const parsed = JSON.parse(jsonMatch[0]) + return { + supersedes: Array.isArray(parsed.supersedes) ? parsed.supersedes : [], + reason: typeof parsed.reason === "string" ? 
parsed.reason : "", + } + } + } catch { + // If parsing fails, return empty + } + return { supersedes: [], reason: "Failed to parse LLM response" } + } +} diff --git a/src/services/self-improving/LearningStore.ts b/src/services/self-improving/LearningStore.ts new file mode 100644 index 0000000000..5bc1db4827 --- /dev/null +++ b/src/services/self-improving/LearningStore.ts @@ -0,0 +1,462 @@ +import * as fs from "fs/promises" +import * as path from "path" +import crypto from "crypto" + +import { safeWriteJson } from "../../utils/safeWriteJson" +import type { ImprovementAction, LearnedPattern, LearningConfig, LearningEvent, LearningState, Logger } from "./types" +import { EMPTY_STATE } from "./types" +import type { CodeIndexManager } from "../code-index/manager" +import type { Experiments } from "./types" + +/** + * File names for the learning store + */ +const STATE_FILE = "state.json" +const PATTERNS_DIR = "patterns" +const ARCHIVE_DIR = "archive" +const PATTERN_INDEX_FILE = "_index.json" + +/** + * LearningStore - atomic file-based persistence for learning state. 
+ * + * Storage layout: + * globalStorage/self-improving/ + * state.json - canonical LearningState metadata + counters + * patterns/ - per-pattern source-of-truth files + * _index.json - compact index of active/stale/archived patterns + * .json - individual pattern data + * archive/ - cold storage for archived patterns + * .json + */ +export class LearningStore { + private readonly baseDir: string + private readonly patternsDir: string + private readonly archiveDir: string + private state: LearningState + private readonly logger: Logger + private initialized = false + private codeIndexManager: CodeIndexManager | undefined + private getExperiments: (() => Experiments | undefined) | undefined + + constructor(baseDir: string, logger: Logger) { + this.baseDir = path.join(baseDir, "self-improving") + this.patternsDir = path.join(this.baseDir, PATTERNS_DIR) + this.archiveDir = path.join(this.baseDir, ARCHIVE_DIR) + this.state = this.createEmptyState() + this.logger = logger + } + + /** + * Set the CodeIndexManager instance for vector-search-based pattern dedup/retrieval. + */ + setCodeIndexManager(manager: CodeIndexManager | undefined): void { + this.codeIndexManager = manager + } + + /** + * Set the experiments accessor for feature gating. + */ + setExperimentsAccessor(getExperiments: () => Experiments | undefined): void { + this.getExperiments = getExperiments + } + + /** + * Initialize the store - create directories and load persisted state. + * If state is corrupted or missing, falls back to empty defaults. + */ + async initialize(): Promise { + if (this.initialized) { + return + } + + try { + await fs.mkdir(this.patternsDir, { recursive: true }) + await fs.mkdir(this.archiveDir, { recursive: true }) + await this.loadState() + this.initialized = true + } catch (error) { + this.logger.appendLine( + `[LearningStore] Initialization error: ${error instanceof Error ? 
error.message : String(error)}`, + ) + this.state = this.createEmptyState() + this.initialized = true + } + } + + /** + * Load state from disk with graceful degradation. + */ + private async loadState(): Promise { + const statePath = path.join(this.baseDir, STATE_FILE) + + try { + const raw = await fs.readFile(statePath, "utf-8") + const parsed = JSON.parse(raw) as Partial + + if (parsed && typeof parsed === "object" && parsed.version === 1) { + this.state = this.mergeWithDefaults(parsed) + await this.loadPatternFiles() + this.logger.appendLine( + `[LearningStore] Loaded state: ${this.state.patterns.length} patterns, ${this.state.recentEvents.length} events`, + ) + return + } + + this.logger.appendLine("[LearningStore] Invalid state version, using defaults") + } catch (error: unknown) { + const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined + if (errorCode === "ENOENT") { + this.logger.appendLine("[LearningStore] No existing state, starting fresh") + } else { + this.logger.appendLine( + `[LearningStore] Corrupted state (${error instanceof Error ? error.message : String(error)}), using defaults`, + ) + } + } + + this.state = this.createEmptyState() + } + + private async loadPatternFiles(): Promise { + this.state.patterns = await this.hydratePatternSet(this.patternsDir, this.state.patterns) + this.state.archivedPatterns = (await this.hydratePatternSet(this.archiveDir, this.state.archivedPatterns)).map( + (pattern) => ({ + ...pattern, + state: "archived", + }), + ) + } + + private async hydratePatternSet( + directoryPath: string, + manifestPatterns: readonly LearnedPattern[], + ): Promise { + const hydratedPatterns: LearnedPattern[] = [] + + for (const manifestPattern of manifestPatterns) { + const persistedPattern = await this.readPatternFile(directoryPath, manifestPattern.id) + hydratedPatterns.push(persistedPattern ?? 
manifestPattern) + } + + return hydratedPatterns + } + + private async readPatternFile(directoryPath: string, patternId: string): Promise { + try { + const raw = await fs.readFile(path.join(directoryPath, `${patternId}.json`), "utf-8") + const parsed = JSON.parse(raw) as LearnedPattern + return parsed?.id === patternId ? parsed : null + } catch (error) { + const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined + if (errorCode !== "ENOENT") { + this.logger.appendLine( + `[LearningStore] Failed to read pattern ${patternId}.json: ${error instanceof Error ? error.message : String(error)}`, + ) + } + return null + } + } + + /** + * Merge parsed state with defaults to handle schema evolution. + */ + private mergeWithDefaults(parsed: Partial): LearningState { + const config = { ...EMPTY_STATE.config, ...(parsed.config ?? {}) } + + return { + version: 1, + config, + counters: { + userTurnsSinceReview: parsed.counters?.userTurnsSinceReview ?? 0, + toolIterationsSinceReview: parsed.counters?.toolIterationsSinceReview ?? 0, + }, + patterns: Array.isArray(parsed.patterns) ? parsed.patterns : [], + archivedPatterns: Array.isArray(parsed.archivedPatterns) ? parsed.archivedPatterns : [], + recentEvents: Array.isArray(parsed.recentEvents) ? parsed.recentEvents.slice(-config.maxStoredEvents) : [], + pendingActions: Array.isArray(parsed.pendingActions) ? parsed.pendingActions : [], + telemetry: { + promptEnrichmentUses: parsed.telemetry?.promptEnrichmentUses ?? 0, + toolPreferenceUses: parsed.telemetry?.toolPreferenceUses ?? 0, + errorAvoidanceUses: parsed.telemetry?.errorAvoidanceUses ?? 0, + skillSuggestionCount: parsed.telemetry?.skillSuggestionCount ?? 0, + lastReviewAt: parsed.telemetry?.lastReviewAt, + lastCuratorRunAt: parsed.telemetry?.lastCuratorRunAt, + }, + } + } + + /** + * Persist the full state to disk with state.json committed last. 
+ */ + async persist(): Promise { + if (!this.initialized) { + return + } + + try { + this.enforceBounds() + + await this.persistPatternFiles(this.patternsDir, this.state.patterns) + await this.persistPatternFiles(this.archiveDir, this.state.archivedPatterns) + await this.writePatternIndex() + await safeWriteJson(path.join(this.baseDir, STATE_FILE), this.state, { prettyPrint: true }) + } catch (error) { + this.logger.appendLine( + `[LearningStore] Persist error: ${error instanceof Error ? error.message : String(error)}`, + ) + } + } + + private async persistPatternFiles(directoryPath: string, patterns: readonly LearnedPattern[]): Promise { + const expectedNames = new Set(patterns.map((pattern) => `${pattern.id}.json`)) + + await Promise.all( + patterns.map((pattern) => + safeWriteJson(path.join(directoryPath, `${pattern.id}.json`), pattern, { prettyPrint: true }), + ), + ) + + try { + const existingEntries = await fs.readdir(directoryPath, { withFileTypes: true }) + await Promise.all( + existingEntries + .filter( + (entry) => + entry.isFile() && + entry.name.endsWith(".json") && + entry.name !== PATTERN_INDEX_FILE && + !expectedNames.has(entry.name), + ) + .map((entry) => fs.rm(path.join(directoryPath, entry.name), { force: true })), + ) + } catch (error) { + this.logger.appendLine( + `[LearningStore] Pattern cleanup error: ${error instanceof Error ? error.message : String(error)}`, + ) + } + } + + private async writePatternIndex(): Promise { + await safeWriteJson( + path.join(this.patternsDir, PATTERN_INDEX_FILE), + { + version: 1, + updatedAt: Date.now(), + activePatternIds: this.state.patterns.map((pattern) => pattern.id), + archivedPatternIds: this.state.archivedPatterns.map((pattern) => pattern.id), + }, + { prettyPrint: true }, + ) + } + + /** + * Enforce storage bounds (max patterns, max events). 
+ */ + private enforceBounds(): void { + const maxPatterns = this.state.config.maxStoredPatterns + const maxEvents = this.state.config.maxStoredEvents + + if (this.state.patterns.length > maxPatterns) { + this.state.patterns = [...this.state.patterns] + .sort((a, b) => a.confidenceScore - b.confidenceScore) + .slice(-maxPatterns) + } + + if (this.state.archivedPatterns.length > maxPatterns) { + this.state.archivedPatterns = [...this.state.archivedPatterns] + .sort((a, b) => a.lastSeenAt - b.lastSeenAt) + .slice(-maxPatterns) + } + + if (this.state.recentEvents.length > maxEvents) { + this.state.recentEvents = this.state.recentEvents.slice(-maxEvents) + } + } + + // ──── Getters ──── + + getState(): Readonly { + return this.state + } + + getConfig(): Readonly { + return this.state.config + } + + getPatterns(): readonly LearnedPattern[] { + return this.state.patterns + } + + getArchivedPatterns(): readonly LearnedPattern[] { + return this.state.archivedPatterns + } + + getRecentEvents(): readonly LearningEvent[] { + return this.state.recentEvents + } + + getPendingActions(): readonly ImprovementAction[] { + return this.state.pendingActions + } + + getTelemetry(): Readonly { + return this.state.telemetry + } + + getCounters(): Readonly { + return this.state.counters + } + + // ──── Mutations ──── + + setConfig(config: Partial): void { + this.state.config = { ...this.state.config, ...config } + } + + addEvent(event: LearningEvent): void { + this.state.recentEvents.push(event) + } + + addPattern(pattern: LearnedPattern): void { + const existing = this.state.patterns.findIndex((candidate) => candidate.id === pattern.id) + if (existing >= 0) { + this.state.patterns[existing] = pattern + return + } + + this.state.patterns.push(pattern) + } + + updatePattern(id: string, updates: Partial): void { + const index = this.state.patterns.findIndex((pattern) => pattern.id === id) + if (index >= 0) { + this.state.patterns[index] = { ...this.state.patterns[index], ...updates } + } + 
} + + removePattern(id: string): void { + this.state.patterns = this.state.patterns.filter((pattern) => pattern.id !== id) + } + + archivePattern(id: string): void { + const index = this.state.patterns.findIndex((pattern) => pattern.id === id) + if (index >= 0) { + const pattern = { ...this.state.patterns[index], state: "archived" as const } + this.state.archivedPatterns.push(pattern) + this.state.patterns.splice(index, 1) + } + } + + addAction(action: ImprovementAction): void { + this.state.pendingActions.push(action) + } + + removeAction(id: string): void { + this.state.pendingActions = this.state.pendingActions.filter((action) => action.id !== id) + } + + incrementUserTurns(): void { + this.state.counters.userTurnsSinceReview++ + } + + incrementToolIterations(delta = 1): void { + this.state.counters.toolIterationsSinceReview += delta + } + + resetCounters(): void { + this.state.counters.userTurnsSinceReview = 0 + this.state.counters.toolIterationsSinceReview = 0 + } + + updateTelemetry(updates: Partial): void { + this.state.telemetry = { ...this.state.telemetry, ...updates } + } + + /** + * Reset all learning state to defaults. + */ + async reset(): Promise { + this.state = this.createEmptyState() + await this.persist() + } + + private createEmptyState(): LearningState { + return { + ...EMPTY_STATE, + config: { ...EMPTY_STATE.config }, + counters: { ...EMPTY_STATE.counters }, + patterns: [], + archivedPatterns: [], + recentEvents: [], + pendingActions: [], + telemetry: { ...EMPTY_STATE.telemetry }, + } + } + + /** + * Search for similar patterns using vector search. + * Uses CodeIndexManager to find semantically similar patterns for dedup/retrieval. + * Gated behind selfImprovingCodeIndex experiment flag. 
+ */ + async searchSimilarPatterns(query: string): Promise { + const experiments = this.getExperiments?.() + if (experiments?.selfImprovingCodeIndex === false) { + return [] + } + + if (!this.codeIndexManager) { + return [] + } + + try { + const results = await this.codeIndexManager.searchIndex(query) + if (!results || results.length === 0) { + return [] + } + + // Map vector search results to LearnedPattern instances + const patterns: LearnedPattern[] = [] + for (const result of results) { + const payload = result.payload + if (!payload?.codeChunk) { + continue + } + + const filePath = payload.filePath ?? "unknown" + const startLine = payload.startLine ?? 0 + const endLine = payload.endLine ?? 0 + + patterns.push({ + id: crypto.randomUUID(), + patternType: "code-index", + state: "active", + summary: `[${filePath}:${startLine}-${endLine}] ${payload.codeChunk.slice(0, 180)}`, + confidenceScore: Math.min(1, result.score), + frequency: 1, + successRate: 0.5, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: ["CODE_INDEX_HIT"], + context: { + toolNames: [], + errorKeys: [], + }, + }) + } + + return patterns + } catch (error) { + this.logger.appendLine( + `[LearningStore] searchSimilarPatterns error: ${error instanceof Error ? error.message : String(error)}`, + ) + return [] + } + } + + /** + * Generate a unique ID for patterns, events, and actions. + */ + static generateId(): string { + return crypto.randomUUID() + } +} diff --git a/src/services/self-improving/MemoryBackend.ts b/src/services/self-improving/MemoryBackend.ts new file mode 100644 index 0000000000..44a47e67ed --- /dev/null +++ b/src/services/self-improving/MemoryBackend.ts @@ -0,0 +1,42 @@ +import type { MemoryEntry } from "@roo-code/types" + +/** + * MemoryBackend — abstract interface for memory storage backends. 
+ * + * Both the built-in MemoryStore and the optional agentmemory adapter + * implement this interface, allowing the SelfImprovingManager to + * switch between backends transparently. + */ +export interface MemoryBackend { + /** Initialize the backend */ + initialize(): Promise + + /** Store a memory entry */ + store(entry: Omit): Promise + + /** Search memory entries by query */ + search(query: string, maxResults?: number): Promise + + /** Recall recent memory entries */ + recall(maxResults?: number): Promise + + /** Remove a memory entry by ID */ + forget(id: string): Promise + + /** Remove entries matching a substring */ + forgetByContent(substring: string): Promise + + /** Get backend statistics */ + getStats(): Promise<{ entryCount: number; backend: string }> + + /** Clear all entries */ + clear(): Promise + + /** Dispose the backend */ + dispose(): Promise +} + +/** + * MemoryBackendType — supported backend implementations + */ +export type MemoryBackendType = "builtin" | "agentmemory" diff --git a/src/services/self-improving/MemoryBackendFactory.ts b/src/services/self-improving/MemoryBackendFactory.ts new file mode 100644 index 0000000000..b2e04923ba --- /dev/null +++ b/src/services/self-improving/MemoryBackendFactory.ts @@ -0,0 +1,32 @@ +import { AgentMemoryAdapter } from "./AgentMemoryAdapter" +import type { MemoryBackend, MemoryBackendType } from "./MemoryBackend" +import { MemoryStore } from "./MemoryStore" +import type { Logger } from "./types" + +/** + * MemoryBackendFactory — creates the appropriate memory backend + * based on configuration. + * + * Supports: + * - "builtin" (default): Zoo-Code's own MemoryStore + * - "agentmemory": agentmemory REST API adapter + */ +export class MemoryBackendFactory { + /** + * Create a memory backend. 
+ * + * @param type - Backend type ("builtin" | "agentmemory") + * @param baseDir - Base directory for built-in storage + * @param logger - Logger instance + * @param agentMemoryUrl - Optional agentmemory server URL + */ + static create(type: MemoryBackendType, baseDir: string, logger: Logger, agentMemoryUrl?: string): MemoryBackend { + switch (type) { + case "agentmemory": + return new AgentMemoryAdapter(logger, agentMemoryUrl) + case "builtin": + default: + return new MemoryStore(baseDir, logger) + } + } +} diff --git a/src/services/self-improving/MemoryStore.ts b/src/services/self-improving/MemoryStore.ts new file mode 100644 index 0000000000..58f62b2e92 --- /dev/null +++ b/src/services/self-improving/MemoryStore.ts @@ -0,0 +1,605 @@ +import * as fs from "fs/promises" +import * as path from "path" +import crypto from "crypto" + +import { safeWriteJson } from "../../utils/safeWriteJson" +import type { MemoryContext, MemoryEntry } from "@roo-code/types" +import type { MemoryBackend, MemoryBackendType } from "./MemoryBackend" +import type { Logger } from "./types" + +/** + * Store type for memory categorization. + * Mirrors Hermes' MEMORY.md (environment) vs USER.md (user profile) split. + */ +export type MemoryStoreType = "environment" | "userProfile" + +const MEMORY_SOURCES: ReadonlySet = new Set(["learning", "user", "system", "review"]) + +/** + * MemoryStore - real bounded memory subsystem. 
+ * + * Implements Hermes' dual-store approach: + * - environment: durable operational facts, project knowledge, learned patterns + * - userProfile: user preferences, corrections, style feedback + * + * Key design: + * - Frozen snapshot at session start (prompt stability) + * - Live writes go to disk but NOT to active snapshot + * - Exact-duplicate rejection on load + * - Substring-based replace and remove + * - Bounded retention per store + */ +export class MemoryStore implements MemoryBackend { + private readonly baseDir: string + private readonly logger: Logger + private environment: MemoryEntry[] = [] + private userProfile: MemoryEntry[] = [] + private environmentSnapshot: MemoryEntry[] = [] + private userProfileSnapshot: MemoryEntry[] = [] + private revision = 0 + private initialized = false + + private static readonly MAX_ENVIRONMENT_ENTRIES = 50 + private static readonly MAX_USER_PROFILE_ENTRIES = 20 + private static readonly MAX_ENTRY_LENGTH = 2000 + private static readonly MAX_SNAPSHOT_ENVIRONMENT_ENTRIES = 5 + private static readonly MAX_SNAPSHOT_USER_PROFILE_ENTRIES = 5 + + constructor(baseDir: string, logger: Logger) { + this.baseDir = path.join(baseDir, "self-improving", "memory") + this.logger = logger + } + + get backendType(): MemoryBackendType { + return "builtin" + } + + /** + * Initialize the memory store - load persisted entries from disk. + */ + async initialize(): Promise { + if (this.initialized) { + return + } + + try { + await fs.mkdir(this.baseDir, { recursive: true }) + await this.loadFromDisk() + this.logger.appendLine( + `[MemoryStore] Initialized: ${this.environment.length} environment, ${this.userProfile.length} user profile entries`, + ) + } catch (error) { + this.logger.appendLine( + `[MemoryStore] Initialization error: ${error instanceof Error ? 
error.message : String(error)}`, + ) + } finally { + this.initialized = true + } + } + + async store(entry: Omit): Promise { + return this.addEnvironmentEntry(entry.content, { + source: entry.source, + tags: entry.tags, + expiresAt: entry.expiresAt, + }) + } + + async search(query: string, maxResults: number = 10): Promise { + await this.ensureInitialized() + + const lowerQuery = query.toLowerCase() + const allEntries = [...this.environment, ...this.userProfile] + return allEntries + .filter((entry) => entry.content.toLowerCase().includes(lowerQuery)) + .slice(0, maxResults) + .map((entry) => this.cloneEntry(entry)) + } + + async recall(maxResults: number = 20): Promise { + await this.ensureInitialized() + + return [...this.environment, ...this.userProfile] + .sort((left, right) => { + const leftTimestamp = left.updatedAt ?? left.createdAt + const rightTimestamp = right.updatedAt ?? right.createdAt + return rightTimestamp - leftTimestamp + }) + .slice(0, maxResults) + .map((entry) => this.cloneEntry(entry)) + } + + async forget(id: string): Promise { + await this.ensureInitialized() + + const envIdx = this.environment.findIndex((entry) => entry.id === id) + if (envIdx >= 0) { + this.environment.splice(envIdx, 1) + await this.persistStore("environment") + return true + } + + const userIdx = this.userProfile.findIndex((entry) => entry.id === id) + if (userIdx >= 0) { + this.userProfile.splice(userIdx, 1) + await this.persistStore("userProfile") + return true + } + + return false + } + + async forgetByContent(substring: string): Promise { + await this.ensureInitialized() + + const lowerSubstring = substring.trim().toLowerCase() + if (!lowerSubstring) { + return 0 + } + + let removed = 0 + + const envBefore = this.environment.length + this.environment = this.environment.filter((entry) => !entry.content.toLowerCase().includes(lowerSubstring)) + removed += envBefore - this.environment.length + + const userBefore = this.userProfile.length + this.userProfile = 
this.userProfile.filter((entry) => !entry.content.toLowerCase().includes(lowerSubstring)) + removed += userBefore - this.userProfile.length + + if (removed > 0) { + await this.persistStore("environment") + await this.persistStore("userProfile") + } + + return removed + } + + /** + * Load entries from disk with duplicate rejection. + */ + private async loadFromDisk(): Promise { + this.environment = await this.loadStoreFile("environment") + this.userProfile = await this.loadStoreFile("userProfile") + this.takeSnapshot() + } + + /** + * Load a single store file with validation and dedup. + */ + private async loadStoreFile(type: MemoryStoreType): Promise { + try { + const raw = await fs.readFile(this.getFilePath(type), "utf-8") + const parsed = JSON.parse(raw) + + if (!Array.isArray(parsed)) { + return [] + } + + const seen = new Set() + const deduped: MemoryEntry[] = [] + + for (const candidate of parsed) { + const entry = this.sanitizePersistedEntry(candidate) + if (!entry) { + continue + } + + const contentKey = this.normalizeContent(entry.content) + if (seen.has(contentKey)) { + continue + } + + seen.add(contentKey) + deduped.push(entry) + } + + return this.enforceBounds(type, deduped) + } catch (error: unknown) { + const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined + if (errorCode !== "ENOENT") { + this.logger.appendLine( + `[MemoryStore] Load error for ${this.getFilePath(type)}: ${error instanceof Error ? error.message : String(error)}`, + ) + } + + return [] + } + } + + /** + * Take a frozen snapshot of current memory for prompt injection. + * Live writes update the working store but NOT the snapshot. + */ + takeSnapshot(): void { + this.environmentSnapshot = this.environment.map((entry) => this.cloneEntry(entry)) + this.userProfileSnapshot = this.userProfile.map((entry) => this.cloneEntry(entry)) + this.revision += 1 + } + + /** + * Get the frozen snapshot context for prompt injection. 
+ */ + getSnapshotContext(): MemoryContext { + return { + entries: this.buildSnapshotEntries(), + revision: this.revision, + generatedAt: Date.now(), + } + } + + /** + * Get snapshot as formatted string for prompt injection. + */ + getSnapshotString(): string { + const context = this.getSnapshotContext() + if (context.entries.length === 0) return "" + + const lines = context.entries.map((entry) => { + // Sanitize: single line, strip control characters, no markdown headings + const sanitized = entry.content + .split("") + .filter((c) => { + const code = c.charCodeAt(0) + return code >= 32 || code === 9 || code === 10 || code === 13 + }) + .join("") + .replace(/\n/g, " ") + .replace(/^#+\s*/gm, "") + .trim() + const tags = entry.tags?.length ? ` [${entry.tags.join(", ")}]` : "" + return `- ${sanitized}${tags}` + }) + + return `\n## Learned Context\n${lines.join("\n")}\n` + } + + // ──── Environment store operations ──── + + /** + * Add an entry to the environment store. + * Rejects exact duplicates. Persists to disk but does NOT update the snapshot. + */ + async addEnvironmentEntry( + content: string, + options?: { + source?: MemoryEntry["source"] + tags?: string[] + expiresAt?: number + }, + ): Promise { + return this.addEntry("environment", content, options) + } + + /** + * Replace entries in the environment store that contain a substring. + * If no match is found, adds as new entry. + */ + async replaceEnvironmentEntry( + substring: string, + newContent: string, + options?: { + source?: MemoryEntry["source"] + tags?: string[] + }, + ): Promise { + return this.replaceEntry("environment", substring, newContent, options) + } + + /** + * Remove entries from the environment store that contain a substring. + */ + async removeEnvironmentEntry(substring: string): Promise { + return this.removeEntry("environment", substring) + } + + // ──── User profile store operations ──── + + /** + * Add an entry to the user profile store. 
/**
 * Swap the working entry list of one store for `entries`.
 * Only the in-memory array is replaced; nothing is persisted here.
 */
private setStore(type: MemoryStoreType, entries: MemoryEntry[]): void {
	if (type === "environment") {
		this.environment = entries
	} else {
		this.userProfile = entries
	}
}
"environment.json" : "user-profile.json") + } + + private async ensureInitialized(): Promise { + if (!this.initialized) { + await this.initialize() + } + } + + private async addEntry( + type: MemoryStoreType, + content: string, + options?: { + source?: MemoryEntry["source"] + tags?: string[] + expiresAt?: number + }, + ): Promise { + await this.ensureInitialized() + + const trimmed = content.trim() + if (!trimmed || trimmed.length > MemoryStore.MAX_ENTRY_LENGTH) { + return null + } + + const normalized = this.normalizeContent(trimmed) + const store = this.getStore(type) + if (store.some((entry) => this.normalizeContent(entry.content) === normalized)) { + return null + } + + const now = Date.now() + const entry: MemoryEntry = { + id: crypto.randomUUID(), + content: trimmed, + source: this.normalizeSource(options?.source), + createdAt: now, + updatedAt: now, + relevanceScore: 1, + tags: this.normalizeTags(options?.tags), + expiresAt: typeof options?.expiresAt === "number" ? options.expiresAt : undefined, + } + + this.setStore(type, this.enforceBounds(type, [...store, entry])) + await this.persistStore(type) + + return this.cloneEntry(entry) + } + + private async replaceEntry( + type: MemoryStoreType, + substring: string, + newContent: string, + options?: { + source?: MemoryEntry["source"] + tags?: string[] + }, + ): Promise { + await this.ensureInitialized() + + const trimmedContent = newContent.trim() + if (!trimmedContent || trimmedContent.length > MemoryStore.MAX_ENTRY_LENGTH) { + throw new Error("Replacement memory content must be non-empty and within bounds") + } + + const normalizedSubstring = substring.trim().toLowerCase() + const store = this.getStore(type) + const remaining = + normalizedSubstring.length > 0 + ? 
/**
 * Delete every entry of a store whose content contains `substring`
 * (case-insensitive; the substring is trimmed first).
 *
 * @returns `true` when at least one entry was removed and the store was
 *   persisted; `false` for a blank substring or when nothing matched.
 */
private async removeEntry(type: MemoryStoreType, substring: string): Promise<boolean> {
	await this.ensureInitialized()

	const needle = substring.trim().toLowerCase()
	if (needle.length === 0) {
		return false
	}

	const current = this.getStore(type)
	const kept = current.filter((entry) => !entry.content.toLowerCase().includes(needle))
	if (kept.length === current.length) {
		// Nothing matched: leave the store and the file untouched.
		return false
	}

	this.setStore(type, kept)
	await this.persistStore(type)

	return true
}
/**
 * Cap a list of entries at the store's maximum size.
 *
 * A list within the limit is returned as-is (same array). Otherwise a copy is
 * sorted by ascending `createdAt` and only the newest entries are kept.
 */
private enforceBounds(type: MemoryStoreType, entries: MemoryEntry[]): MemoryEntry[] {
	const limit = this.getMaxEntries(type)
	if (entries.length > limit) {
		const oldestFirst = entries.slice()
		oldestFirst.sort((a, b) => a.createdAt - b.createdAt)
		return oldestFirst.slice(-limit)
	}

	return entries
}
/**
 * Clean a tag list: trim each tag, drop blanks, remove duplicates.
 *
 * The value may come from an untrusted persisted file (via
 * `sanitizePersistedEntry`), so elements that are not strings are skipped
 * instead of being trimmed. Calling `.trim()` on a non-string would throw, and
 * `loadStoreFile` reacts to any throw by returning an empty store.
 *
 * @param tags - Candidate tags; anything that is not an array yields `undefined`.
 * @returns The unique non-empty tags in first-seen order, or `undefined` when none remain.
 */
private normalizeTags(tags: string[] | undefined): string[] | undefined {
	if (!Array.isArray(tags)) {
		return undefined
	}

	// Widen on purpose: the static type is only a promise made by the caller.
	const candidates: readonly unknown[] = tags
	const unique = new Set<string>()
	for (const candidate of candidates) {
		if (typeof candidate !== "string") {
			continue
		}

		const trimmed = candidate.trim()
		if (trimmed.length > 0) {
			unique.add(trimmed)
		}
	}

	return unique.size > 0 ? [...unique] : undefined
}
/**
 * Derives custom mode configs from learned patterns and writes them through a
 * CustomModesManager-like collaborator.
 *
 * `recreateModes()` is debounced and coalesced: every caller inside one
 * debounce window receives the result of the single run that the window
 * triggers, and a run that is already in flight is joined instead of restarted.
 */
export class ModeFactoryService {
	private readonly logger: Logger
	private customModesManager: { updateCustomMode(slug: string, config: ModeConfig): Promise<unknown> } | null = null
	private getPatterns: (() => LearnedPattern[]) | null = null
	/** Retries queued while no pattern provider was registered; flushed by setPatternProvider(). */
	private pendingRecreateCalls: Array<() => void> = []
	/** recreateModes() callers waiting for the current debounce window to fire. */
	private recreateWaiters: Array<{ resolve: (slugs: string[]) => void; reject: (reason: unknown) => void }> = []

	/** Re-entrancy guard: non-null while _recreateModes() is in flight. */
	private recreatePromise: Promise<string[]> | null = null
	/** Debounce timer for collapsing rapid successive recreateModes() calls. */
	private recreateTimer: ReturnType<typeof setTimeout> | null = null
	private readonly recreateDebounceMs: number

	/**
	 * @param logger - Sink for diagnostic lines.
	 * @param options - `recreateDebounceMs` overrides the debounce window
	 *   (default 500 ms); negative or non-finite values fall back to the default.
	 */
	constructor(logger: Logger, options: { recreateDebounceMs?: number } = {}) {
		this.logger = logger
		const requested = options.recreateDebounceMs
		this.recreateDebounceMs =
			typeof requested === "number" && Number.isFinite(requested) && requested >= 0 ? requested : 500
	}

	/** Register the collaborator used to persist derived modes. */
	setCustomModesManager(manager: { updateCustomMode(slug: string, config: ModeConfig): Promise<unknown> }): void {
		this.customModesManager = manager
	}

	/**
	 * Register a callback to retrieve patterns from the learning store.
	 * Required for hot-reload mode recreation.
	 * Flushes any queued recreateModes() retries that arrived before the provider was set.
	 */
	setPatternProvider(provider: () => LearnedPattern[]): void {
		this.getPatterns = provider
		const pending = [...this.pendingRecreateCalls]
		this.pendingRecreateCalls = []
		for (const call of pending) {
			call()
		}
	}

	/**
	 * Re-create modes from current patterns.
	 * Called when .roomodes changes to re-apply auto-created modes
	 * that may have been overwritten by the reload.
	 *
	 * Rapid successive calls are debounced into one run, and every caller is
	 * settled with that run's result; no caller is left pending when its timer
	 * is superseded. If the pattern provider is not yet set, the callers stay
	 * pending until setPatternProvider() triggers the retry.
	 *
	 * @returns Slugs of the modes that were created or updated.
	 */
	recreateModes(): Promise<string[]> {
		return new Promise<string[]>((resolve, reject) => {
			this.recreateWaiters.push({ resolve, reject })

			if (this.recreateTimer !== null) {
				clearTimeout(this.recreateTimer)
			}

			this.recreateTimer = setTimeout(() => {
				this.recreateTimer = null
				const waiters = this.recreateWaiters
				this.recreateWaiters = []
				this.runRecreate(waiters)
			}, this.recreateDebounceMs)
		})
	}

	/**
	 * Execute (or join) one recreation run and settle the given waiters with its outcome.
	 */
	private runRecreate(
		waiters: Array<{ resolve: (slugs: string[]) => void; reject: (reason: unknown) => void }>,
	): void {
		const settleWith = (run: Promise<string[]>): void => {
			run.then(
				(slugs) => waiters.forEach((waiter) => waiter.resolve([...slugs])),
				(reason: unknown) => waiters.forEach((waiter) => waiter.reject(reason)),
			)
		}

		if (!this.getPatterns) {
			// Queue the retry WITHOUT holding the re-entrancy guard; otherwise the retry
			// would wait on a promise that is itself waiting for the retry.
			this.logger.appendLine("[ModeFactory] Cannot recreate modes: pattern provider not set, queuing for retry")
			this.pendingRecreateCalls.push(() => settleWith(this.recreateModes()))
			return
		}

		if (!this.recreatePromise) {
			// finally() releases the guard on success and on failure alike.
			this.recreatePromise = this._recreateModes().finally(() => {
				this.recreatePromise = null
			})
		}

		settleWith(this.recreatePromise)
	}

	/**
	 * Internal implementation of recreateModes. The public wrapper handles
	 * debouncing, re-entrancy and the "provider not set" queue.
	 */
	private async _recreateModes(): Promise<string[]> {
		const provider = this.getPatterns
		if (!provider) {
			this.logger.appendLine("[ModeFactory] Cannot recreate modes: pattern provider not set")
			return []
		}

		const candidates = provider().filter((p) => {
			if (!p.context?.toolNames || p.context.toolNames.length === 0) return false
			if ((p.confidenceScore ?? 0) < MIN_RECREATE_CONFIDENCE) return false
			if ((p.frequency ?? 0) < MIN_RECREATE_FREQUENCY) return false
			return true
		})

		if (candidates.length === 0) {
			this.logger.appendLine("[ModeFactory] No candidate patterns for mode recreation")
			return []
		}

		this.logger.appendLine(`[ModeFactory] Recreating ${candidates.length} modes from patterns`)
		return this.createModesFromPatterns(candidates)
	}

	/**
	 * Derive a custom mode config from a learned pattern.
	 * Returns null if the pattern has no tool names or none of them maps to a tool group.
	 */
	deriveModeFromPattern(pattern: LearnedPattern): ModeConfig | null {
		const toolNames = pattern.context?.toolNames
		if (!toolNames || toolNames.length === 0) {
			this.logger.appendLine(`[ModeFactory] Pattern ${pattern.id} has no tool names, skipping mode creation`)
			return null
		}

		const slug = this.generateSlug(pattern)
		if (!slug) return null

		const name = this.generateName(pattern)
		const roleDefinition = this.generateRoleDefinition(pattern)
		const groups = this.deriveGroups(toolNames)

		if (groups.length === 0) {
			this.logger.appendLine(
				`[ModeFactory] Pattern ${pattern.id} has no valid tool groups, skipping mode creation`,
			)
			return null
		}

		return {
			slug,
			name,
			roleDefinition,
			groups,
			source: "project",
		}
	}

	/**
	 * Create or update a custom mode from a pattern via CustomModesManager.
	 *
	 * @returns The mode slug, or null when no manager is set, no mode could be
	 *   derived, or the manager call failed (the failure is logged).
	 */
	async createModeFromPattern(pattern: LearnedPattern): Promise<string | null> {
		if (!this.customModesManager) {
			this.logger.appendLine("[ModeFactory] CustomModesManager not set, cannot create mode")
			return null
		}

		const config = this.deriveModeFromPattern(pattern)
		if (!config) return null

		try {
			await this.customModesManager.updateCustomMode(config.slug, config)
			this.logger.appendLine(`[ModeFactory] Created/updated custom mode: ${config.slug} (${config.name})`)
			return config.slug
		} catch (error) {
			this.logger.appendLine(
				`[ModeFactory] Failed to create mode: ${error instanceof Error ? error.message : String(error)}`,
			)
			return null
		}
	}

	/**
	 * Batch create modes from multiple patterns, one after another.
	 * Returns array of successfully created mode slugs.
	 */
	async createModesFromPatterns(patterns: LearnedPattern[]): Promise<string[]> {
		const created: string[] = []
		for (const pattern of patterns) {
			const slug = await this.createModeFromPattern(pattern)
			if (slug) created.push(slug)
		}
		return created
	}

	/**
	 * Build a slug from the first two tool names: non-alphanumerics become "-",
	 * lower-cased, cut to 64 characters, edge dashes trimmed ("auto-mode" if nothing is left).
	 */
	private generateSlug(pattern: LearnedPattern): string | null {
		const toolNames = pattern.context?.toolNames
		if (!toolNames || toolNames.length === 0) return null

		const base = toolNames.slice(0, 2).join("-")
		const sanitized = base.replace(/[^a-zA-Z0-9-]/g, "-").toLowerCase()
		const truncated = sanitized.slice(0, 64)
		return truncated.replace(/^-+|-+$/g, "") || "auto-mode"
	}

	/** Human-readable mode name built from up to three title-cased tool names. */
	private generateName(pattern: LearnedPattern): string {
		const toolNames = pattern.context?.toolNames
		if (toolNames && toolNames.length > 0) {
			const toolLabels = toolNames.map((t) => t.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()))
			return `${toolLabels.slice(0, 3).join(" + ")} Specialist`
		}
		return `Auto Mode (${pattern.patternType})`
	}

	/** Assemble the markdown role definition from the pattern's context and statistics. */
	private generateRoleDefinition(pattern: LearnedPattern): string {
		const toolNames = pattern.context?.toolNames || []
		const errorKeys = pattern.context?.errorKeys || []
		const modes = pattern.context?.modes || []
		const successRate = pattern.successRate ?? 0.5
		const confidence = pattern.confidenceScore ?? 0.5

		let role = `You are a specialized mode optimized for:\n\n`

		if (toolNames.length > 0) {
			role += `## Tools\n`
			role += `You specialize in using: ${toolNames.join(", ")}\n\n`
		}

		if (pattern.patternType === "error" && errorKeys.length > 0) {
			role += `## Error Avoidance\n`
			role += `You are designed to avoid these known error patterns: ${errorKeys.join(", ")}\n\n`
		}

		if (modes.length > 0) {
			role += `## Context\n`
			role += `This mode was learned from work in: ${modes.join(", ")}\n\n`
		}

		role += `## Performance\n`
		role += `- Success rate: ${(successRate * 100).toFixed(0)}%\n`
		role += `- Confidence score: ${(confidence * 100).toFixed(0)}%\n`
		// Same fallback as the recreation filter, so a missing count never prints "undefined".
		role += `- Frequency: ${pattern.frequency ?? 0} occurrences\n\n`

		role += `## Instructions\n`
		role += `Follow the established patterns and best practices that led to the creation of this mode. `

		if (pattern.summary) {
			role += `\n\nPattern summary: ${pattern.summary}`
		}

		return role
	}

	/**
	 * Map tool names to their distinct tool groups, in first-seen order.
	 * Only the map's own keys count: a tool named e.g. "toString" must not
	 * resolve to a function inherited from Object.prototype.
	 */
	private deriveGroups(toolNames: string[]): GroupEntry[] {
		const groups = new Set<ToolGroup>()

		for (const toolName of toolNames) {
			if (!Object.prototype.hasOwnProperty.call(TOOL_TO_GROUP_MAP, toolName)) {
				continue
			}

			const group = TOOL_TO_GROUP_MAP[toolName]
			if (group) {
				groups.add(group)
			}
		}

		return Array.from(groups)
	}
}
/**
 * Run every extractor over a batch of events and collect the patterns they
 * report. This is the main entry point called during review cycles.
 *
 * Extractors run in a fixed order: corrections, successes, tool preferences,
 * code index, then prompt-quality and pattern-repeat. Their outputs are
 * concatenated in that order without merging, so this method does nothing to
 * stop one pattern being reported by more than one extractor.
 *
 * Prompt-quality and pattern-repeat extraction are both skipped only when
 * `experiments.selfImprovingPromptQuality` is explicitly `false`.
 * NOTE(review): pattern-repeat extraction is gated by the prompt-quality flag;
 * confirm whether it was meant to have a flag of its own.
 *
 * @param events - Events gathered since the previous review.
 * @param existingPatterns - Already-known patterns that the extractors may update.
 * @returns New and updated patterns.
 */
async analyze(events: LearningEvent[], existingPatterns: LearnedPattern[]): Promise<LearnedPattern[]> {
	const timestamp = Date.now()

	const collected: LearnedPattern[] = [
		...this.extractCorrectionPatterns(events, existingPatterns, timestamp),
		...this.extractSuccessPatterns(events, existingPatterns, timestamp),
		...this.extractToolPatterns(events, existingPatterns, timestamp),
	]

	const fromCodeIndex = await this.extractCodeIndexPatterns(events, existingPatterns, timestamp)
	collected.push(...fromCodeIndex)

	// Flags are read after the asynchronous code-index step, matching the extractor order.
	const promptQualityEnabled = this.getExperiments()?.selfImprovingPromptQuality !== false
	if (promptQualityEnabled) {
		collected.push(...this.extractPromptQualityPatterns(events, existingPatterns, timestamp))
		collected.push(...this.extractPatternRepeatPatterns(events, existingPatterns, timestamp))
	}

	return collected
}
/**
 * Turn repeated successful tool combinations into "tool" patterns.
 *
 * TASK_SUCCESS events are grouped by their sorted, comma-joined tool names;
 * events without tool names are ignored. For each group:
 * - if an existing "tool" pattern has exactly the same tool names, an updated
 *   copy is emitted with higher frequency, confidence and success rate;
 * - otherwise a new pattern is emitted when the group has at least two events.
 *
 * Nothing is produced when the batch has fewer than two success events.
 */
private extractSuccessPatterns(
	events: LearningEvent[],
	existingPatterns: LearnedPattern[],
	now: number,
): LearnedPattern[] {
	const successes = events.filter((event) => event.signal === "TASK_SUCCESS")
	if (successes.length < 2) {
		return []
	}

	const groups = new Map<string, LearningEvent[]>()
	for (const event of successes) {
		const key = [...(event.context.toolNames ?? [])].sort().join(",")
		if (key === "") {
			continue
		}

		const bucket = groups.get(key)
		if (bucket) {
			bucket.push(event)
		} else {
			groups.set(key, [event])
		}
	}

	const results: LearnedPattern[] = []
	groups.forEach((bucket, key) => {
		const count = bucket.length
		const known = existingPatterns.find(
			(pattern) => pattern.patternType === "tool" && this.hasMatchingToolNames(pattern, key.split(",")),
		)

		if (known) {
			results.push({
				...known,
				frequency: known.frequency + count,
				lastSeenAt: now,
				confidenceScore: Math.min(1, known.confidenceScore + count * 0.03),
				successRate: Math.min(1, known.successRate + count * 0.02),
			})
			return
		}

		if (count < 2) {
			return
		}

		results.push({
			id: crypto.randomUUID(),
			patternType: "tool",
			state: "active",
			summary: `Effective tool combination: ${key}`,
			confidenceScore: Math.min(0.6, count * 0.1),
			frequency: count,
			successRate: 0.6,
			firstSeenAt: now,
			lastSeenAt: now,
			sourceSignals: ["TASK_SUCCESS"],
			context: {
				toolNames: key.split(","),
			},
		})
	})

	return results
}
{ success: 0, failure: 0 } + if (event.signal === "TASK_SUCCESS") { + counts.success++ + } else if (event.signal === "TASK_FAILURE") { + counts.failure++ + } + toolCounts.set(toolName, counts) + } + } + + for (const [toolName, counts] of toolCounts) { + const total = counts.success + counts.failure + if (total < 3) { + continue + } + + const successRate = counts.success / total + const existing = existingPatterns.find( + (pattern) => pattern.patternType === "prompt" && this.hasMatchingToolNames(pattern, [toolName]), + ) + + if (existing) { + const combinedFrequency = existing.frequency + total + const existingSuccesses = existing.successRate * existing.frequency + const combinedSuccessRate = (existingSuccesses + counts.success) / combinedFrequency + + patterns.push({ + ...existing, + frequency: combinedFrequency, + lastSeenAt: now, + successRate: combinedSuccessRate, + confidenceScore: Math.min(1, existing.confidenceScore + 0.02), + }) + } else if (successRate > 0.7) { + patterns.push({ + id: crypto.randomUUID(), + patternType: "prompt", + state: "active", + summary: `Prefer ${toolName} for reliable results`, + confidenceScore: Math.min(0.5, successRate * 0.5), + frequency: total, + successRate, + firstSeenAt: now, + lastSeenAt: now, + sourceSignals: ["TASK_SUCCESS"], + context: { + toolNames: [toolName], + }, + }) + } + } + + return patterns + } + + /** + * Extract code index correlation patterns. + * Uses vector search to find patterns related to current event context + * when selfImprovingCodeIndex experiment is enabled. 
/**
 * Extract code index correlation patterns.
 *
 * When a CodeIndexManager is set and `experiments.selfImprovingCodeIndex` is
 * not explicitly `false`, extraction is delegated to the vector-search
 * variant. Otherwise a single aggregate pattern is derived from
 * CODE_INDEX_HIT events, provided there are at least three of them and their
 * average hit count is positive.
 *
 * The aggregate pattern is identified by its fixed summary prefix, not by the
 * full summary. The summary embeds the batch average, so matching on the whole
 * string would miss the existing pattern whenever the average changes and
 * would mint a new pattern on every review cycle. An updated pattern carries
 * the summary for the latest batch.
 *
 * @returns Zero or more new/updated "code-index" patterns.
 */
private async extractCodeIndexPatterns(
	events: LearningEvent[],
	existingPatterns: LearnedPattern[],
	now: number,
): Promise<LearnedPattern[]> {
	const experiments = this.getExperiments()
	const codeIndexEvents = events.filter((event) => event.signal === "CODE_INDEX_HIT")

	// When code index integration is enabled, use vector search for richer patterns
	if (experiments?.selfImprovingCodeIndex !== false && this.codeIndexManager) {
		return this.extractCodeIndexPatternsWithVectorSearch(events, existingPatterns, now, codeIndexEvents)
	}

	// Fallback: heuristic-based extraction
	if (codeIndexEvents.length < 3) {
		return []
	}

	const totalHits = codeIndexEvents.reduce((sum, event) => sum + (event.context.codeIndex?.hits ?? 0), 0)
	const averageHits = totalHits / codeIndexEvents.length
	if (averageHits <= 0) {
		return []
	}

	const summaryPrefix = "Code indexing correlates with task outcomes"
	const summary = `${summaryPrefix} (avg ${averageHits.toFixed(1)} hits/event)`
	const existing = existingPatterns.find(
		(pattern) => pattern.patternType === "code-index" && pattern.summary?.startsWith(summaryPrefix) === true,
	)

	if (existing) {
		return [
			{
				...existing,
				summary,
				frequency: existing.frequency + codeIndexEvents.length,
				lastSeenAt: now,
				confidenceScore: Math.min(1, existing.confidenceScore + averageHits * 0.01),
			},
		]
	}

	return [
		{
			id: crypto.randomUUID(),
			patternType: "code-index",
			state: "active",
			summary,
			confidenceScore: Math.min(0.5, averageHits * 0.05),
			frequency: codeIndexEvents.length,
			successRate: 0.6,
			firstSeenAt: now,
			lastSeenAt: now,
			sourceSignals: ["CODE_INDEX_HIT"],
			context: {
				toolNames: [],
				errorKeys: [],
			},
		},
	]
}
/**
 * Vector-search-enhanced code index pattern extraction.
 *
 * Builds one query per event from its tool names, error key and outcome
 * summary. It keeps queries longer than five characters, de-duplicates them
 * and runs the first three through `CodeIndexManager.searchIndex`. Every hit
 * that carries a code chunk becomes a "code-index" pattern whose summary
 * encodes the file, line range and the first 150 characters of the chunk.
 *
 * Patterns are keyed by that summary within the batch, so a chunk returned by
 * several queries is emitted once with its frequency bumped per hit, not as
 * several records. Scores are clamped to [0, 1] (non-finite scores count as
 * 0), which keeps the derived confidence from going negative.
 *
 * A failing query is logged and skipped; the remaining queries still run.
 *
 * @param codeIndexEvents - Accepted for signature compatibility; not used by this variant.
 * @returns New and updated "code-index" patterns, at most one per distinct summary.
 */
private async extractCodeIndexPatternsWithVectorSearch(
	events: LearningEvent[],
	existingPatterns: LearnedPattern[],
	now: number,
	codeIndexEvents: LearningEvent[],
): Promise<LearnedPattern[]> {
	const manager = this.codeIndexManager
	if (!manager) {
		return []
	}

	// Build search queries from event context
	const searchQueries: string[] = []
	for (const event of events) {
		const toolContext = event.context.toolNames?.join(" ") ?? ""
		const errorContext = event.context.errorKey ?? ""
		const query = [toolContext, errorContext, event.outcome.summary ?? ""].filter(Boolean).join(" ")
		if (query.length > 5) {
			searchQueries.push(query)
		}
	}

	// Deduplicate queries and cap the number of searches per review at three
	const uniqueQueries = [...new Set(searchQueries)].slice(0, 3)

	const bySummary = new Map<string, LearnedPattern>()

	for (const query of uniqueQueries) {
		try {
			const results = await manager.searchIndex(query)
			if (!results || results.length === 0) {
				continue
			}

			for (const result of results) {
				const payload = result.payload
				if (!payload?.codeChunk) {
					continue
				}

				const filePath = payload.filePath ?? "unknown"
				const startLine = payload.startLine ?? 0
				const endLine = payload.endLine ?? 0
				const summary = `[CodeIndex:${filePath}:${startLine}-${endLine}] ${payload.codeChunk.slice(0, 150)}`
				const score = Number.isFinite(result.score) ? Math.min(1, Math.max(0, result.score)) : 0

				// Prefer the record accumulated in this batch, then a previously stored pattern.
				const base =
					bySummary.get(summary) ??
					existingPatterns.find((p) => p.patternType === "code-index" && p.summary === summary)

				if (base) {
					bySummary.set(summary, {
						...base,
						frequency: base.frequency + 1,
						lastSeenAt: now,
						confidenceScore: Math.min(1, base.confidenceScore + score * 0.05),
					})
				} else {
					bySummary.set(summary, {
						id: crypto.randomUUID(),
						patternType: "code-index",
						state: "active",
						summary,
						confidenceScore: Math.min(0.5, score * 0.5),
						frequency: 1,
						successRate: 0.5,
						firstSeenAt: now,
						lastSeenAt: now,
						sourceSignals: ["CODE_INDEX_HIT"],
						context: {
							toolNames: [],
							errorKeys: [],
						},
					})
				}
			}
		} catch (error) {
			// Log and continue with other queries
			console.error(`[PatternAnalyzer] Vector search error for query "${query}":`, error)
		}
	}

	return [...bySummary.values()]
}
/**
 * Reinforce patterns that PATTERN_REPEAT events report as reused.
 *
 * Events are counted per `context.promptFingerprint` ("unknown" when absent).
 * For each key:
 * - an existing pattern whose id or prompt fingerprint equals the key is
 *   emitted as an updated copy (frequency, confidence and success rate raised);
 * - otherwise a new "prompt" pattern is emitted once the key occurs at least
 *   three times.
 *
 * Nothing is produced when the batch has fewer than two PATTERN_REPEAT events.
 */
private extractPatternRepeatPatterns(
	events: LearningEvent[],
	existingPatterns: LearnedPattern[],
	now: number,
): LearnedPattern[] {
	const repeats = events.filter((event) => event.signal === "PATTERN_REPEAT")
	if (repeats.length < 2) {
		return []
	}

	// Only the number of events per key matters, so count instead of bucketing.
	const occurrencesByKey = new Map<string, number>()
	for (const event of repeats) {
		const key = event.context.promptFingerprint ?? "unknown"
		occurrencesByKey.set(key, (occurrencesByKey.get(key) ?? 0) + 1)
	}

	const reinforced: LearnedPattern[] = []
	for (const [key, occurrences] of occurrencesByKey) {
		const known = existingPatterns.find(
			(pattern) => pattern.id === key || pattern.context.promptFingerprint === key,
		)

		if (known) {
			reinforced.push({
				...known,
				frequency: known.frequency + occurrences,
				lastSeenAt: now,
				confidenceScore: Math.min(1, known.confidenceScore + occurrences * 0.03),
				successRate: Math.min(1, known.successRate + occurrences * 0.01),
			})
			continue
		}

		if (occurrences >= 3) {
			reinforced.push({
				id: crypto.randomUUID(),
				patternType: "prompt",
				state: "active",
				summary: `Pattern ${key.slice(0, 16)} reused ${occurrences} times — reinforcing`,
				confidenceScore: Math.min(0.4, occurrences * 0.05),
				frequency: occurrences,
				successRate: 0.5,
				firstSeenAt: now,
				lastSeenAt: now,
				sourceSignals: ["PATTERN_REPEAT"],
				context: {
					promptFingerprint: key,
				},
			})
		}
	}

	return reinforced
}
[]) { + names.add(name) + } + } + + return [...names] + } + + private hasMatchingToolNames(pattern: LearnedPattern, toolNames: string[]): boolean { + const existingToolNames = pattern.context.toolNames + if (!existingToolNames || existingToolNames.length !== toolNames.length) { + return false + } + + const normalizedExisting = [...existingToolNames].sort() + const normalizedIncoming = [...toolNames].sort() + + return normalizedExisting.every((toolName, index) => toolName === normalizedIncoming[index]) + } +} diff --git a/src/services/self-improving/PreventionEngine.ts b/src/services/self-improving/PreventionEngine.ts new file mode 100644 index 0000000000..27586510df --- /dev/null +++ b/src/services/self-improving/PreventionEngine.ts @@ -0,0 +1,166 @@ +import { ErrorClassifier, ClassifiedError, ErrorCategory } from "./ErrorClassifier" +import { ToolCallValidator, ValidationResult } from "./ToolCallValidator" +import { CascadeTracker } from "./CascadeTracker" +import type { CodeIndexAdapter } from "./CodeIndexAdapter" +import type { VectorStoreSearchResult } from "../code-index/interfaces/vector-store" + +export interface PreventionContext { + preValidation: ValidationResult + cascadeWarning: string | null + preventionHints: string[] + recentErrors: Array<{ toolName: string; category: string }> +} + +export class PreventionEngine { + private errorClassifier: ErrorClassifier + private toolCallValidator: ToolCallValidator + private cascadeTracker: CascadeTracker + private codeIndexAdapter: CodeIndexAdapter | undefined + + constructor(codeIndexAdapter?: CodeIndexAdapter) { + this.errorClassifier = new ErrorClassifier() + this.toolCallValidator = new ToolCallValidator() + this.cascadeTracker = new CascadeTracker() + this.codeIndexAdapter = codeIndexAdapter + } + + setCodeIndexAdapter(adapter: CodeIndexAdapter | undefined): void { + this.codeIndexAdapter = adapter + } + + /** + * Format a single VectorStoreSearchResult into a human-readable context line. 
+ */ + private formatSearchResult(result: VectorStoreSearchResult): string { + const filePath = result.payload?.filePath ?? String(result.id) + const startLine = result.payload?.startLine + const endLine = result.payload?.endLine + const snippet = result.payload?.codeChunk + const lineRange = startLine !== undefined && endLine !== undefined + ? ` (lines ${startLine}-${endLine})` + : startLine !== undefined + ? ` (line ${startLine})` + : "" + const snippetStr = snippet ? `: ${snippet.slice(0, 200).replace(/\n/g, " ")}` : "" + return `- ${filePath}${lineRange}${snippetStr}` + } + + /** + * Enrich a user message with relevant code index search results. + * Non-blocking — returns original message on any error or empty results. + * Gated behind selfImprovingCodeIndex experiment flag. + */ + async enrichContextWithCodeIndex(userMessage: string): Promise { + if (!this.codeIndexAdapter || !this.codeIndexAdapter.isAvailable()) { + return userMessage + } + + try { + const results = await this.codeIndexAdapter.searchVectorStore(userMessage) + if (!results || results.length === 0) { + return userMessage + } + + const contextLines = results.map((r) => this.formatSearchResult(r)) + const contextBlock = [ + "Relevant existing code from codebase:", + ...contextLines, + ].join("\n") + + return `${userMessage}\n\n${contextBlock}` + } catch (error) { + // Graceful fallback — log and return original message + return userMessage + } + } + + /** + * Called BEFORE every tool call to get prevention context. + * Returns validation warnings, cascade warnings, and hints + * that can be injected into the model's context. 
+ */ + getPreventionContext( + toolName: string, + params: Record, + ): PreventionContext { + const preValidation = this.toolCallValidator.validate(toolName, params) + const cascadeWarning = this.cascadeTracker.getCascadeSuggestion() + const recentErrors = this.cascadeTracker.getRecentErrors(toolName, 3) + const preventionHints = this.toolCallValidator.getPreventionHints( + toolName, + recentErrors.map((e) => ({ toolName: e.toolName, category: e.category })), + ) + + return { + preValidation, + cascadeWarning, + preventionHints, + recentErrors: recentErrors.map((e) => ({ + toolName: e.toolName, + category: e.category, + })), + } + } + + /** + * Called AFTER every tool call to record the result. + * Returns a ClassifiedError if there was an error, or null on success. + */ + recordToolResult( + toolName: string, + error: string | null, + _params: Record, + ): ClassifiedError | null { + if (!error) { + // Success — check if we were in a cascade and can clear it + this.cascadeTracker.reset() + return null + } + + const classified = this.errorClassifier.classify(error, toolName) + this.cascadeTracker.recordError(toolName, classified.category, error) + return classified + } + + /** + * Generate a prevention message to inject into the model's context. + * Returns a formatted string or null if nothing to report. + */ + generatePreventionMessage(context: PreventionContext): string | null { + const parts: string[] = [] + + if (context.cascadeWarning) { + parts.push(context.cascadeWarning) + } + + if (context.preValidation.warnings.length > 0) { + parts.push( + `⚠️ Pre-validation warnings: ${context.preValidation.warnings.join("; ")}`, + ) + } + + if (context.preValidation.suggestions.length > 0) { + parts.push( + `💡 Suggestions: ${context.preValidation.suggestions.join("; ")}`, + ) + } + + if (context.preventionHints.length > 0) { + parts.push(...context.preventionHints) + } + + return parts.length > 0 ? 
parts.join("\n") : null + } + + getCascadeTracker(): CascadeTracker { + return this.cascadeTracker + } + + getErrorClassifier(): ErrorClassifier { + return this.errorClassifier + } + + getToolCallValidator(): ToolCallValidator { + return this.toolCallValidator + } +} diff --git a/src/services/self-improving/QuestionEvaluatorService.ts b/src/services/self-improving/QuestionEvaluatorService.ts new file mode 100644 index 0000000000..0592c68032 --- /dev/null +++ b/src/services/self-improving/QuestionEvaluatorService.ts @@ -0,0 +1,343 @@ +import type { Logger } from "./types" +import type { ReviewTeamService } from "./ReviewTeamService" +import type { CodeIndexManager } from "../code-index/manager" +import type { Experiments } from "./types" + +export interface QuestionEvaluationConfig { + enabled: boolean + useFullTeam: boolean // use ReviewTeamService when Full Team is enabled + useContextualAnalysis: boolean // do contextual analysis when Full Auto is enabled + doResearchBeforeDeciding: boolean // spawn subtask for deeper research + minChoicesForEvaluation: number // minimum choices to trigger evaluation (default 2) +} + +export interface QuestionEvaluation { + question: string + choices: { text: string; mode: string | null }[] + selectedIndex: number + selectedText: string + reasoning: string + evaluatedBy: "full-team" | "contextual" | "research" | "fallback" +} + +const DEFAULT_CONFIG: QuestionEvaluationConfig = { + enabled: true, + useFullTeam: true, + useContextualAnalysis: true, + doResearchBeforeDeciding: false, + minChoicesForEvaluation: 2, +} + +export class QuestionEvaluatorService { + private logger: Logger + private config: QuestionEvaluationConfig + private reviewTeam: ReviewTeamService | null = null + private codeIndexManager: CodeIndexManager | undefined + private getExperiments: (() => Experiments | undefined) | undefined + + constructor(logger: Logger, config?: Partial) { + this.logger = logger + this.config = { ...DEFAULT_CONFIG, ...config } + } + + 
/** + * Set the CodeIndexManager instance for vector-search-based question similarity. + */ + setCodeIndexManager(manager: CodeIndexManager | undefined): void { + this.codeIndexManager = manager + } + + /** + * Set the experiments accessor for feature gating. + */ + setExperimentsAccessor(getExperiments: () => Experiments | undefined): void { + this.getExperiments = getExperiments + } + + getConfig(): QuestionEvaluationConfig { + return { ...this.config } + } + + updateConfig(updates: Partial): void { + this.config = { ...this.config, ...updates } + this.logger.appendLine(`[QuestionEvaluator] Config updated: ${JSON.stringify(updates)}`) + } + + setReviewTeam(team: ReviewTeamService): void { + this.reviewTeam = team + } + + /** + * Evaluate all choices and select the best one. + * Returns the index of the best choice. + */ + async evaluateBestChoice( + question: string, + choices: { text: string; mode: string | null }[], + context?: { taskHistory?: string[]; workspaceFiles?: string[] }, + ): Promise { + if (!this.config.enabled || choices.length < this.config.minChoicesForEvaluation) { + // Fallback: select first choice + return { + question, + choices, + selectedIndex: 0, + selectedText: this.resolveSelectedText(choices, 0), + reasoning: "Evaluation disabled or too few choices", + evaluatedBy: "fallback", + } + } + + // Strategy 1: Full Team evaluation (if enabled and ReviewTeamService is available) + if (this.config.useFullTeam && this.reviewTeam) { + const evaluation = await this.evaluateWithFullTeam(question, choices, context) + if (evaluation) return evaluation + } + + // Strategy 2: Contextual analysis (if enabled) + if (this.config.useContextualAnalysis) { + const evaluation = this.evaluateContextually(question, choices, context) + if (evaluation) return evaluation + } + + // Strategy 3: Research (if enabled) — would spawn a subtask + if (this.config.doResearchBeforeDeciding) { + this.logger.appendLine("[QuestionEvaluator] Research mode enabled but subtask 
spawning not yet implemented") + } + + // Final fallback: first choice + return { + question, + choices, + selectedIndex: 0, + selectedText: this.resolveSelectedText(choices, 0), + reasoning: "No evaluation strategy produced a result", + evaluatedBy: "fallback", + } + } + + /** + * Evaluate choices using the Full Team (ReviewTeamService). + * Each persona evaluates each choice and votes. + */ + private async evaluateWithFullTeam( + question: string, + choices: { text: string; mode: string | null }[], + context?: { taskHistory?: string[]; workspaceFiles?: string[] }, + ): Promise { + if (!this.reviewTeam) return null + + this.logger.appendLine( + `[QuestionEvaluator] Evaluating ${choices.length} choices with Full Team for: "${question.substring(0, 60)}..."`, + ) + + // Score each choice using the review team personas + const scores: { index: number; score: number; reasoning: string }[] = [] + + for (let i = 0; i < choices.length; i++) { + const choice = choices[i] + + // Create a pseudo-pattern for the review team to evaluate + const pseudoPattern = { + id: `question-choice-${i}`, + patternType: "tool" as const, + state: "active" as const, + summary: `Choice ${i + 1}: ${choice.text.substring(0, 100)}`, + confidenceScore: 0.5, + frequency: 1, + successRate: 0.5, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { + toolNames: ["ask_followup_question"], + errorKeys: [], + modes: choice.mode ? 
[choice.mode] : [], + }, + } + + const verdict = await this.reviewTeam.reviewPattern(pseudoPattern) + scores.push({ + index: i, + score: verdict.score, + reasoning: verdict.summary, + }) + } + + // Sort by score descending, pick the best + scores.sort((a, b) => b.score - a.score) + const best = scores[0] + + this.logger.appendLine( + `[QuestionEvaluator] Full Team selected choice ${best.index + 1} (score: ${(best.score * 100).toFixed(0)}%)`, + ) + + return { + question, + choices, + selectedIndex: best.index, + selectedText: this.resolveSelectedText(choices, best.index), + reasoning: best.reasoning, + evaluatedBy: "full-team", + } + } + + /** + * Contextual analysis: evaluate choices based on the question context. + * Uses heuristics like: + * - Prefer choices with mode switches when the question involves delegation + * - Prefer specific answers over vague ones + * - Prefer answers that match the question's intent + */ + private evaluateContextually( + question: string, + choices: { text: string; mode: string | null }[], + context?: { taskHistory?: string[]; workspaceFiles?: string[] }, + ): QuestionEvaluation | null { + const scores = choices.map((choice, index) => { + let score = 0.5 // neutral start + const reasons: string[] = [] + + // Prefer choices with mode switches when question involves delegation + if (choice.mode && (question.toLowerCase().includes("mode") || question.toLowerCase().includes("switch"))) { + score += 0.2 + reasons.push("Mode switch matches delegation intent") + } + + // Prefer longer, more specific answers + if (choice.text.length > 50) { + score += 0.1 + reasons.push("Specific/detailed answer") + } else if (choice.text.length < 10) { + score -= 0.1 + reasons.push("Too brief/vague") + } + + // Prefer answers that contain actionable verbs + const actionableVerbs = [ + "implement", + "create", + "fix", + "build", + "write", + "add", + "update", + "refactor", + "analyze", + "research", + ] + if (actionableVerbs.some((v) => 
choice.text.toLowerCase().includes(v))) { + score += 0.1 + reasons.push("Actionable answer") + } + + // Penalize "no" or negative answers when question is asking for action + if (choice.text.toLowerCase().startsWith("no") || choice.text.toLowerCase().startsWith("don't")) { + score -= 0.15 + reasons.push("Negative/avoidance answer") + } + + // Prefer answers that reference the question's key terms + const questionWords = question + .toLowerCase() + .split(/\s+/) + .filter((w) => w.length > 4) + const matchingWords = questionWords.filter((w) => choice.text.toLowerCase().includes(w)) + if (matchingWords.length > 0) { + score += 0.05 * Math.min(matchingWords.length, 3) + reasons.push(`Matches ${matchingWords.length} key terms from question`) + } + + return { index, score, reasoning: reasons.join("; ") || "Neutral evaluation" } + }) + + // Sort by score descending + scores.sort((a, b) => b.score - a.score) + const best = scores[0] + + this.logger.appendLine( + `[QuestionEvaluator] Contextual analysis selected choice ${best.index + 1} (score: ${(best.score * 100).toFixed(0)}%)`, + ) + + return { + question, + choices, + selectedIndex: best.index, + selectedText: this.resolveSelectedText(choices, best.index), + reasoning: best.reasoning, + evaluatedBy: "contextual", + } + } + + /** + * Search for similar past questions using vector search. + * Uses CodeIndexManager to find semantically similar questions and their resolutions. + * Gated behind selfImprovingCodeIndex experiment flag. + */ + async searchSimilarQuestions( + query: string, + ): Promise> { + const experiments = this.getExperiments?.() + if (experiments?.selfImprovingCodeIndex === false) { + return [] + } + + if (!this.codeIndexManager) { + return [] + } + + try { + const results = await this.codeIndexManager.searchIndex(query) + if (!results || results.length === 0) { + return [] + } + + return results + .filter((r) => r.payload?.codeChunk) + .map((r) => ({ + question: r.payload?.codeChunk?.slice(0, 200) ?? 
"", + resolution: `Found in ${r.payload?.filePath ?? "unknown"}:${r.payload?.startLine ?? 0}`, + score: r.score, + })) + } catch (error) { + this.logger.appendLine( + `[QuestionEvaluator] searchSimilarQuestions error: ${error instanceof Error ? error.message : String(error)}`, + ) + return [] + } + } + + /** + * Resolve the selected text with a fallback chain: + * 1. Preferred choice's text (if non-empty) + * 2. First choice's text (if non-empty) + * 3. Empty string (last resort — should not happen in practice) + * + * This prevents empty responses when LLM evaluation returns malformed results + * with empty text fields. The first choice is the same as what auto-approve + * timeout would have selected anyway. + */ + private resolveSelectedText( + choices: { text: string; mode: string | null }[], + preferredIndex: number, + ): string { + const preferred = choices[preferredIndex]?.text + if (preferred && preferred.trim().length > 0) { + return preferred + } + const fallback = choices[0]?.text + if (fallback && fallback.trim().length > 0) { + return fallback + } + return "" + } + + getStatus(): Record { + return { + enabled: this.config.enabled, + useFullTeam: this.config.useFullTeam, + useContextualAnalysis: this.config.useContextualAnalysis, + doResearchBeforeDeciding: this.config.doResearchBeforeDeciding, + } + } +} diff --git a/src/services/self-improving/RequirementsVerifier.test.ts b/src/services/self-improving/RequirementsVerifier.test.ts new file mode 100644 index 0000000000..ceca568214 --- /dev/null +++ b/src/services/self-improving/RequirementsVerifier.test.ts @@ -0,0 +1,572 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { RequirementsVerifier } from "./RequirementsVerifier" +import { KeywordConflictResolver } from "./KeywordConflictResolver" +import type { ConflictResolver, ConflictResolution, Requirement } from "./types" + +vi.mock("../../utils/single-completion-handler") + +describe("RequirementsVerifier", () => { + let 
verifier: RequirementsVerifier + let logger: { appendLine: ReturnType } + + beforeEach(() => { + logger = { appendLine: vi.fn() } + verifier = new RequirementsVerifier(logger) + }) + + describe("config", () => { + it("should use defaults when no config provided", () => { + const v = new RequirementsVerifier() + const config = v.getConfig() + expect(config.mandatory).toBe(true) + expect(config.autoExtract).toBe(true) + expect(config.requireAllVerified).toBe(true) + }) + + it("should merge partial config with defaults", () => { + const v = new RequirementsVerifier(undefined, { mandatory: false }) + const config = v.getConfig() + expect(config.mandatory).toBe(false) + expect(config.autoExtract).toBe(true) + expect(config.requireAllVerified).toBe(true) + }) + + it("should update config via updateConfig", () => { + verifier.updateConfig({ requireAllVerified: false }) + const config = verifier.getConfig() + expect(config.requireAllVerified).toBe(false) + expect(config.mandatory).toBe(true) + }) + }) + + describe("conflict resolver", () => { + it("should default to KeywordConflictResolver", () => { + const resolver = verifier.getConflictResolver() + expect(resolver).toBeInstanceOf(KeywordConflictResolver) + expect(resolver.name).toBe("keyword") + }) + + it("should accept custom conflict resolver via constructor", () => { + const mockResolver: ConflictResolver = { + name: "mock", + resolve: vi.fn().mockResolvedValue({ supersedes: [], confidence: 1, reason: "mock" }), + } + const v = new RequirementsVerifier(undefined, undefined, mockResolver) + expect(v.getConflictResolver()).toBe(mockResolver) + }) + + it("should allow runtime resolver swap via setConflictResolver", () => { + const mockResolver: ConflictResolver = { + name: "mock", + resolve: vi.fn().mockResolvedValue({ supersedes: [], confidence: 1, reason: "mock" }), + } + verifier.setConflictResolver(mockResolver) + expect(verifier.getConflictResolver()).toBe(mockResolver) + 
expect(verifier.getConflictResolver().name).toBe("mock") + }) + + it("should use custom resolver during processUserMessages", async () => { + const mockResolver: ConflictResolver = { + name: "mock", + resolve: vi.fn().mockResolvedValue({ supersedes: [], confidence: 1, reason: "mock" }), + } + const v = new RequirementsVerifier(logger, undefined, mockResolver) + const reqs = await v.processUserMessages(["- Build authentication"]) + expect(mockResolver.resolve).toHaveBeenCalledTimes(1) + expect(reqs).toHaveLength(1) + }) + }) + + describe("extractFromPrompt", () => { + it("should extract requirements from bullet points", () => { + const prompt = ` +- Implement user authentication +- Add database schema +- Write API tests + `.trim() + const reqs = verifier.extractFromPrompt(prompt) + expect(reqs).toHaveLength(3) + expect(reqs[0].text).toBe("Implement user authentication") + expect(reqs[1].text).toBe("Add database schema") + expect(reqs[2].text).toBe("Write API tests") + expect(reqs.every((r) => r.status === "pending")).toBe(true) + }) + + it("should extract requirements from numbered lists", () => { + const prompt = ` +1. Set up CI/CD pipeline +2. Configure monitoring +3. Deploy to production + `.trim() + const reqs = verifier.extractFromPrompt(prompt) + expect(reqs).toHaveLength(3) + expect(reqs[0].text).toBe("Set up CI/CD pipeline") + expect(reqs[1].text).toBe("Configure monitoring") + expect(reqs[2].text).toBe("Deploy to production") + }) + + it("should extract requirements from keyword sentences", () => { + const prompt = "The system must handle 10k concurrent users. It should encrypt all data at rest. We need to implement rate limiting." 
+ const reqs = verifier.extractFromPrompt(prompt) + expect(reqs.length).toBeGreaterThanOrEqual(1) + expect(reqs.some((r) => r.text.includes("handle 10k concurrent users"))).toBe(true) + }) + + it("should treat plain prompt as one goal requirement", () => { + const prompt = "Build a todo app" + const reqs = verifier.extractFromPrompt(prompt) + expect(reqs).toHaveLength(1) + expect(reqs[0].text).toBe("Build a todo app") + expect(reqs[0].category).toBe("goal") + }) + + it("should detect category headers", () => { + const prompt = ` + ## Security + - Encrypt all passwords + ## Constraint + - Response time under 200ms + `.trim() + const reqs = verifier.extractFromPrompt(prompt) + expect(reqs).toHaveLength(2) + expect(reqs[0].category).toBe("security") + expect(reqs[1].category).toBe("constraint") + }) + + it("should return empty array for empty prompt", () => { + const reqs = verifier.extractFromPrompt("") + expect(reqs).toHaveLength(0) + }) + + it("should assign messageIndex to extracted requirements", () => { + const reqs = verifier.extractFromPrompt("- Do something", 5) + expect(reqs).toHaveLength(1) + expect(reqs[0].messageIndex).toBe(5) + }) + }) + + describe("addRequirement", () => { + it("should add a requirement manually", () => { + const req = verifier.addRequirement("Test requirement", "constraint") + expect(req.text).toBe("Test requirement") + expect(req.category).toBe("constraint") + expect(req.status).toBe("pending") + expect(req.id).toBeTruthy() + expect(req.messageIndex).toBe(0) + }) + + it("should default to functional category", () => { + const req = verifier.addRequirement("Some requirement") + expect(req.category).toBe("functional") + }) + }) + + describe("verifyRequirement", () => { + it("should mark requirement as verified with evidence", () => { + const req = verifier.addRequirement("Test") + const result = verifier.verifyRequirement(req.id, "code-review", "Code reviewed and approved") + expect(result).toBe(true) + + const updated = 
verifier.getAllRequirements()[0] + expect(updated.status).toBe("verified") + expect(updated.verifiedBy).toBe("code-review") + expect(updated.evidence).toBe("Code reviewed and approved") + expect(updated.verifiedAt).toBeGreaterThan(0) + }) + + it("should return false for unknown id", () => { + const result = verifier.verifyRequirement("nonexistent", "manual", "n/a") + expect(result).toBe(false) + }) + }) + + describe("failRequirement", () => { + it("should mark requirement as failed with evidence", () => { + const req = verifier.addRequirement("Test") + const result = verifier.failRequirement(req.id, "Build failed") + expect(result).toBe(true) + + const updated = verifier.getAllRequirements()[0] + expect(updated.status).toBe("failed") + expect(updated.evidence).toBe("Build failed") + }) + + it("should return false for unknown id", () => { + const result = verifier.failRequirement("nonexistent", "n/a") + expect(result).toBe(false) + }) + }) + + describe("getAllRequirements", () => { + it("should return all requirements", () => { + verifier.addRequirement("Req 1") + verifier.addRequirement("Req 2") + expect(verifier.getAllRequirements()).toHaveLength(2) + }) + + it("should return empty array when no requirements", () => { + expect(verifier.getAllRequirements()).toHaveLength(0) + }) + }) + + describe("getRequirementsByStatus", () => { + it("should filter by status", () => { + const r1 = verifier.addRequirement("Req 1") + verifier.addRequirement("Req 2") + verifier.verifyRequirement(r1.id, "manual", "done") + + const verified = verifier.getRequirementsByStatus("verified") + const pending = verifier.getRequirementsByStatus("pending") + + expect(verified).toHaveLength(1) + expect(verified[0].text).toBe("Req 1") + expect(pending).toHaveLength(1) + expect(pending[0].text).toBe("Req 2") + }) + + it("should filter superseded requirements", () => { + verifier.addRequirement("Req 1") + verifier.addRequirement("Req 2") + + // Manually supersede one + const all = 
verifier.getAllRequirements() + all[0].status = "superseded" + + const superseded = verifier.getRequirementsByStatus("superseded") + expect(superseded).toHaveLength(1) + expect(superseded[0].text).toBe("Req 1") + }) + }) + + describe("getActiveRequirements", () => { + it("should return only non-superseded requirements", () => { + const r1 = verifier.addRequirement("Req 1") + verifier.addRequirement("Req 2") + + // Manually supersede r1 + r1.status = "superseded" + + const active = verifier.getActiveRequirements() + expect(active).toHaveLength(1) + expect(active[0].text).toBe("Req 2") + }) + + it("should return all when none superseded", () => { + verifier.addRequirement("Req 1") + verifier.addRequirement("Req 2") + + const active = verifier.getActiveRequirements() + expect(active).toHaveLength(2) + }) + }) + + describe("verify", () => { + it("should return passed=true when no requirements", async () => { + const result = await verifier.verify() + expect(result.passed).toBe(true) + expect(result.total).toBe(0) + expect(result.summary).toBe("No requirements extracted") + }) + + it("should return passed=true when all verified", async () => { + const r1 = verifier.addRequirement("Req 1") + const r2 = verifier.addRequirement("Req 2") + verifier.verifyRequirement(r1.id, "test", "Tests pass") + verifier.verifyRequirement(r2.id, "code-review", "Reviewed") + + const result = await verifier.verify() + expect(result.passed).toBe(true) + expect(result.total).toBe(2) + expect(result.verified).toHaveLength(2) + expect(result.failed).toHaveLength(0) + expect(result.pending).toHaveLength(0) + }) + + it("should return passed=false when any active requirement failed", async () => { + const r1 = verifier.addRequirement("Req 1") + verifier.failRequirement(r1.id, "Failed") + + const result = await verifier.verify() + expect(result.passed).toBe(false) + expect(result.total).toBe(1) + expect(result.failed).toHaveLength(1) + expect(result.summary).toContain("requirements failed") + }) + + 
it("should return passed=false when pending and requireAllVerified=true", async () => { + verifier.addRequirement("Req 1") + + const result = await verifier.verify() + expect(result.passed).toBe(false) + expect(result.pending).toHaveLength(1) + }) + + it("should return passed=true when pending and requireAllVerified=false", async () => { + verifier.updateConfig({ requireAllVerified: false }) + verifier.addRequirement("Req 1") + + const result = await verifier.verify() + expect(result.passed).toBe(true) + expect(result.pending).toHaveLength(1) + }) + + it("should ignore superseded requirements in verification", async () => { + const r1 = verifier.addRequirement("Req 1") + verifier.addRequirement("Req 2") + + // Supersede r1 — it should not block completion + r1.status = "superseded" + + const result = await verifier.verify() + expect(result.passed).toBe(false) // Req 2 is still pending + expect(result.total).toBe(2) // Total includes superseded + expect(result.pending).toHaveLength(1) // Only Req 2 is pending + }) + + it("should pass when only superseded requirements remain unverified", async () => { + const r1 = verifier.addRequirement("Req 1") + r1.status = "superseded" + + const result = await verifier.verify() + expect(result.passed).toBe(true) // No active requirements + expect(result.total).toBe(1) + expect(result.verified).toHaveLength(0) + expect(result.pending).toHaveLength(0) + }) + }) + + describe("processUserMessages", () => { + it("should extract requirements from a single message", async () => { + const messages = ["- Build authentication"] + const reqs = await verifier.processUserMessages(messages) + expect(reqs).toHaveLength(1) + expect(reqs[0].text).toBe("Build authentication") + expect(reqs[0].messageIndex).toBe(0) + }) + + it("should accumulate requirements across multiple messages", async () => { + const messages = [ + "- Build authentication", + "- Add logging", + ] + const reqs = await verifier.processUserMessages(messages) + 
expect(reqs).toHaveLength(2) + expect(reqs[0].text).toBe("Build authentication") + expect(reqs[0].messageIndex).toBe(0) + expect(reqs[1].text).toBe("Add logging") + expect(reqs[1].messageIndex).toBe(1) + }) + + it("should supersede earlier requirement when later message overlaps", async () => { + const messages = [ + "- Build authentication with JWT", + "- Build authentication with OAuth", + ] + const reqs = await verifier.processUserMessages(messages) + expect(reqs).toHaveLength(2) + + // First requirement should be superseded + expect(reqs[0].status).toBe("superseded") + expect(reqs[0].supersededBy).toBe(reqs[1].id) + + // Second requirement should be active and reference the superseded one + expect(reqs[1].status).toBe("pending") + expect(reqs[1].supersedes).toBe(reqs[0].id) + }) + + it("should NOT supersede when topics are different", async () => { + const messages = [ + "- Build authentication with JWT", + "- Add database schema for users", + ] + const reqs = await verifier.processUserMessages(messages) + expect(reqs).toHaveLength(2) + + // Both should remain active (different topics) + expect(reqs[0].status).toBe("pending") + expect(reqs[1].status).toBe("pending") + expect(reqs[0].supersededBy).toBeUndefined() + expect(reqs[1].supersedes).toBeUndefined() + }) + + it("should handle explicit supersede keywords", async () => { + const messages = [ + "- Build authentication with JWT bearer tokens", + "Actually, use OAuth bearer tokens for authentication instead of JWT", + ] + const reqs = await verifier.processUserMessages(messages) + expect(reqs).toHaveLength(2) + + // First should be superseded due to semantic overlap + expect(reqs[0].status).toBe("superseded") + expect(reqs[1].status).toBe("pending") + }) + + it("should return empty array for empty messages", async () => { + const reqs = await verifier.processUserMessages([]) + expect(reqs).toHaveLength(0) + }) + + it("should only process new messages on subsequent calls", async () => { + // First call + const 
reqs1 = await verifier.processUserMessages(["- Build authentication"]) + expect(reqs1).toHaveLength(1) + expect(verifier.getProcessedMessageCount()).toBe(1) + + // Second call with same messages — should not re-process + const reqs2 = await verifier.processUserMessages(["- Build authentication"]) + expect(reqs2).toHaveLength(1) + expect(verifier.getProcessedMessageCount()).toBe(1) + + // Third call with new messages appended + const reqs3 = await verifier.processUserMessages([ + "- Build authentication", + "- Add logging", + ]) + expect(reqs3).toHaveLength(2) + expect(verifier.getProcessedMessageCount()).toBe(2) + }) + + it("should supersede across multiple batches", async () => { + // First batch + await verifier.processUserMessages(["- Build authentication with JWT tokens"]) + expect(verifier.getProcessedMessageCount()).toBe(1) + + // Second batch — pass full accumulated list so new message is detected + const reqs = await verifier.processUserMessages([ + "- Build authentication with JWT tokens", + "- Build authentication with OAuth tokens", + ]) + expect(reqs).toHaveLength(2) + expect(verifier.getProcessedMessageCount()).toBe(2) + + // First requirement should be superseded + expect(reqs[0].status).toBe("superseded") + expect(reqs[1].status).toBe("pending") + }) + }) + + describe("getProcessedMessageCount", () => { + it("should start at 0", () => { + expect(verifier.getProcessedMessageCount()).toBe(0) + }) + + it("should increment after processing messages", async () => { + await verifier.processUserMessages(["- Req 1", "- Req 2"]) + expect(verifier.getProcessedMessageCount()).toBe(2) + }) + }) + + describe("reset", () => { + it("should clear all requirements and message count", async () => { + verifier.addRequirement("Req 1") + verifier.addRequirement("Req 2") + await verifier.processUserMessages(["- Req 3"]) + expect(verifier.getAllRequirements()).toHaveLength(3) + expect(verifier.getProcessedMessageCount()).toBe(1) + + verifier.reset() + 
expect(verifier.getAllRequirements()).toHaveLength(0) + expect(verifier.getProcessedMessageCount()).toBe(0) + }) + }) +}) + +describe("KeywordConflictResolver", () => { + let resolver: KeywordConflictResolver + + beforeEach(() => { + resolver = new KeywordConflictResolver() + }) + + it("should have name 'keyword'", () => { + expect(resolver.name).toBe("keyword") + }) + + it("should return empty supersedes when no existing requirements", async () => { + const newReq: Requirement = { + id: "1", + text: "Build authentication", + category: "functional", + status: "pending", + messageIndex: 0, + } + const result = await resolver.resolve(newReq, [], 0, []) + expect(result.supersedes).toEqual([]) + expect(result.confidence).toBe(0.9) + }) + + it("should detect overlapping requirements", async () => { + const existing: Requirement[] = [ + { + id: "existing-1", + text: "Build authentication with JWT", + category: "functional", + status: "pending", + messageIndex: 0, + }, + ] + const newReq: Requirement = { + id: "new-1", + text: "Build authentication with OAuth", + category: "functional", + status: "pending", + messageIndex: 1, + } + const result = await resolver.resolve(newReq, existing, 1, []) + expect(result.supersedes).toEqual(["existing-1"]) + expect(result.confidence).toBe(0.6) + }) + + it("should NOT detect overlap for different topics", async () => { + const existing: Requirement[] = [ + { + id: "existing-1", + text: "Build authentication with JWT", + category: "functional", + status: "pending", + messageIndex: 0, + }, + ] + const newReq: Requirement = { + id: "new-1", + text: "Add database schema for users", + category: "functional", + status: "pending", + messageIndex: 1, + } + const result = await resolver.resolve(newReq, existing, 1, []) + expect(result.supersedes).toEqual([]) + expect(result.confidence).toBe(0.9) + }) + + it("should handle multiple existing requirements", async () => { + const existing: Requirement[] = [ + { + id: "existing-1", + text: "Build 
/**
 * Tracks the requirements a user has expressed during a session and whether
 * each one has been verified.
 *
 * Requirements are extracted from user messages (bullets, numbered items and
 * sentences containing requirement keywords), stored by id, and passed through
 * a pluggable {@link ConflictResolver} so that a later requirement can mark an
 * earlier one as "superseded". Superseded requirements are kept for the audit
 * trail but are ignored by {@link RequirementsVerifier.verify}.
 */
export class RequirementsVerifier {
	private config: RequirementsVerifierConfig
	private requirements: Map<string, Requirement> = new Map()
	private processedMessageCount = 0
	private conflictResolver: ConflictResolver
	private allMessages: string[] = []
	private lastVerifyResult?: RequirementsVerificationResult

	/**
	 * @param logger Optional sink for diagnostic lines; nothing is logged when omitted.
	 * @param config Overrides merged on top of DEFAULT_CONFIG.
	 * @param conflictResolver Strategy used to detect supersession; defaults to a
	 *   new KeywordConflictResolver.
	 */
	constructor(
		private readonly logger?: Logger,
		config?: Partial<RequirementsVerifierConfig>,
		conflictResolver?: ConflictResolver,
	) {
		this.config = { ...DEFAULT_CONFIG, ...config }
		this.conflictResolver = conflictResolver ?? new KeywordConflictResolver()
	}

	/**
	 * Replace the conflict resolver at runtime.
	 */
	setConflictResolver(resolver: ConflictResolver): void {
		this.conflictResolver = resolver
		this.logger?.appendLine(`[RequirementsVerifier] Conflict resolver set to: ${resolver.name}`)
	}

	/**
	 * Get the current conflict resolver.
	 */
	getConflictResolver(): ConflictResolver {
		return this.conflictResolver
	}

	/**
	 * Merge the given overrides into the current configuration.
	 */
	updateConfig(config: Partial<RequirementsVerifierConfig>): void {
		this.config = { ...this.config, ...config }
		this.logger?.appendLine(`[RequirementsVerifier] Config updated: ${JSON.stringify(config)}`)
	}

	/**
	 * Get a shallow copy of the current configuration.
	 */
	getConfig(): RequirementsVerifierConfig {
		return { ...this.config }
	}

	/**
	 * Process ALL user messages from the session (chronological order).
	 * Extracts requirements from each message and resolves conflicts
	 * where later messages supersede earlier ones.
	 *
	 * The caller passes the full accumulated message list every time; only the
	 * messages beyond the count processed by previous calls are examined.
	 *
	 * @param messages Every user message of the session so far, oldest first.
	 * @returns All known requirements (including superseded ones), or an empty
	 *   array when `messages` is empty.
	 */
	async processUserMessages(messages: string[]): Promise<Requirement[]> {
		if (messages.length === 0) return []

		this.logger?.appendLine(`[RequirementsVerifier] Processing ${messages.length} user messages`)

		// Keep a private copy for conflict-resolution context so later mutation of
		// the caller's array cannot change what the resolver sees.
		this.allMessages = [...messages]

		// Only process new messages since last call
		const newMessageCount = messages.length - this.processedMessageCount
		if (newMessageCount <= 0) {
			return this.getAllRequirements()
		}

		const messagesToProcess = messages.slice(this.processedMessageCount)

		for (let i = 0; i < messagesToProcess.length; i++) {
			const globalIndex = this.processedMessageCount + i
			const extracted = this.extractFromPrompt(messagesToProcess[i], globalIndex)

			// Run conflict resolution against existing requirements *before* the new
			// ones are stored, so a message never supersedes its own requirements.
			await this.resolveConflicts(extracted, globalIndex)

			for (const req of extracted) {
				this.requirements.set(req.id, req)
			}
		}

		this.processedMessageCount = messages.length

		const all = this.getAllRequirements()
		const active = all.filter((r) => r.status !== "superseded")
		this.logger?.appendLine(
			`[RequirementsVerifier] ${all.length} total requirements (${active.length} active, ${all.length - active.length} superseded)`,
		)

		return all
	}

	/**
	 * Resolve conflicts between newly extracted requirements and existing ones.
	 * Uses the pluggable conflict resolver to determine supersession.
	 *
	 * @param newRequirements Requirements extracted from one message (not yet stored).
	 * @param newMessageIndex Index of that message in the session.
	 */
	private async resolveConflicts(newRequirements: Requirement[], newMessageIndex: number): Promise<void> {
		const existingActive = this.getActiveRequirements()

		for (const newReq of newRequirements) {
			const resolution = await this.conflictResolver.resolve(
				newReq,
				existingActive,
				newMessageIndex,
				this.allMessages,
			)

			for (const supersededId of resolution.supersedes) {
				const existing = this.requirements.get(supersededId)
				// Skip unknown ids and requirements an earlier item of this same
				// message has already superseded.
				if (existing && existing.status !== "superseded") {
					existing.status = "superseded"
					existing.supersededBy = newReq.id
					newReq.supersedes = existing.id
					this.logger?.appendLine(
						`[RequirementsVerifier] ${this.conflictResolver.name} resolver: "${existing.text.slice(0, 60)}..." superseded by "${newReq.text.slice(0, 60)}..." (confidence: ${resolution.confidence})`,
					)
				}
			}
		}
	}

	/**
	 * Extract requirements from a single user message.
	 *
	 * Each non-empty line is classified as, in order: a category header (which
	 * changes the category of the following items and is itself dropped), a
	 * bullet / numbered item, or a sentence containing a requirement keyword.
	 * When nothing matches, the whole prompt becomes a single "goal" requirement.
	 *
	 * @param prompt Raw message text.
	 * @param messageIndex Index of the message in the session (stored on each requirement).
	 * @returns Newly created requirements; they are NOT added to the verifier.
	 */
	extractFromPrompt(prompt: string, messageIndex: number = 0): Requirement[] {
		const extracted: Requirement[] = []
		const lines = prompt.split("\n")
		let currentCategory: Requirement["category"] = "functional"

		for (const line of lines) {
			const trimmed = line.trim()
			if (!trimmed) continue

			// Detect category headers
			const categoryMatch = trimmed.match(
				/^(?:#+\s*)?(functional|non-functional|constraint|goal|edge.case|security|compliance|performance|reliability)/i,
			)
			if (categoryMatch) {
				const cat = categoryMatch[1].toLowerCase()
				// The header pattern accepts ANY single character between "edge" and
				// "case" (space, "-", "_", "."), so normalise every such spelling to
				// the canonical "edge-case" instead of leaking e.g. "edge_case".
				currentCategory = cat.startsWith("edge") ? "edge-case" : (cat as Requirement["category"])
				continue
			}

			// Extract bullet points and numbered items
			const itemMatch = trimmed.match(/^[-*•]\s+(.+)/)
			const numMatch = trimmed.match(/^\d+[.)]\s+(.+)/)
			const reqText = itemMatch?.[1] || numMatch?.[1]

			if (reqText) {
				extracted.push(this.createRequirement(reqText, currentCategory, messageIndex))
				continue
			}

			// Extract sentences with requirement keywords
			const keywordMatch = trimmed.match(
				/(?:must|should|need|require|shall|will|ensure|verify|check|validate|support|implement|add|create|build|fix|refactor)\s.+[.!]/i,
			)
			if (keywordMatch && trimmed.length > 10 && trimmed.length < 500) {
				extracted.push(this.createRequirement(trimmed, currentCategory, messageIndex))
			}
		}

		// If no structured requirements found, treat the whole prompt as one requirement
		if (extracted.length === 0 && prompt.trim().length > 0) {
			extracted.push(this.createRequirement(prompt.trim(), "goal", messageIndex))
		}

		return extracted
	}

	/**
	 * Manually add a requirement. It is tagged with the current processed-message
	 * count as its message index.
	 */
	addRequirement(text: string, category: Requirement["category"] = "functional"): Requirement {
		const req = this.createRequirement(text, category, this.processedMessageCount)
		this.requirements.set(req.id, req)
		return req
	}

	/**
	 * Mark a requirement as verified with evidence.
	 *
	 * @returns `false` when no requirement with that id exists.
	 */
	verifyRequirement(id: string, verifiedBy: Requirement["verifiedBy"], evidence: string): boolean {
		const req = this.requirements.get(id)
		if (!req) return false

		req.status = "verified"
		req.verifiedBy = verifiedBy
		req.evidence = evidence
		req.verifiedAt = Date.now()
		return true
	}

	/**
	 * Mark a requirement as failed.
	 *
	 * @returns `false` when no requirement with that id exists.
	 */
	failRequirement(id: string, evidence: string): boolean {
		const req = this.requirements.get(id)
		if (!req) return false

		req.status = "failed"
		req.evidence = evidence
		req.verifiedAt = Date.now()
		return true
	}

	/**
	 * Get all requirements (including superseded ones for audit trail)
	 */
	getAllRequirements(): Requirement[] {
		return Array.from(this.requirements.values())
	}

	/**
	 * Get only active (non-superseded) requirements
	 */
	getActiveRequirements(): Requirement[] {
		return this.getAllRequirements().filter((r) => r.status !== "superseded")
	}

	/**
	 * Get requirements by status
	 */
	getRequirementsByStatus(status: Requirement["status"]): Requirement[] {
		return this.getAllRequirements().filter((r) => r.status === status)
	}

	/**
	 * Snapshot of the verifier for status displays: requirement counts plus the
	 * result of the most recent {@link RequirementsVerifier.verify} call
	 * (undefined until verify has run, and again after reset).
	 */
	getStatus(): Record<string, unknown> {
		return {
			enabled: true,
			requirementCount: this.requirements.size,
			activeCount: this.getActiveRequirements().length,
			supersededCount: this.getRequirementsByStatus("superseded").length,
			lastVerifyResult: this.lastVerifyResult,
		}
	}

	/**
	 * Run full verification — checks only ACTIVE (non-superseded) requirements.
	 *
	 * Verification passes when no active requirement has failed and, if
	 * `requireAllVerified` is set, none is still pending or skipped.
	 * The result is also remembered for {@link RequirementsVerifier.getStatus}.
	 */
	async verify(): Promise<RequirementsVerificationResult> {
		const all = this.getAllRequirements()
		const active = all.filter((r) => r.status !== "superseded")
		const verified = active.filter((r) => r.status === "verified")
		const failed = active.filter((r) => r.status === "failed")
		const pending = active.filter((r) => r.status === "pending" || r.status === "skipped")
		const supersededCount = all.length - active.length

		const passed = failed.length === 0 && (pending.length === 0 || !this.config.requireAllVerified)

		let summary: string
		if (all.length === 0) {
			summary = "No requirements extracted"
		} else if (passed) {
			summary = `${active.length} active requirements: ${verified.length} verified, ${failed.length} failed, ${pending.length} pending (${supersededCount} superseded)`
		} else if (failed.length > 0) {
			summary = `${failed.length}/${active.length} active requirements failed: ${failed.map((r) => r.text.slice(0, 80)).join("; ")}`
		} else {
			// Nothing failed: verification is blocked only by unverified items, so
			// say that instead of the misleading "0/N active requirements failed".
			summary = `${pending.length}/${active.length} active requirements still pending verification: ${pending.map((r) => r.text.slice(0, 80)).join("; ")}`
		}

		const result: RequirementsVerificationResult = {
			passed,
			total: all.length,
			verified,
			failed,
			pending,
			summary,
		}
		this.lastVerifyResult = result

		// Hand out a separate top-level object so callers cannot overwrite the
		// fields of the remembered result.
		return { ...result }
	}

	/**
	 * Reset all requirements, the processed-message counter, the stored message
	 * context and the remembered verification result.
	 */
	reset(): void {
		this.requirements.clear()
		this.processedMessageCount = 0
		this.allMessages = []
		// A result computed for requirements that no longer exist must not be
		// reported by getStatus().
		this.lastVerifyResult = undefined
	}

	/**
	 * Get the number of processed messages
	 */
	getProcessedMessageCount(): number {
		return this.processedMessageCount
	}

	/**
	 * Build a fresh "pending" requirement with a random UUID as its id.
	 */
	private createRequirement(text: string, category: Requirement["category"], messageIndex: number): Requirement {
		return {
			id: crypto.randomUUID(),
			text,
			category,
			status: "pending",
			messageIndex,
		}
	}
}
/**
 * Tracks consecutive failures of the model/tool loop and decides how to react:
 * retry after an exponential backoff, switch to recovery mode, or abort.
 * It can also enrich a failed prompt with context from the code index.
 */
export class ResilienceService {
	/**
	 * Case-insensitive patterns that identify an "output too large" style
	 * failure. Compiled once instead of on every call.
	 * The HTTP status 413 is matched only as a standalone number so that
	 * unrelated digits (e.g. "after 14130ms") do not count as a hit.
	 */
	private static readonly LARGE_RESPONSE_PATTERNS: readonly RegExp[] = [
		/response too large/i,
		/response too long/i,
		/max_tokens/i,
		/maximum context length/i,
		/too many tokens/i,
		/content too large/i,
		/stream.*timeout/i,
		/timeout.*stream/i,
		/(?<!\d)413(?!\d)/,
		/payload too large/i,
	]

	private logger: Logger
	private config: ResilienceConfig
	private state: RecoveryState
	private codeIndexAdapter: CodeIndexAdapter | undefined

	/**
	 * @param logger Sink for diagnostic lines.
	 * @param config Overrides merged on top of DEFAULT_CONFIG.
	 */
	constructor(logger: Logger, config?: Partial<ResilienceConfig>) {
		this.logger = logger
		this.config = { ...DEFAULT_CONFIG, ...config }
		this.state = this.getInitialState()
	}

	/**
	 * Set (or clear) the adapter used by generateRecoveryContext to query the code index.
	 */
	setCodeIndexAdapter(adapter: CodeIndexAdapter | undefined): void {
		this.codeIndexAdapter = adapter
	}

	/**
	 * Get a shallow copy of the current configuration.
	 */
	getConfig(): ResilienceConfig {
		return { ...this.config }
	}

	/**
	 * Merge the given overrides into the current configuration.
	 */
	updateConfig(updates: Partial<ResilienceConfig>): void {
		this.config = { ...this.config, ...updates }
		this.logger.appendLine(`[Resilience] Config updated: ${JSON.stringify(updates)}`)
	}

	/**
	 * Get a shallow copy of the current recovery state.
	 */
	getState(): RecoveryState {
		return { ...this.state }
	}

	/**
	 * Called when a "having trouble" or streaming failure occurs.
	 * Returns the delay in ms before the next retry, or -1 if max retries exceeded
	 * (or the service is disabled).
	 */
	onStreamingFailure(): number {
		if (!this.config.enabled) {
			return -1
		}

		this.state.consecutiveFailures++
		this.state.lastFailureType = "streaming_failed"
		this.state.lastFailureTime = Date.now()
		this.state.isInRecoveryMode = true

		if (this.state.consecutiveFailures > this.config.maxRetries) {
			this.logger.appendLine(
				`[Resilience] Max retries (${this.config.maxRetries}) exceeded. Entering recovery mode.`,
			)
			return -1 // Signal to enter recovery mode
		}

		const delay = this.calculateBackoff(this.state.consecutiveFailures)
		this.logger.appendLine(
			`[Resilience] Streaming failure #${this.state.consecutiveFailures}. Retrying in ${delay}ms.`,
		)
		return delay
	}

	/**
	 * Called when a tool parameter validation error occurs (e.g., missing required parameter).
	 * Returns a recovery action suggestion or null (when the service is disabled).
	 *
	 * @param toolName Name of the tool whose call was rejected.
	 * @param missingParam Name of the parameter that was missing.
	 */
	onToolParameterError(
		toolName: string,
		missingParam: string,
	): { action: "retry" | "recover" | "abort"; delay?: number; suggestion?: string } | null {
		if (!this.config.enabled) {
			return null
		}

		this.state.consecutiveFailures++
		this.state.lastFailureType = "tool_parameter_error"
		this.state.lastFailureTime = Date.now()
		// NOTE(review): this stores the tool that just FAILED in a field named
		// "lastSuccessfulTool". Behaviour kept as-is because callers may read it;
		// confirm whether a dedicated "last failed tool" field was intended.
		this.state.lastSuccessfulTool = toolName

		this.logger.appendLine(
			`[Resilience] Tool parameter error: ${toolName} missing '${missingParam}'. Failure #${this.state.consecutiveFailures}.`,
		)

		// Record this as a learning event for the self-improving system
		this.recordToolError(toolName, missingParam)

		if (this.state.consecutiveFailures > this.config.maxRetries) {
			return {
				action: "abort",
				suggestion: `Tool ${toolName} repeatedly missing required parameter '${missingParam}'`,
			}
		}

		const delay = this.calculateBackoff(this.state.consecutiveFailures)
		return {
			action: "retry",
			delay,
			suggestion: `Ensure '${missingParam}' parameter is provided when calling ${toolName}`,
		}
	}

	/**
	 * Check if the streaming failure is due to a large response (not model error).
	 * Large responses occur when the model tries to deliver a comprehensive result
	 * that exceeds API limits — this is not a model error and should not trigger recovery.
	 *
	 * @param error Error text to inspect.
	 */
	isLargeResponseFailure(error: string): boolean {
		return ResilienceService.LARGE_RESPONSE_PATTERNS.some((pattern) => pattern.test(error))
	}

	/**
	 * Handle a large response failure — suggest shortening instead of triggering recovery.
	 * Does NOT increment consecutiveFailures since this isn't a model error.
	 */
	onLargeResponseFailure(): string {
		return "The response was too large. Shorten the response and try again. Consider summarizing or splitting into smaller chunks."
	}

	/**
	 * Called when the model is attempting to deliver a final result (attempt_completion).
	 * Resets recovery state to prevent false positive recovery from large response failures.
	 */
	onDeliveryAttempt(): void {
		this.state.consecutiveFailures = 0
		this.state.isInRecoveryMode = false
		this.state.recoveryAttempts = 0
	}

	/**
	 * Called when a task succeeds — resets recovery state.
	 */
	onTaskSuccess(): void {
		if (this.state.consecutiveFailures > 0) {
			this.logger.appendLine(
				`[Resilience] Task succeeded after ${this.state.consecutiveFailures} failures. Resetting state.`,
			)
		}
		this.state = this.getInitialState()
	}

	/**
	 * Get a recovery command suggestion based on current state.
	 *
	 * Each call while in recovery mode advances to the next configured command
	 * and then keeps returning the last one. Returns "" when not in recovery
	 * mode or when no recovery commands are configured.
	 */
	getRecoverySuggestion(): string {
		if (!this.state.isInRecoveryMode) {
			return ""
		}

		// updateConfig accepts a Partial, so tolerate a cleared/empty command list
		// instead of returning undefined from a method typed as string.
		const commands = this.config.recoveryCommands ?? []
		const index = Math.min(this.state.recoveryAttempts, commands.length - 1)
		this.state.recoveryAttempts++

		return commands[index] ?? ""
	}

	/**
	 * Check if the system is in recovery mode.
	 */
	isInRecoveryMode(): boolean {
		return this.state.isInRecoveryMode
	}

	/**
	 * Exit recovery mode (called when a task succeeds after recovery).
	 */
	exitRecoveryMode(): void {
		this.state.isInRecoveryMode = false
		this.state.recoveryAttempts = 0
		this.logger.appendLine("[Resilience] Exited recovery mode.")
	}

	/**
	 * Record a tool error for the self-improving system to learn from.
	 * Currently this only writes a log line.
	 */
	private recordToolError(toolName: string, missingParam: string): void {
		this.logger.appendLine(`[Resilience] Recording tool error for learning: ${toolName}.${missingParam}`)
	}

	/**
	 * Calculate exponential backoff with jitter.
	 *
	 * @param attempt 1-based failure number; the un-jittered delay is
	 *   `baseDelayMs * 2^(attempt - 1)`.
	 * @returns Delay in whole milliseconds, never above `maxDelayMs`.
	 */
	private calculateBackoff(attempt: number): number {
		const { baseDelayMs, maxDelayMs, jitterFactor } = this.config
		const exponentialDelay = Math.min(baseDelayMs * Math.pow(2, attempt - 1), maxDelayMs)
		const jitter = exponentialDelay * jitterFactor * Math.random()
		// Clamp again after adding jitter: otherwise a delay already at the cap
		// would exceed the configured maximum by up to jitterFactor.
		return Math.min(Math.floor(exponentialDelay + jitter), maxDelayMs)
	}

	/**
	 * Build the state used at construction time and after a successful task.
	 */
	private getInitialState(): RecoveryState {
		return {
			consecutiveFailures: 0,
			lastFailureType: null,
			lastFailureTime: null,
			lastSuccessfulTool: null,
			recoveryAttempts: 0,
			isInRecoveryMode: false,
		}
	}

	/**
	 * Format a single VectorStoreSearchResult into a human-readable context line:
	 * `- <file or id>[ (lines a-b)][: <first 200 chars of snippet, newlines flattened>]`.
	 */
	private formatSearchResult(result: VectorStoreSearchResult): string {
		const filePath = result.payload?.filePath ?? String(result.id)
		const startLine = result.payload?.startLine
		const endLine = result.payload?.endLine
		const snippet = result.payload?.codeChunk

		let lineRange = ""
		if (startLine !== undefined && endLine !== undefined) {
			lineRange = ` (lines ${startLine}-${endLine})`
		} else if (startLine !== undefined) {
			lineRange = ` (line ${startLine})`
		}

		const snippetStr = snippet ? `: ${snippet.slice(0, 200).replace(/\n/g, " ")}` : ""
		return `- ${filePath}${lineRange}${snippetStr}`
	}

	/**
	 * Generate a recovery context block based on the classified error and original message.
	 * For MODEL_THOUGHT_FAILURE with recoveryAction "break_down_task", queries the code index
	 * for relevant context and injects a contextual guidance block.
	 * Non-blocking — returns original message on any error or when no enrichment is needed.
	 * Gated behind recoveryContext experiment flag (only an explicit `false` disables it).
	 *
	 * @param classifiedError Classification of the failure that just happened.
	 * @param originalMessage Message to enrich; also used as the code-index query.
	 * @param experiments Optional experiment flags.
	 * @returns The original message, optionally followed by a "[Context Recovery]" block.
	 */
	async generateRecoveryContext(
		classifiedError: ClassifiedError,
		originalMessage: string,
		experiments?: Record<string, boolean>,
	): Promise<string> {
		// Only enrich for MODEL_THOUGHT_FAILURE with break_down_task recovery
		if (
			classifiedError.category !== ErrorCategory.MODEL_THOUGHT_FAILURE ||
			classifiedError.recoveryAction !== "break_down_task"
		) {
			return originalMessage
		}

		// Check experiment gate
		if (experiments?.recoveryContext === false) {
			return originalMessage
		}

		// Try to enrich with code index context
		if (this.codeIndexAdapter?.isAvailable()) {
			try {
				const results = await this.codeIndexAdapter.searchVectorStore(originalMessage)
				if (results && results.length > 0) {
					const contextLines = results.map((r) => this.formatSearchResult(r))
					const contextBlock = [
						"[Context Recovery] The previous attempt failed. Here is relevant context from the codebase to help:",
						...contextLines,
					].join("\n")

					this.logger.appendLine(
						`[Resilience] Recovery context generated: ${results.length} code index results`,
					)
					return `${originalMessage}\n\n${contextBlock}`
				}
			} catch (error) {
				// Graceful fallback — log and fall through to the generic guidance
				this.logger.appendLine(
					`[Resilience] Recovery context generation error: ${error instanceof Error ? error.message : String(error)}`,
				)
			}
		}

		// Fallback: inject a simpler contextual guidance block without code index
		const fallbackGuidance =
			"[Context Recovery] The previous attempt failed. Consider breaking the task into smaller, more focused steps. Try using a simpler approach or different tool."
		return `${originalMessage}\n\n${fallbackGuidance}`
	}

	/**
	 * Snapshot of the main configuration switches and failure counters for status displays.
	 */
	getStatus(): Record<string, unknown> {
		return {
			enabled: this.config.enabled,
			maxRetries: this.config.maxRetries,
			autoRecover: this.config.autoRecover,
			consecutiveFailures: this.state.consecutiveFailures,
			isInRecoveryMode: this.state.isInRecoveryMode,
			lastFailureType: this.state.lastFailureType,
			recoveryAttempts: this.state.recoveryAttempts,
		}
	}
}
+ +${transcriptSummary} + +Output your recommendations following the specified format.`, + } + } + + createSkillReviewPrompt(transcriptSummary: string): ReviewPrompt { + return { + type: "skill", + systemPrompt: `You are a skill review specialist. Your task is to review the recent conversation transcript and identify reusable procedures that should be saved as skills. + +## Guidelines +- Create skills for procedures that are repeated or likely to be repeated +- Update existing skills if the transcript reveals improvements +- Prefer class-level skills over one-off task narratives +- Avoid creating skills for transient environment failures +- Each skill should have a clear, single responsibility +- Support files (scripts, templates) should be separate from the main skill definition + +## Priority Order +1. Update an existing loaded skill if the transcript reveals improvements +2. Create an umbrella skill that groups related procedures +3. Add a support file (script, template) to an existing skill +4. Create a new standalone skill + +## Output Format +For each skill action, output: +ACTION: +SKILL_NAME: +DESCRIPTION: +CONTENT: +REASON: `, + userPrompt: `Review the following conversation transcript and identify reusable procedures to save as skills. + +${transcriptSummary} + +Output your recommendations following the specified format.`, + } + } + + createCombinedReviewPrompt(transcriptSummary: string): ReviewPrompt { + return { + type: "combined", + systemPrompt: `You are a self-improvement review specialist. Your task is to review the recent conversation transcript and identify both durable facts (memory) and reusable procedures (skills). 
+ +## Memory Guidelines +- Memory is for facts: user preferences, project conventions, environment details +- Save facts that are durable and useful across sessions +- Each fact should be concise and actionable + +## Skill Guidelines +- Skills are for procedures: repeatable workflows, command sequences, code patterns +- Create skills for procedures likely to be repeated +- Update existing skills with improvements from the transcript +- Prefer class-level skills over one-off narratives + +## Output Format +For memory facts: +MEMORY_FACT: +MEMORY_CATEGORY: + +For skill actions: +SKILL_ACTION: +SKILL_NAME: +SKILL_CONTENT: `, + userPrompt: `Review the following conversation transcript and identify both durable facts (memory) and reusable procedures (skills). + +${transcriptSummary} + +Output your recommendations following the specified format.`, + } + } +} diff --git a/src/services/self-improving/ReviewTeamService.ts b/src/services/self-improving/ReviewTeamService.ts new file mode 100644 index 0000000000..5116852228 --- /dev/null +++ b/src/services/self-improving/ReviewTeamService.ts @@ -0,0 +1,675 @@ +import * as fs from "fs/promises" +import * as path from "path" + +import { safeWriteJson } from "../../utils/safeWriteJson" +import type { Experiments, LearnedPattern, ImprovementAction } from "./types" +import type { Logger } from "./types" +import type { CodeIndexManager } from "../code-index/manager" + +export interface ReviewTeamConfig { + enabled: boolean + innovatorWeight: number // default 0.3 + contrarianWeight: number // default 0.3 + devilsAdvocateWeight: number // default 0.3 + deciderThreshold: number // default 0.6 — minimum weighted score to pass + requireUnanimous: boolean // default false — if true, all must approve + minConfidenceForReview: number // default 0.2 — skip review for very low confidence + storageBasePath?: string // path for persisting counts + getExperiments?: () => Experiments | undefined +} + +export interface ReviewVerdict { + approved: 
boolean + score: number + innovatorVote: VoteResult + contrarianVote: VoteResult + devilsAdvocateVote: VoteResult + deciderVote: VoteResult + summary: string + timestamp: Date +} + +export interface VoteResult { + approved: boolean + confidence: number // 0-1 + reasoning: string +} + +const DEFAULT_CONFIG: ReviewTeamConfig = { + enabled: true, + innovatorWeight: 0.3, + contrarianWeight: 0.3, + devilsAdvocateWeight: 0.3, + deciderThreshold: 0.6, + requireUnanimous: false, + minConfidenceForReview: 0.2, +} + +interface PersistedCounts { + approvedPatternCount: number + approvedActionCount: number +} + +const COUNTS_FILE = "review-team-counts.json" + +export class ReviewTeamService { + private logger: Logger + private config: ReviewTeamConfig + private approvedPatternCount: number = 0 + private approvedActionCount: number = 0 + private initialized = false + private initPromise: Promise | null = null + private codeIndexManager: CodeIndexManager | undefined + private readonly COLD_START_GRACE_ACTIONS = 20 + private actionCount = 0 + + constructor(logger: Logger, config?: Partial) { + this.logger = logger + this.config = { ...DEFAULT_CONFIG, ...config } + } + + /** + * Set the CodeIndexManager instance for vector-search-based pattern evidence lookup. + */ + setCodeIndexManager(manager: CodeIndexManager | undefined): void { + this.codeIndexManager = manager + } + + /** + * Initialize the service — load persisted counts if storage is configured + * and the SELF_IMPROVING_PERSIST_COUNTS experiment is enabled. 
+ */ + async initialize(): Promise { + if (this.initialized) { + return + } + + if (!this.initPromise) { + this.initPromise = this.doInitialize() + } + + await this.initPromise + } + + private async doInitialize(): Promise { + try { + const experiments = this.config.getExperiments?.() + if (experiments?.selfImprovingPersistCounts === false) { + this.initialized = true + return + } + + if (!this.config.storageBasePath) { + this.initialized = true + return + } + + const countsPath = path.join(this.config.storageBasePath, COUNTS_FILE) + try { + const data = await fs.readFile(countsPath, "utf-8") + const parsed = JSON.parse(data) as PersistedCounts + this.approvedPatternCount = parsed.approvedPatternCount ?? 0 + this.approvedActionCount = parsed.approvedActionCount ?? 0 + this.logger.appendLine( + `[ReviewTeamService] Loaded counts: ${this.approvedPatternCount} patterns, ${this.approvedActionCount} actions`, + ) + } catch { + // File doesn't exist yet — start from zero + this.logger.appendLine("[ReviewTeamService] No persisted counts found, starting fresh") + } + } catch (error) { + this.logger.appendLine( + `[ReviewTeamService] Failed to load counts: ${error instanceof Error ? error.message : String(error)}`, + ) + } finally { + this.initialized = true + this.initPromise = null + } + } + + /** + * Persist current counts to disk if storage is configured. + */ + private async persistCounts(): Promise { + const experiments = this.config.getExperiments?.() + if (experiments?.selfImprovingPersistCounts === false) { + return + } + + if (!this.config.storageBasePath) { + return + } + + try { + const countsPath = path.join(this.config.storageBasePath, COUNTS_FILE) + const data: PersistedCounts = { + approvedPatternCount: this.approvedPatternCount, + approvedActionCount: this.approvedActionCount, + } + await safeWriteJson(countsPath, data) + } catch (error) { + this.logger.appendLine( + `[ReviewTeamService] Failed to persist counts: ${error instanceof Error ? 
error.message : String(error)}`, + ) + } + } + + getApprovedPatternCount(): number { + return this.approvedPatternCount + } + + setApprovedPatternCount(count: number): void { + this.approvedPatternCount = count + this.persistCounts() + } + + getApprovedActionCount(): number { + return this.approvedActionCount + } + + setApprovedActionCount(count: number): void { + this.approvedActionCount = count + this.persistCounts() + } + + getConfig(): ReviewTeamConfig { + return { ...this.config } + } + + updateConfig(updates: Partial): void { + this.config = { ...this.config, ...updates } + this.logger.appendLine(`[ReviewTeam] Config updated: ${JSON.stringify(updates)}`) + } + + /** + * Review a single pattern through all 4 personas. + */ + async reviewPattern(pattern: LearnedPattern): Promise { + if (!this.config.enabled) { + return this.passThroughVerdict("Review team disabled") + } + + if ((pattern.confidenceScore ?? 0) < this.config.minConfidenceForReview) { + return this.passThroughVerdict( + `Confidence ${pattern.confidenceScore} below threshold ${this.config.minConfidenceForReview}`, + ) + } + + const innovatorVote = this.innovatorReview(pattern) + const contrarianVote = this.contrarianReview(pattern) + const devilsAdvocateVote = this.devilsAdvocateReview(pattern) + const deciderVote = this.deciderReview(pattern, [innovatorVote, contrarianVote, devilsAdvocateVote]) + + const weightedScore = this.calculateWeightedScore([innovatorVote, contrarianVote, devilsAdvocateVote]) + // Lower threshold for new patterns to avoid chicken-and-egg problem + const threshold = pattern.frequency < 3 ? 0.4 : this.config.deciderThreshold + // First-pattern boost: when no patterns have ever been approved, lower threshold + // to seed the system and break the chicken-and-egg problem + const isFirstPattern = this.approvedPatternCount === 0 + const effectiveThreshold = isFirstPattern ? 
Math.min(threshold, 0.3) : threshold + // Also override decider for first pattern — decider's 0.5 threshold is too high for cold-start + const effectiveDeciderApproved = isFirstPattern ? weightedScore >= effectiveThreshold : deciderVote.approved + const approved = this.config.requireUnanimous + ? innovatorVote.approved && contrarianVote.approved && devilsAdvocateVote.approved && deciderVote.approved + : weightedScore >= effectiveThreshold && effectiveDeciderApproved + + if (approved) { + this.approvedPatternCount++ + } + + const summary = this.generateSummary(pattern, approved, weightedScore, [ + innovatorVote, + contrarianVote, + devilsAdvocateVote, + deciderVote, + ]) + + return { + approved, + score: weightedScore, + innovatorVote, + contrarianVote, + devilsAdvocateVote, + deciderVote, + summary, + timestamp: new Date(), + } + } + + /** + * Review multiple patterns and return only approved ones. + */ + async reviewPatterns(patterns: LearnedPattern[]): Promise<{ + approved: LearnedPattern[] + rejected: LearnedPattern[] + verdicts: ReviewVerdict[] + }> { + const approved: LearnedPattern[] = [] + const rejected: LearnedPattern[] = [] + const verdicts: ReviewVerdict[] = [] + + for (const pattern of patterns) { + const verdict = await this.reviewPattern(pattern) + verdicts.push(verdict) + if (verdict.approved) { + approved.push(pattern) + } else { + rejected.push(pattern) + } + } + + this.logger.appendLine( + `[ReviewTeam] Reviewed ${patterns.length} patterns: ${approved.length} approved, ${rejected.length} rejected`, + ) + + return { approved, rejected, verdicts } + } + + /** + * Review a single action through all 4 personas. 
+ */ + async reviewAction(action: ImprovementAction): Promise { + if (!this.config.enabled) { + return this.passThroughVerdict("Review team disabled") + } + + const innovatorVote = this.innovatorReviewAction(action) + const contrarianVote = this.contrarianReviewAction(action) + const devilsAdvocateVote = this.devilsAdvocateReviewAction(action) + const deciderVote = this.deciderReviewAction(action, [innovatorVote, contrarianVote, devilsAdvocateVote]) + + const weightedScore = this.calculateWeightedScore([innovatorVote, contrarianVote, devilsAdvocateVote]) + // Cold-start mitigation: when no actions have ever been approved, lower threshold + // to seed the system and break the chicken-and-egg problem. + // Max weighted score ~0.533 — can't reach default 0.6 threshold without boost. + const isFirstAction = this.approvedActionCount === 0 + let effectiveThreshold = isFirstAction + ? Math.min(this.config.deciderThreshold, 0.3) + : this.config.deciderThreshold + // Broader cold-start grace period: first 20 actions get a lower threshold + // to avoid excessive rejection during system warm-up + if (this.actionCount < this.COLD_START_GRACE_ACTIONS) { + effectiveThreshold = Math.max(effectiveThreshold * 0.5, 0.3) + } + this.actionCount++ + // Override decider for first action — decider's 0.5 threshold is too high for cold-start + const effectiveDeciderApproved = isFirstAction ? weightedScore >= effectiveThreshold : deciderVote.approved + const approved = this.config.requireUnanimous + ? innovatorVote.approved && contrarianVote.approved && devilsAdvocateVote.approved && deciderVote.approved + : weightedScore >= effectiveThreshold && effectiveDeciderApproved + + if (approved) { + this.approvedActionCount++ + } else { + this.logger.appendLine( + `[ReviewTeam] Rejected action: ${action.actionType} (score: ${weightedScore.toFixed(2)}, threshold: ${effectiveThreshold.toFixed(2)})`, + ) + } + + const summary = `Action ${action.actionType} (${action.id}): ${approved ? 
"APPROVED" : "REJECTED"} (score: ${(weightedScore * 100).toFixed(0)}%)` + + return { + approved, + score: weightedScore, + innovatorVote, + contrarianVote, + devilsAdvocateVote, + deciderVote, + summary, + timestamp: new Date(), + } + } + + /** + * Review multiple actions and return only approved ones. + */ + async reviewActions(actions: ImprovementAction[]): Promise<{ + approved: ImprovementAction[] + rejected: ImprovementAction[] + verdicts: ReviewVerdict[] + }> { + const approved: ImprovementAction[] = [] + const rejected: ImprovementAction[] = [] + const verdicts: ReviewVerdict[] = [] + + for (const action of actions) { + const verdict = await this.reviewAction(action) + verdicts.push(verdict) + if (verdict.approved) { + approved.push(action) + } else { + rejected.push(action) + } + } + + this.logger.appendLine( + `[ReviewTeam] Reviewed ${actions.length} actions: ${approved.length} approved, ${rejected.length} rejected`, + ) + + return { approved, rejected, verdicts } + } + + /** + * Search for similar approved patterns using vector search. + * Finds evidence of "has this pattern worked before?" to inform persona scores. + * Gated behind selfImprovingCodeIndex experiment flag. + */ + async searchSimilarApprovedPatterns(pattern: LearnedPattern): Promise> { + const experiments = this.config.getExperiments?.() + if (experiments?.selfImprovingCodeIndex === false) { + return [] + } + + if (!this.codeIndexManager) { + return [] + } + + try { + const query = [ + pattern.summary, + ...(pattern.context?.toolNames ?? []), + ...(pattern.context?.errorKeys ?? []), + ] + .filter(Boolean) + .join(" ") + + const results = await this.codeIndexManager.searchIndex(query) + if (!results || results.length === 0) { + return [] + } + + return results + .filter((r) => r.payload?.codeChunk) + .map((r) => ({ + summary: r.payload?.codeChunk?.slice(0, 200) ?? 
"", + score: r.score, + })) + } catch (error) { + this.logger.appendLine( + `[ReviewTeamService] searchSimilarApprovedPatterns error: ${error instanceof Error ? error.message : String(error)}`, + ) + return [] + } + } + + // ===== INNOVATOR ===== + + private innovatorReview(pattern: LearnedPattern): VoteResult { + let score = 0.5 // neutral start + const reasons: string[] = [] + + // Novel tool combinations are innovative + if (pattern.context?.toolNames && pattern.context.toolNames.length >= 3) { + score += 0.2 + reasons.push("Novel multi-tool combination") + } + + // High success rate with new patterns + if ((pattern.successRate ?? 0) > 0.8 && (pattern.frequency ?? 0) >= 3) { + score += 0.15 + reasons.push("High success rate with sufficient frequency") + } + + // Error patterns with clear avoidance strategies + if (pattern.patternType === "error" && pattern.context?.errorKeys && pattern.context.errorKeys.length > 0) { + score += 0.1 + reasons.push("Actionable error avoidance pattern") + } + + // Low frequency patterns need more evidence + if ((pattern.frequency ?? 0) < 2) { + score -= 0.05 + reasons.push("Low frequency — needs more evidence") + } + + // Very low confidence + if ((pattern.confidenceScore ?? 0) < 0.2) { + score -= 0.2 + reasons.push("Very low confidence score") + } + + return { + approved: score >= 0.5, + confidence: Math.max(0, Math.min(1, score)), + reasoning: reasons.length > 0 ? 
reasons.join("; ") : "No strong signals", + } + } + + private innovatorReviewAction(action: ImprovementAction): VoteResult { + let score = 0.5 + const reasons: string[] = [] + + if (action.actionType === "SKILL_CREATE" || action.actionType === "SKILL_CREATE_FROM_SCRATCH") { + score += 0.15 + reasons.push("Skill creation enables reusable knowledge") + } + if (action.actionType === "PROMPT_ENRICHMENT") { + score += 0.1 + reasons.push("Prompt enrichment improves future interactions") + } + + return { + approved: score >= 0.5, + confidence: Math.max(0, Math.min(1, score)), + reasoning: reasons.length > 0 ? reasons.join("; ") : "Standard action", + } + } + + // ===== CONTRARIAN ===== + + private contrarianReview(pattern: LearnedPattern): VoteResult { + let score = 0.5 // neutral start + const reasons: string[] = [] + + // Low frequency patterns might be coincidental + if ((pattern.frequency ?? 0) < 3) { + score -= 0.05 + reasons.push("Low frequency — may be coincidental") + } + + // Very high confidence with low frequency is suspicious + if ((pattern.confidenceScore ?? 0) > 0.8 && (pattern.frequency ?? 0) < 5) { + score -= 0.2 + reasons.push("High confidence with low frequency — possible overfitting") + } + + // Single tool patterns might be too narrow + if (pattern.context?.toolNames && pattern.context.toolNames.length === 1) { + score -= 0.1 + reasons.push("Single tool pattern — may be too narrow") + } + + // Error patterns with no error keys lack specificity + if ( + pattern.patternType === "error" && + (!pattern.context?.errorKeys || pattern.context.errorKeys.length === 0) + ) { + score -= 0.15 + reasons.push("Error pattern without specific error keys") + } + + // High frequency with good confidence is reliable + if ((pattern.frequency ?? 0) >= 5 && (pattern.confidenceScore ?? 
0) >= 0.5) { + score += 0.2 + reasons.push("High frequency with good confidence — reliable pattern") + } + + return { + approved: score >= 0.5, + confidence: Math.max(0, Math.min(1, score)), + reasoning: reasons.length > 0 ? reasons.join("; ") : "No strong concerns", + } + } + + private contrarianReviewAction(action: ImprovementAction): VoteResult { + let score = 0.5 + const reasons: string[] = [] + + if (action.actionType === "SKILL_CREATE" && !("content" in action.payload)) { + score -= 0.2 + reasons.push("Skill creation without content — may be premature") + } + if (action.actionType === "SKILL_CREATE_FROM_SCRATCH" && !("instructions" in action.payload)) { + score -= 0.2 + reasons.push("SKILL_CREATE_FROM_SCRATCH without instructions — may be incomplete") + } + if (action.actionType === "ERROR_AVOIDANCE" && !("errorKey" in action.payload)) { + score -= 0.15 + reasons.push("Error avoidance without specific error key") + } + + return { + approved: score >= 0.5, + confidence: Math.max(0, Math.min(1, score)), + reasoning: reasons.length > 0 ? 
reasons.join("; ") : "No strong concerns", + } + } + + // ===== DEVIL'S ADVOCATE ===== + + private devilsAdvocateReview(pattern: LearnedPattern): VoteResult { + let score = 0.5 // neutral start + const reasons: string[] = [] + + // Check for potential negative side effects + if (pattern.patternType === "tool" && pattern.context?.toolNames) { + const tools = pattern.context.toolNames + // Writing without reading first is risky + if (tools.includes("edit_file") && !tools.includes("read_file") && !tools.includes("search_files")) { + score -= 0.2 + reasons.push("Edit without prior read — risk of incorrect changes") + } + // Command execution without safety checks + if (tools.includes("execute_command") && !tools.includes("read_file")) { + score -= 0.1 + reasons.push("Command execution without context verification") + } + } + + // Stale patterns may be outdated + if (pattern.state === "stale") { + score -= 0.2 + reasons.push("Stale pattern — may be outdated") + } + + // Archived patterns should not be used + if (pattern.state === "archived") { + score -= 0.3 + reasons.push("Archived pattern — should not be applied") + } + + // Very high success rate with low frequency might be lucky + if ((pattern.successRate ?? 0) > 0.95 && (pattern.frequency ?? 0) < 5) { + score -= 0.15 + reasons.push("Near-perfect success with low frequency — may be lucky") + } + + return { + approved: score >= 0.5, + confidence: Math.max(0, Math.min(1, score)), + reasoning: reasons.length > 0 ? 
reasons.join("; ") : "No edge case concerns", + } + } + + private devilsAdvocateReviewAction(action: ImprovementAction): VoteResult { + let score = 0.5 + const reasons: string[] = [] + + if (action.actionType === "SKILL_CREATE") { + score -= 0.1 + reasons.push("Skill creation modifies agent behavior — verify necessity") + } + if (action.actionType === "ERROR_AVOIDANCE") { + score += 0.1 + reasons.push("Error avoidance is low-risk, high-value") + } + + return { + approved: score >= 0.5, + confidence: Math.max(0, Math.min(1, score)), + reasoning: reasons.length > 0 ? reasons.join("; ") : "No edge case concerns", + } + } + + // ===== THE DECIDER ===== + + private deciderReview(pattern: LearnedPattern, votes: VoteResult[]): VoteResult { + const avgScore = votes.reduce((sum, v) => sum + v.confidence, 0) / votes.length + const approvals = votes.filter((v) => v.approved).length + const reasons: string[] = [] + + // Unanimous approval + if (approvals === votes.length) { + reasons.push("Unanimous approval from all reviewers") + } + + // Split decision + if (approvals === 2 && votes.length === 3) { + reasons.push("Split decision — majority approves") + } + + // Majority rejection + if (approvals <= 1) { + reasons.push("Majority recommends rejection") + } + + // High confidence override + if (avgScore >= 0.8) { + reasons.push("High average confidence — overriding concerns") + } + + // Low confidence override + if (avgScore < 0.3) { + reasons.push("Very low average confidence — recommending rejection") + } + + return { + approved: avgScore >= 0.5, + confidence: avgScore, + reasoning: reasons.length > 0 ? 
reasons.join("; ") : "Standard review", + } + } + + private deciderReviewAction(action: ImprovementAction, votes: VoteResult[]): VoteResult { + return this.deciderReview({} as LearnedPattern, votes) + } + + private calculateWeightedScore(votes: VoteResult[]): number { + const weights = [this.config.innovatorWeight, this.config.contrarianWeight, this.config.devilsAdvocateWeight] + let totalWeight = 0 + let weightedSum = 0 + + for (let i = 0; i < votes.length; i++) { + const w = weights[i] ?? 0 + weightedSum += votes[i].confidence * w + totalWeight += w + } + + return totalWeight > 0 ? weightedSum / totalWeight : 0.5 + } + + private generateSummary(pattern: LearnedPattern, approved: boolean, score: number, votes: VoteResult[]): string { + const status = approved ? "APPROVED" : "REJECTED" + const voteSummary = votes + .map((v, i) => { + const names = ["Innovator", "Contrarian", "Devil's Advocate", "The Decider"] + return `${names[i]}: ${v.approved ? "✅" : "❌"} (${(v.confidence * 100).toFixed(0)}%)` + }) + .join(" | ") + + return `[${status}] Pattern ${pattern.id} (${pattern.patternType}): score=${(score * 100).toFixed(0)}% | ${voteSummary}` + } + + private passThroughVerdict(reason: string): ReviewVerdict { + return { + approved: true, + score: 1.0, + innovatorVote: { approved: true, confidence: 1.0, reasoning: reason }, + contrarianVote: { approved: true, confidence: 1.0, reasoning: reason }, + devilsAdvocateVote: { approved: true, confidence: 1.0, reasoning: reason }, + deciderVote: { approved: true, confidence: 1.0, reasoning: reason }, + summary: `Pass-through: ${reason}`, + timestamp: new Date(), + } + } +} diff --git a/src/services/self-improving/SelfImprovingManager.ts b/src/services/self-improving/SelfImprovingManager.ts new file mode 100644 index 0000000000..c31cebb477 --- /dev/null +++ b/src/services/self-improving/SelfImprovingManager.ts @@ -0,0 +1,1066 @@ +import path from "path" +import crypto from "crypto" + +import type { + CodeIndexInfo, + Experiments, 
+ ImprovementAction, + LearnedPattern, + LearningEvent, + Logger, + PromptContext, + SelfImprovingManagerOptions, + SelfImprovingScope, + TaskEventInfo, +} from "./types" +import { experimentDefault } from "../../shared/experiments" +import { LearningStore } from "./LearningStore" +import { FeedbackCollector } from "./FeedbackCollector" +import { PatternAnalyzer } from "./PatternAnalyzer" +import { ImprovementApplier } from "./ImprovementApplier" +import { CodeIndexAdapter } from "./CodeIndexAdapter" +import type { MemoryBackend } from "./MemoryBackend" +import { MemoryBackendFactory } from "./MemoryBackendFactory" +import { MemoryStore } from "./MemoryStore" +import { SkillUsageStore } from "./SkillUsageStore" +import { ActionExecutor } from "./ActionExecutor" +import { CuratorService } from "./CuratorService" +import type { CuratorReport } from "./CuratorService" +import { ReviewPromptFactory } from "./ReviewPromptFactory" +import { TranscriptRecall } from "./TranscriptRecall" +import { InsightsEngine } from "./InsightsEngine" +import { AutoModeOrchestrator } from "./AutoModeOrchestrator" +import type { AutoModeConfig } from "./AutoModeOrchestrator" +import { ModeFactoryService } from "./ModeFactoryService" +import type { InsightsReport } from "./InsightsEngine" +import { ReviewTeamService } from "./ReviewTeamService" +import type { ReviewTeamConfig } from "./ReviewTeamService" +import { QuestionEvaluatorService } from "./QuestionEvaluatorService" +import { ResilienceService } from "./ResilienceService" +import { VerificationEngine } from "./VerificationEngine" +import { RequirementsVerifier } from "./RequirementsVerifier" +import { ToolErrorHealer } from "./ToolErrorHealer" +import { PreventionEngine } from "./PreventionEngine" +import type { CodeIndexManager } from "../code-index/manager" + +const SELF_IMPROVING_EXPERIMENT_ID = "selfImproving" +const REVIEW_CHECK_INTERVAL_MS = 60_000 + +type Runtime = { + store: LearningStore + feedbackCollector: 
FeedbackCollector + patternAnalyzer: PatternAnalyzer + improvementApplier: ImprovementApplier + codeIndexAdapter: CodeIndexAdapter +} + +export class SelfImprovingManager { + private readonly globalStoragePath: string + private readonly logger: Logger + private readonly getExperiments: () => Experiments | undefined + private readonly getMemoryBackend: SelfImprovingManagerOptions["getMemoryBackend"] + private readonly getAgentMemoryUrl: SelfImprovingManagerOptions["getAgentMemoryUrl"] + private readonly getSelfImprovingScope: SelfImprovingManagerOptions["getSelfImprovingScope"] + private readonly getAutoSkillsScope: SelfImprovingManagerOptions["getAutoSkillsScope"] + private readonly getWorkspacePath: SelfImprovingManagerOptions["getWorkspacePath"] + private readonly curatorConfig: SelfImprovingManagerOptions["curatorConfig"] + private readonly skillsManager: SelfImprovingManagerOptions["skillsManager"] + public memoryStore: MemoryBackend + public skillUsageStore: SkillUsageStore + public curatorService: CuratorService + public readonly reviewPromptFactory: ReviewPromptFactory + public transcriptRecall: TranscriptRecall + public readonly insightsEngine: InsightsEngine + private actionExecutor: ActionExecutor + private memoryBackendType: "builtin" | "agentmemory" + private agentMemoryUrl: string | undefined + private storageBasePath: string + private autoModeOrchestrator: AutoModeOrchestrator + private modeFactory: ModeFactoryService + private reviewTeam: ReviewTeamService + public questionEvaluator: QuestionEvaluatorService + public resilienceService: ResilienceService + public toolErrorHealer: ToolErrorHealer + public preventionEngine: PreventionEngine + public verificationEngine: VerificationEngine + public requirementsVerifier: RequirementsVerifier + private _codeIndexManager: CodeIndexManager | undefined + + private runtime: Runtime | undefined + private started = false + private reviewTimer: ReturnType | null = null + private curatorTimer: ReturnType | null = 
null + private promptRevision = 0 + private lastUserActivityAt = 0 + private reviewInFlight = false + private curatorInFlight = false + + constructor(options: SelfImprovingManagerOptions) { + this.globalStoragePath = options.globalStoragePath + this.logger = options.logger + this.getExperiments = options.getExperiments + this.getMemoryBackend = options.getMemoryBackend + this.getAgentMemoryUrl = options.getAgentMemoryUrl + this.getSelfImprovingScope = options.getSelfImprovingScope + this.getAutoSkillsScope = options.getAutoSkillsScope + this.getWorkspacePath = options.getWorkspacePath + this.curatorConfig = options.curatorConfig + this.skillsManager = options.skillsManager + this.memoryBackendType = this.resolveMemoryBackend(options.memoryBackend) + this.agentMemoryUrl = this.resolveAgentMemoryUrl(options.agentMemoryUrl) + this.storageBasePath = this.resolveStorageBasePath() + this.memoryStore = this.createMemoryStore() + this.skillUsageStore = this.createSkillUsageStore() + this.actionExecutor = this.createActionExecutor() + this.curatorService = this.createCuratorService() + this.reviewPromptFactory = new ReviewPromptFactory() + this.transcriptRecall = this.createTranscriptRecall() + this.insightsEngine = new InsightsEngine(this.globalStoragePath) + this.modeFactory = new ModeFactoryService(this.logger) + this.reviewTeam = new ReviewTeamService(this.logger, { + enabled: this.getExperiments()?.selfImprovingReviewTeam ?? true, + storageBasePath: this.storageBasePath, + getExperiments: () => this.getExperiments(), + }) + this.questionEvaluator = new QuestionEvaluatorService(this.logger, { + enabled: this.getExperiments()?.selfImprovingQuestionEvaluation ?? true, + useFullTeam: this.getExperiments()?.selfImprovingReviewTeam ?? 
true, + }) + this.questionEvaluator.setReviewTeam(this.reviewTeam) + + // Init dependencies first — needed by AutoModeOrchestrator below + this.resilienceService = new ResilienceService(this.logger, { + enabled: this.getExperiments()?.selfImprovingAutoMode ?? true, + }) + this.toolErrorHealer = new ToolErrorHealer(this.logger, { + enabled: this.getExperiments()?.selfImprovingAutoMode ?? true, + }) + this.preventionEngine = new PreventionEngine() + // CodeIndexAdapter will be wired later via setCodeIndexManager() or getOrCreateRuntime() + this.verificationEngine = new VerificationEngine(this.logger) + this.requirementsVerifier = new RequirementsVerifier(this.logger) + + this.autoModeOrchestrator = new AutoModeOrchestrator( + this.logger, + { + enabled: this.getExperiments()?.selfImprovingAutoMode ?? true, + reviewIntervalMs: 30000, + }, + this.toolErrorHealer, + this.resilienceService, + ) + this.autoModeOrchestrator.setModeFactory(this.modeFactory) + } + + /** + * Set the CodeIndexManager instance for vector-search integration. + * Passes the manager to all child services that need it. + * Gated behind selfImprovingCodeIndex experiment flag. + */ + setCodeIndexManager(manager: CodeIndexManager | undefined): void { + this._codeIndexManager = manager + + const experiments = this.getExperiments() + if (experiments?.selfImprovingCodeIndex === false) { + return + } + + // Wire to all child services + if (this.runtime) { + this.runtime.store.setCodeIndexManager(manager) + this.runtime.patternAnalyzer.setCodeIndexManager(manager) + this.runtime.codeIndexAdapter.setCodeIndexManager(manager!) + this.preventionEngine.setCodeIndexAdapter(this.runtime.codeIndexAdapter) + this.resilienceService.setCodeIndexAdapter(this.runtime.codeIndexAdapter) + } + this.reviewTeam.setCodeIndexManager(manager) + this.questionEvaluator.setCodeIndexManager(manager) + + this.logger.appendLine( + `[SelfImprovingManager] CodeIndexManager ${manager ? 
"wired" : "unwired"} to child services`, + ) + } + + setCustomModesManager(manager: any): void { + this.modeFactory.setCustomModesManager(manager) + // Wire pattern provider eagerly — don't gate behind runtime. + // ModeFactoryService queues recreateModes() calls if provider not yet set. + this.modeFactory.setPatternProvider(() => { + if (this.runtime?.store) { + return this.runtime.store.getPatterns() as LearnedPattern[] + } + return [] + }) + } + + /** + * Re-create auto modes from current patterns. + * Called when .roomodes changes to re-apply auto-created modes + * that may have been overwritten by the reload. + */ + async recreateModes(): Promise { + if (!this.started || !this.runtime) { + return [] + } + return this.modeFactory.recreateModes() + } + + static isExperimentEnabled(experiments: Experiments | undefined, persistedEnabled?: boolean): boolean { + // Check VS Code experiment flag first + if (experiments && experiments[SELF_IMPROVING_EXPERIMENT_ID] === true) { + return true + } + + // Fallback: check persisted LearningStore config (state.json enabled flag) + // This allows enabling self-improving without the VS Code experiment UI toggle + if (persistedEnabled === true) { + return true + } + + return false + } + + static isAutoSkillsEnabled(experiments: Experiments | undefined, persistedEnabled?: boolean): boolean { + if (!SelfImprovingManager.isExperimentEnabled(experiments, persistedEnabled)) { + return false + } + + // Check auto-skills from experiments (explicitly set) + if (experiments && "selfImprovingAutoSkills" in experiments) { + return experiments.selfImprovingAutoSkills === true + } + + // Fallback: check experiment default (experiments.ts config map) + if (experimentDefault.selfImprovingAutoSkills === true) { + return true + } + + // Fallback: check persisted LearningStore config (state.json autoSkills flag) + if (persistedEnabled === true) { + return true + } + + return false + } + + async initialize(): Promise { + if (this.started) { + 
return + } + + try { + const runtime = this.getOrCreateRuntime() + await runtime.store.initialize() + + // Gate on VS Code experiment flag OR persisted config (state.json) + const experimentEnabled = SelfImprovingManager.isExperimentEnabled(this.getExperiments()) + const storeEnabled = runtime.store.getConfig().enabled + if (!experimentEnabled && !storeEnabled) { + return + } + + await this.memoryStore.initialize() + await this.skillUsageStore.initialize() + await this.transcriptRecall.initialize() + await this.curatorService.initialize() + await this.insightsEngine.initialize() + await this.reviewTeam.initialize() + this.started = true + this.startTimers(runtime.store) + + // Wire up auto mode orchestrator with pattern analyzer and pattern provider + this.autoModeOrchestrator.setPatternAnalyzer(runtime.patternAnalyzer) + this.autoModeOrchestrator.setPatternProvider(() => runtime.store.getPatterns() as any[]) + await this.autoModeOrchestrator.start() + + this.logger.appendLine( + "[SelfImprovingManager] Initialized: " + + `${runtime.store.getPatterns().length} patterns, ` + + `${runtime.store.getRecentEvents().length} events`, + ) + } catch (error) { + this.stopTimers() + this.started = false + this.runtime = undefined + this.logError("Initialization error", error) + } + } + + async handleExperimentChange(enabled?: boolean): Promise { + try { + const experimentEnabled = SelfImprovingManager.isExperimentEnabled(this.getExperiments()) + // Also check persisted config as fallback (state.json enabled) + const persistedEnabled = this.runtime?.store.getConfig().enabled ?? false + const shouldEnable = enabled ?? (experimentEnabled || persistedEnabled) + if (!shouldEnable) { + await this.dispose() + return + } + + await this.initialize() + } catch (error) { + this.logError("Experiment change handling error", error) + } + } + + /** + * Handle settings change — called when experiments are updated. + * This enables/disables the module at runtime. 
+ */ + async onSettingsChanged(_experiments: Experiments | undefined): Promise { + await this.reconfigureIfNeeded() + await this.handleExperimentChange() + } + + async dispose(): Promise { + const enabled = SelfImprovingManager.isExperimentEnabled(this.getExperiments()) + if (!enabled && !this.started) { + return + } + + this.stopTimers() + this.autoModeOrchestrator.stop() + + try { + if (this.started) { + await this.runtime?.store.persist() + if (this.memoryStore instanceof MemoryStore) { + this.memoryStore.takeSnapshot() + } + } + + await this.memoryStore.dispose() + this.insightsEngine.dispose() + } catch (error) { + this.logError("Persist on dispose error", error) + } finally { + this.started = false + this.promptRevision = 0 + this.runtime = undefined + } + } + + async recordTaskCompletion(info: TaskEventInfo): Promise { + if (!SelfImprovingManager.isExperimentEnabled(this.getExperiments())) { + return + } + + if (!this.started || !this.runtime) { + return + } + + try { + const event = this.runtime.feedbackCollector.createTaskEvent(info) + this.runtime.store.addEvent(event) + this.lastUserActivityAt = event.timestamp + await this.transcriptRecall.record({ + id: event.id, + timestamp: event.timestamp, + taskId: info.taskId, + mode: info.mode, + summary: info.success + ? `Task completed: ${info.mode || "unknown"}` + : `Task failed: ${info.errorKey || "unknown"}`, + signal: info.success ? "TASK_SUCCESS" : "TASK_FAILURE", + workspacePath: info.workspacePath, + toolNames: info.toolNames, + errorKey: info.errorKey, + success: info.success, + }) + + // Log code index stats if available + const codeIndexInfo = this.runtime.codeIndexAdapter.getInfo() + if (codeIndexInfo.available) { + this.logger.appendLine( + `[SelfImprovingManager] Code index available: ${codeIndexInfo.hits} hits`, + ) + } + + this.runtime.store.incrementToolIterations( + Math.max(1, info.toolIterationCount ?? info.toolNames?.length ?? 
1), + ) + await this.checkReviewTriggers(this.runtime.store) + await this.autoModeOrchestrator.onTaskCompleted(info.success ?? false) + } catch (error) { + this.logError("recordTaskCompletion error", error) + } + } + + async recordUserCorrection(info: TaskEventInfo): Promise { + if (!SelfImprovingManager.isExperimentEnabled(this.getExperiments())) { + return + } + + if (!this.started || !this.runtime) { + return + } + + try { + const event = this.runtime.feedbackCollector.createCorrectionEvent(info) + this.runtime.store.addEvent(event) + this.lastUserActivityAt = event.timestamp + await this.checkReviewTriggers(this.runtime.store) + } catch (error) { + this.logError("recordUserCorrection error", error) + } + } + + async recordUserTurn(): Promise { + if (!SelfImprovingManager.isExperimentEnabled(this.getExperiments())) { + return + } + + if (!this.started || !this.runtime) { + return + } + + try { + this.lastUserActivityAt = Date.now() + this.runtime.store.incrementUserTurns() + await this.checkReviewTriggers(this.runtime.store) + } catch (error) { + this.logError("recordUserTurn error", error) + } + } + + async recordCodeIndexEvent(taskId?: string, codeIndexInfo?: CodeIndexInfo): Promise { + if (!SelfImprovingManager.isExperimentEnabled(this.getExperiments())) { + return + } + + if (!this.started || !this.runtime) { + return + } + + try { + if (!this.runtime.store.getConfig().codeIndexCorrelationEnabled) { + return + } + + const resolvedCodeIndexInfo = codeIndexInfo ?? 
this.runtime.codeIndexAdapter.getInfo() + const event = this.runtime.feedbackCollector.createCodeIndexEvent(resolvedCodeIndexInfo, taskId) + this.runtime.store.addEvent(event) + this.lastUserActivityAt = event.timestamp + } catch (error) { + this.logError("recordCodeIndexEvent error", error) + } + } + + async runReviewCycle(): Promise { + if (!SelfImprovingManager.isExperimentEnabled(this.getExperiments())) { + return + } + + if (!this.started || !this.runtime) { + return + } + + if (this.reviewInFlight) { + this.logger.appendLine("[SelfImprovingManager] Review cycle already in progress, skipping") + return + } + this.reviewInFlight = true + + try { + const events = [...this.runtime.store.getRecentEvents()] as LearningEvent[] + if (events.length === 0) { + return + } + + const existingPatterns = [...this.runtime.store.getPatterns()] as LearnedPattern[] + const newPatterns = await this.runtime.patternAnalyzer.analyze(events, existingPatterns) + for (const pattern of newPatterns) { + this.runtime.store.addPattern(pattern) + } + + // Review patterns through multi-agent team + const { approved: approvedPatterns, rejected: rejectedPatterns } = + await this.reviewTeam.reviewPatterns(newPatterns) + + if (rejectedPatterns.length > 0) { + this.logger.appendLine(`[SelfImproving] Review team rejected ${rejectedPatterns.length} patterns`) + } + + const actions = this.runtime.improvementApplier.generateActions([ + ...this.runtime.store.getPatterns(), + ] as LearnedPattern[]) + for (const action of actions) { + this.runtime.store.addAction(action) + } + + // Review actions through multi-agent team + const { rejected: rejectedActions } = await this.reviewTeam.reviewActions(actions) + + if (rejectedActions.length > 0) { + this.logger.appendLine(`[SelfImproving] Review team rejected ${rejectedActions.length} actions`) + } + + const pendingActions = [...this.runtime.store.getPendingActions()] as ImprovementAction[] + if (pendingActions.length > 0) { + const succeeded = await 
this.actionExecutor.executeBatch(pendingActions) + for (const actionId of succeeded) { + this.runtime.store.removeAction(actionId) + } + + this.logger.appendLine( + `[SelfImprovingManager] Executed ${succeeded.size}/${pendingActions.length} actions`, + ) + } + + this.updateReviewTelemetry(this.runtime.store, actions) + this.promptRevision += 1 + this.runtime.store.resetCounters() + await this.runtime.store.persist() + + // Refresh the memory snapshot so newly learned patterns are visible in prompts + if (this.memoryStore instanceof MemoryStore) { + await this.memoryStore.takeSnapshot() + } + this.logger.appendLine( + `[SelfImprovingManager] Review cycle: ${newPatterns.length} patterns, ${actions.length} actions`, + ) + this.logger.appendLine("[SelfImprovingManager] Memory snapshot refreshed after review cycle") + } catch (error) { + this.logError("Review cycle error", error) + } finally { + this.reviewInFlight = false + } + } + + /** + * Immediately trigger a review cycle, bypassing the timer wait. + * Safe to call multiple times - runReviewCycle() has its own gating. + */ + public async triggerReview(): Promise { + if (!SelfImprovingManager.isExperimentEnabled(this.getExperiments())) { + return + } + + if (!this.started || !this.runtime) { + return + } + + await this.runReviewCycle() + } + + async runCuratorCycle(): Promise { + if (!SelfImprovingManager.isExperimentEnabled(this.getExperiments())) { + return undefined + } + + if (!this.started || !this.runtime) { + return undefined + } + + if (this.curatorInFlight) { + return undefined + } + this.curatorInFlight = true + + try { + const now = Date.now() + const report = await this.curatorService.run( + now, + this.lastUserActivityAt > 0 ? 
/**
 * Get code index search results for prompt enrichment.
 *
 * Returns a markdown section ("## Code Index Results") with one numbered line
 * per hit, formatted as `N. path[:line] (score: 0.123)`. Returns an empty
 * string when the manager is not started, the code index adapter reports it
 * is unavailable, the search yields nothing, or the search throws (the error
 * is logged, not propagated).
 *
 * @param query Search text forwarded to the code index adapter.
 * @param limit Maximum number of results requested from the adapter.
 */
async getCodeIndexSearchContext(query: string, limit: number = 5): Promise<string> {
	if (!this.started || !this.runtime) {
		return ""
	}

	try {
		const adapter = this.runtime.codeIndexAdapter
		if (!adapter.isAvailable()) {
			return ""
		}

		const results = await adapter.search(query, limit)
		if (results.length === 0) {
			return ""
		}

		const lines = results.map((r, i) => {
			// Attach the line number to the path ("file.ts:42"), not to the score suffix.
			const location = r.line ? `${r.filePath}:${r.line}` : r.filePath
			return `${i + 1}. ${location} (score: ${r.score.toFixed(3)})`
		})
		return `\n## Code Index Results\n${lines.join("\n")}\n`
	} catch (error) {
		this.logError("getCodeIndexSearchContext error", error)
		return ""
	}
}
{ enabled: false } + + if (!enabled) { + return { + enabled: false, + started: false, + patternCount: 0, + eventCount: 0, + actionCount: 0, + memoryEntries: 0, + skillRecords: 0, + curatorStatus, + autoMode: this.autoModeOrchestrator.getStatus(), + reviewTeam: reviewTeamStatus, + questionEvaluator: questionEvaluatorStatus, + resilience: resilienceStatus, + toolErrorHealer: toolErrorHealerStatus, + preventionEngine: { initialized: true }, + verificationEngine: verificationEngineStatus, + requirementsVerification: requirementsVerificationStatus, + } + } + + if (!this.started || !this.runtime) { + return { + enabled: true, + started: false, + patternCount: 0, + eventCount: 0, + actionCount: 0, + memoryEntries: 0, + skillRecords: 0, + curatorStatus, + autoMode: this.autoModeOrchestrator.getStatus(), + reviewTeam: reviewTeamStatus, + questionEvaluator: questionEvaluatorStatus, + resilience: resilienceStatus, + toolErrorHealer: toolErrorHealerStatus, + preventionEngine: { initialized: true }, + verificationEngine: verificationEngineStatus, + requirementsVerification: requirementsVerificationStatus, + } + } + + try { + const telemetry = this.runtime.store.getTelemetry() + const memStats = await this.memoryStore.getStats() + const skillStats = this.skillUsageStore.getStats() + return { + enabled: true, + started: true, + patternCount: this.runtime.store.getPatterns().length, + eventCount: this.runtime.store.getRecentEvents().length, + actionCount: this.runtime.store.getPendingActions().length, + memoryEntries: memStats.entryCount, + memoryBackend: memStats.backend, + skillRecords: skillStats.total, + curatorStatus, + lastReviewAt: telemetry.lastReviewAt, + lastCuratorRunAt: telemetry.lastCuratorRunAt, + autoMode: this.autoModeOrchestrator.getStatus(), + reviewTeam: reviewTeamStatus, + questionEvaluator: questionEvaluatorStatus, + resilience: resilienceStatus, + toolErrorHealer: toolErrorHealerStatus, + preventionEngine: { initialized: true }, + verificationEngine: 
/**
 * Return the cached runtime bundle, building it on first use.
 *
 * The bundle holds the learning store, feedback collector, pattern analyzer,
 * improvement applier and code index adapter. The store and analyzer are
 * wired to the code index manager only when one is present and the
 * `selfImprovingCodeIndex` experiment is not explicitly `false`.
 */
private getOrCreateRuntime(): Runtime {
	if (this.runtime) {
		return this.runtime
	}

	const experiments = this.getExperiments()
	const learningStore = new LearningStore(this.storageBasePath, this.logger)
	const analyzer = new PatternAnalyzer({
		getExperiments: () => this.getExperiments(),
	})

	// Wire CodeIndexManager to runtime services if experiment is enabled
	const codeIndexEnabled = experiments?.selfImprovingCodeIndex !== false
	if (codeIndexEnabled && this._codeIndexManager) {
		learningStore.setCodeIndexManager(this._codeIndexManager)
		learningStore.setExperimentsAccessor(() => this.getExperiments())
		analyzer.setCodeIndexManager(this._codeIndexManager)
	}

	const feedbackCollector = new FeedbackCollector({
		getExperiments: () => this.getExperiments(),
	})

	const improvementApplier = new ImprovementApplier({
		getSkillNames: () => this.skillsManager?.getSkillNames() ?? [],
		getSkillProvenance: (name: string) => this.resolveSkillProvenance(name),
		getSkillProvenanceForSource: (name: string, source: "global" | "project") =>
			this.resolveSkillProvenance(name, source),
		hasSkill: (name: string, source: "global" | "project") =>
			this.skillsManager?.hasSkill?.(name, source) ?? false,
		// Evaluated lazily, so this.runtime is the bundle assigned below by the time it runs.
		isAutoSkillsEnabled: () =>
			SelfImprovingManager.isAutoSkillsEnabled(
				this.getExperiments(),
				this.runtime?.store.getConfig().enabled,
			),
		getAutoSkillsScope: () => this.resolveAutoSkillsScope(),
		getExperiments: () => this.getExperiments(),
	})

	const codeIndexAdapter = new CodeIndexAdapter(this.logger, this._codeIndexManager)

	this.runtime = {
		store: learningStore,
		feedbackCollector,
		patternAnalyzer: analyzer,
		improvementApplier,
		codeIndexAdapter,
	}

	return this.runtime
}
/**
 * Rebuild storage-bound services when the memory backend, the agent-memory
 * URL or the storage base path has changed since the last configuration.
 *
 * When the manager is running (and the experiment is enabled) it is stopped
 * first: timers are cleared, the learning store is persisted and a final
 * memory snapshot is taken. The old memory store is then disposed, all
 * storage-bound services are re-created against the new settings, and the
 * manager is re-initialized if it had been running.
 */
private async reconfigureIfNeeded(): Promise<void> {
	const nextBackend = this.resolveMemoryBackend(this.memoryBackendType)
	const nextUrl = this.resolveAgentMemoryUrl(this.agentMemoryUrl)
	const nextStorageBasePath = this.resolveStorageBasePath()
	const backendChanged = nextBackend !== this.memoryBackendType || nextUrl !== this.agentMemoryUrl
	const storageChanged = nextStorageBasePath !== this.storageBasePath

	if (!backendChanged && !storageChanged) {
		return
	}

	const shouldRestart = this.started && SelfImprovingManager.isExperimentEnabled(this.getExperiments())
	if (shouldRestart) {
		this.stopTimers()
		await this.runtime?.store.persist()
		if (this.memoryStore instanceof MemoryStore) {
			// Await the snapshot so it completes before the store is disposed below;
			// a failure here is logged and must not block the reconfiguration.
			try {
				await this.memoryStore.takeSnapshot()
			} catch (error) {
				this.logError("Snapshot before reconfigure error", error)
			}
		}
	}

	await this.memoryStore.dispose()
	this.started = false
	this.runtime = undefined
	this.promptRevision = 0

	this.memoryBackendType = nextBackend
	this.agentMemoryUrl = nextUrl
	this.storageBasePath = nextStorageBasePath
	// Order matters: the action executor and curator are built from the fresh stores.
	this.memoryStore = this.createMemoryStore()
	this.skillUsageStore = this.createSkillUsageStore()
	this.actionExecutor = this.createActionExecutor()
	this.curatorService = this.createCuratorService()
	this.transcriptRecall = this.createTranscriptRecall()

	if (shouldRestart) {
		await this.initialize()
	}

	if (backendChanged) {
		this.logger.appendLine(
			`[SelfImprovingManager] Memory backend configured: ${this.memoryBackendType}${this.agentMemoryUrl ? ` (${this.agentMemoryUrl})` : ""}`,
		)
	}
}
/**
 * Fold the actions produced by a review cycle into the store's telemetry.
 *
 * Stamps `lastReviewAt` with the current time and adds the number of actions
 * of each kind to the matching running counter. All four skill-related action
 * types count towards `skillSuggestionCount`.
 */
private updateReviewTelemetry(store: LearningStore, actions: ImprovementAction[]): void {
	const previous = store.getTelemetry()

	let promptEnrichments = 0
	let toolPreferences = 0
	let errorAvoidances = 0
	let skillSuggestions = 0

	for (const { actionType } of actions) {
		switch (actionType) {
			case "PROMPT_ENRICHMENT":
				promptEnrichments += 1
				break
			case "TOOL_PREFERENCE":
				toolPreferences += 1
				break
			case "ERROR_AVOIDANCE":
				errorAvoidances += 1
				break
			case "SKILL_SUGGESTION":
			case "SKILL_CREATE":
			case "SKILL_UPDATE":
			case "SKILL_CREATE_FROM_SCRATCH":
				skillSuggestions += 1
				break
		}
	}

	store.updateTelemetry({
		lastReviewAt: Date.now(),
		promptEnrichmentUses: previous.promptEnrichmentUses + promptEnrichments,
		toolPreferenceUses: previous.toolPreferenceUses + toolPreferences,
		errorAvoidanceUses: previous.errorAvoidanceUses + errorAvoidances,
		skillSuggestionCount: previous.skillSuggestionCount + skillSuggestions,
	})
}
/**
 * Name-based fallback for provenance: reports whether the given skill name
 * starts with one of the prefixes `built-in-`, `core-` or `default-`
 * (case-insensitive).
 */
private isKnownBundledSkill(skillId: string): boolean {
	return /^(?:built-in|core|default)-/i.test(skillId)
}
/**
 * Skill provenance - who created the skill
 */
export type SkillProvenance = "agent" | "user" | "bundled" | "hub" | "unknown"

/**
 * Skill lifecycle state
 */
export type SkillLifecycleState = "active" | "stale" | "archived"

/**
 * Skill telemetry record
 */
export interface SkillTelemetryRecord {
	/** Unique skill identifier */
	skillId: string
	/** Skill name */
	skillName: string
	/** Who created this skill */
	createdBy: SkillProvenance
	/** Current lifecycle state */
	state: SkillLifecycleState
	/** Whether the skill is pinned (protected from auto-mutation) */
	pinned: boolean
	/** Number of times the skill has been loaded/viewed */
	viewCount: number
	/** Number of times the skill has been used in a task */
	useCount: number
	/** Number of times the skill has been patched/updated */
	patchCount: number
	/** Timestamp of first creation */
	createdAt: number
	/** Timestamp of last activity */
	lastActivityAt: number
	/** Timestamp of archival (if archived) */
	archivedAt?: number
	/** Tags for categorization */
	tags?: string[]
	/** Name of umbrella skill this was absorbed into (set by curator merge) */
	absorbedInto?: string
}

/** Every valid `SkillProvenance` value; used to validate values read from disk. */
const SKILL_PROVENANCE_VALUES: ReadonlySet<SkillProvenance> = new Set<SkillProvenance>([
	"agent",
	"user",
	"bundled",
	"hub",
	"unknown",
])

/** Every valid `SkillLifecycleState` value; used to validate values read from disk. */
const SKILL_LIFECYCLE_VALUES: ReadonlySet<SkillLifecycleState> = new Set<SkillLifecycleState>([
	"active",
	"stale",
	"archived",
])
/**
 * SkillUsageStore - telemetry sidecar for skill usage tracking.
 *
 * Mirrors Hermes' skill_usage.py pattern with:
 * - Use/view/patch counters
 * - Provenance tracking (agent-created vs user-authored vs bundled)
 * - Pinning support (protected from autonomous mutation)
 * - Lifecycle state management
 * - File persistence through `safeWriteJson`
 *
 * Records are kept in memory keyed by skill ID. Public read methods return
 * copies, so callers cannot mutate the store's state by accident.
 */
export class SkillUsageStore {
	private readonly filePath: string
	private readonly logger: Logger
	private records: Map<string, SkillTelemetryRecord> = new Map()
	private initialized = false

	/**
	 * @param baseDir Directory under which `self-improving/skill-usage.json` is stored.
	 * @param logger Sink for diagnostic messages.
	 */
	constructor(baseDir: string, logger: Logger) {
		this.filePath = path.join(baseDir, "self-improving", "skill-usage.json")
		this.logger = logger
	}

	/**
	 * Initialize the store - load persisted telemetry from disk.
	 * Idempotent. Failures are logged and the store is still marked initialized.
	 */
	async initialize(): Promise<void> {
		if (this.initialized) {
			return
		}

		try {
			await fs.mkdir(path.dirname(this.filePath), { recursive: true })
			await this.loadFromDisk()
			this.logger.appendLine(`[SkillUsageStore] Initialized: ${this.records.size} skill records`)
		} catch (error) {
			this.logger.appendLine(
				`[SkillUsageStore] Initialization error: ${error instanceof Error ? error.message : String(error)}`,
			)
		} finally {
			this.initialized = true
		}
	}

	/**
	 * Load telemetry from disk. A missing file is not an error; anything that
	 * is not a JSON array is ignored; malformed entries are skipped one by one.
	 */
	private async loadFromDisk(): Promise<void> {
		try {
			const raw = await fs.readFile(this.filePath, "utf-8")
			const parsed: unknown = JSON.parse(raw)

			if (!Array.isArray(parsed)) {
				return
			}

			for (const candidate of parsed) {
				const record = this.sanitizeRecord(candidate)
				if (record) {
					this.records.set(record.skillId, record)
				}
			}
		} catch (error: unknown) {
			const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined
			if (errorCode !== "ENOENT") {
				this.logger.appendLine(
					`[SkillUsageStore] Load error: ${error instanceof Error ? error.message : String(error)}`,
				)
			}
		}
	}

	/**
	 * Write all records to disk via `safeWriteJson`. Errors are logged, never thrown.
	 */
	private async persist(): Promise<void> {
		try {
			await safeWriteJson(this.filePath, Array.from(this.records.values()), { prettyPrint: true })
		} catch (error) {
			this.logger.appendLine(
				`[SkillUsageStore] Persist error: ${error instanceof Error ? error.message : String(error)}`,
			)
		}
	}

	/** Fire-and-forget persist for synchronous callers; persist() never rejects. */
	private queuePersist(): void {
		void this.persist()
	}

	/** Internal lookup returning the live (mutable) record. */
	private getRecord(skillId: string): SkillTelemetryRecord | undefined {
		return this.records.get(skillId)
	}

	/** Copy a record, including its tags array, so callers cannot mutate store state. */
	private cloneRecord(record: SkillTelemetryRecord): SkillTelemetryRecord {
		return {
			...record,
			tags: record.tags ? [...record.tags] : undefined,
		}
	}

	/**
	 * Turn an arbitrary value parsed from disk into a valid record.
	 * Returns null when there is no usable `skillId`; every other field falls
	 * back to a safe default instead of throwing.
	 */
	private sanitizeRecord(value: unknown): SkillTelemetryRecord | null {
		if (!value || typeof value !== "object") {
			return null
		}

		const candidate = value as Partial<Record<keyof SkillTelemetryRecord, unknown>>
		if (typeof candidate.skillId !== "string" || candidate.skillId.trim().length === 0) {
			return null
		}

		const createdAt = this.normalizeTimestamp(candidate.createdAt) ?? Date.now()
		const lastActivityAt = this.normalizeTimestamp(candidate.lastActivityAt) ?? createdAt

		return {
			skillId: candidate.skillId,
			skillName:
				typeof candidate.skillName === "string" && candidate.skillName.trim().length > 0
					? candidate.skillName
					: candidate.skillId,
			createdBy: this.normalizeProvenance(candidate.createdBy),
			state: this.normalizeState(candidate.state),
			pinned: candidate.pinned === true,
			viewCount: this.normalizeCounter(candidate.viewCount),
			useCount: this.normalizeCounter(candidate.useCount),
			patchCount: this.normalizeCounter(candidate.patchCount),
			createdAt,
			lastActivityAt,
			archivedAt: this.normalizeTimestamp(candidate.archivedAt),
			tags: this.normalizeTags(candidate.tags),
			absorbedInto:
				typeof candidate.absorbedInto === "string" && candidate.absorbedInto.trim().length > 0
					? candidate.absorbedInto
					: undefined,
		}
	}

	/** Accept only finite numbers as timestamps (JSON can yield Infinity via 1e999). */
	private normalizeTimestamp(value: unknown): number | undefined {
		return typeof value === "number" && Number.isFinite(value) ? value : undefined
	}

	/** Coerce to a non-negative integer; anything else becomes 0. */
	private normalizeCounter(value: unknown): number {
		return typeof value === "number" && Number.isFinite(value) && value >= 0 ? Math.floor(value) : 0
	}

	/** Unknown or invalid provenance values collapse to "unknown". */
	private normalizeProvenance(value: unknown): SkillProvenance {
		return typeof value === "string" && SKILL_PROVENANCE_VALUES.has(value as SkillProvenance)
			? (value as SkillProvenance)
			: "unknown"
	}

	/** Unknown or invalid lifecycle values collapse to "active". */
	private normalizeState(value: unknown): SkillLifecycleState {
		return typeof value === "string" && SKILL_LIFECYCLE_VALUES.has(value as SkillLifecycleState)
			? (value as SkillLifecycleState)
			: "active"
	}

	/**
	 * Trim, de-duplicate and drop empty tags. Non-string entries are discarded
	 * rather than allowed to throw, so one corrupt record cannot abort the
	 * whole load. Returns undefined when nothing usable remains.
	 */
	private normalizeTags(tags: unknown): string[] | undefined {
		if (!Array.isArray(tags)) {
			return undefined
		}

		const cleaned = new Set<string>()
		for (const tag of tags) {
			if (typeof tag !== "string") {
				continue
			}
			const trimmed = tag.trim()
			if (trimmed.length > 0) {
				cleaned.add(trimmed)
			}
		}

		return cleaned.size > 0 ? Array.from(cleaned) : undefined
	}

	// ──── Record management ────

	/**
	 * Get or create a telemetry record for a skill.
	 * A newly created record is persisted in the background.
	 */
	getOrCreate(skillId: string, skillName: string, createdBy: SkillProvenance = "unknown"): SkillTelemetryRecord {
		const existing = this.getRecord(skillId)
		if (existing) {
			return this.cloneRecord(existing)
		}

		const now = Date.now()
		const record: SkillTelemetryRecord = {
			skillId,
			skillName,
			createdBy,
			state: "active",
			pinned: false,
			viewCount: 0,
			useCount: 0,
			patchCount: 0,
			createdAt: now,
			lastActivityAt: now,
		}

		this.records.set(skillId, record)
		this.queuePersist()

		return this.cloneRecord(record)
	}

	/**
	 * Get a telemetry record by skill ID.
	 */
	get(skillId: string): SkillTelemetryRecord | undefined {
		const record = this.getRecord(skillId)
		return record ? this.cloneRecord(record) : undefined
	}

	/**
	 * Get all telemetry records.
	 */
	getAll(): SkillTelemetryRecord[] {
		return Array.from(this.records.values(), (record) => this.cloneRecord(record))
	}

	/**
	 * Get records filtered by provenance.
	 */
	getByProvenance(provenance: SkillProvenance): SkillTelemetryRecord[] {
		return this.getAll().filter((record) => record.createdBy === provenance)
	}

	/**
	 * Get records filtered by lifecycle state.
	 */
	getByState(state: SkillLifecycleState): SkillTelemetryRecord[] {
		return this.getAll().filter((record) => record.state === state)
	}

	// ──── Telemetry bumps ────

	/** Increment one counter, refresh the activity timestamp and persist. No-op for unknown skills. */
	private async bumpCounter(skillId: string, counter: "viewCount" | "useCount" | "patchCount"): Promise<void> {
		const record = this.getRecord(skillId)
		if (!record) {
			return
		}

		record[counter] += 1
		record.lastActivityAt = Date.now()
		await this.persist()
	}

	/**
	 * Record that a skill was viewed/loaded.
	 */
	async bumpView(skillId: string): Promise<void> {
		await this.bumpCounter(skillId, "viewCount")
	}

	/**
	 * Record that a skill was used in a task.
	 */
	async bumpUse(skillId: string): Promise<void> {
		await this.bumpCounter(skillId, "useCount")
	}

	/**
	 * Record that a skill was patched/updated.
	 */
	async bumpPatch(skillId: string): Promise<void> {
		await this.bumpCounter(skillId, "patchCount")
	}

	// ──── Lifecycle management ────

	/** Set the pinned flag, refresh the activity timestamp and persist. No-op for unknown skills. */
	private async setPinned(skillId: string, pinned: boolean): Promise<void> {
		const record = this.getRecord(skillId)
		if (!record) {
			return
		}

		record.pinned = pinned
		record.lastActivityAt = Date.now()
		await this.persist()
	}

	/**
	 * Pin a skill (protect from autonomous mutation).
	 */
	async pin(skillId: string): Promise<void> {
		await this.setPinned(skillId, true)
	}

	/**
	 * Unpin a skill.
	 */
	async unpin(skillId: string): Promise<void> {
		await this.setPinned(skillId, false)
	}

	/**
	 * Check if a skill is pinned. Unknown skills report false.
	 */
	isPinned(skillId: string): boolean {
		return this.getRecord(skillId)?.pinned ?? false
	}

	/**
	 * Transition a skill to a new lifecycle state.
	 * Pinned and unknown skills are left untouched.
	 */
	async transitionState(skillId: string, newState: SkillLifecycleState): Promise<void> {
		const record = this.getRecord(skillId)
		if (!record || record.pinned) {
			return
		}

		const now = Date.now()
		record.state = newState
		record.lastActivityAt = now

		if (newState === "archived") {
			record.archivedAt = now
		} else {
			delete record.archivedAt
		}

		await this.persist()
	}

	/**
	 * Get skills eligible for stale transition:
	 * active, unpinned, and without activity for `staleAfterDays` days.
	 */
	getStaleCandidates(staleAfterDays: number): SkillTelemetryRecord[] {
		const threshold = Date.now() - staleAfterDays * 24 * 60 * 60 * 1000
		return this.getAll().filter(
			(record) => record.state === "active" && !record.pinned && record.lastActivityAt < threshold,
		)
	}

	/**
	 * Get skills eligible for archive transition:
	 * stale, unpinned, and without activity for `archiveAfterDays` days.
	 */
	getArchiveCandidates(archiveAfterDays: number): SkillTelemetryRecord[] {
		const threshold = Date.now() - archiveAfterDays * 24 * 60 * 60 * 1000
		return this.getAll().filter(
			(record) => record.state === "stale" && !record.pinned && record.lastActivityAt < threshold,
		)
	}

	/**
	 * Remove a skill record entirely. Persists only if a record was deleted.
	 */
	async remove(skillId: string): Promise<void> {
		if (!this.records.delete(skillId)) {
			return
		}

		await this.persist()
	}

	/**
	 * Get aggregate statistics.
	 */
	getStats(): {
		total: number
		active: number
		stale: number
		archived: number
		pinned: number
		agentCreated: number
		pinnedAgentCreated: number
	} {
		const records = Array.from(this.records.values())
		return {
			total: records.length,
			active: records.filter((record) => record.state === "active").length,
			stale: records.filter((record) => record.state === "stale").length,
			archived: records.filter((record) => record.state === "archived").length,
			pinned: records.filter((record) => record.pinned).length,
			agentCreated: records.filter((record) => record.createdBy === "agent").length,
			pinnedAgentCreated: records.filter((record) => record.createdBy === "agent" && record.pinned).length,
		}
	}

	/**
	 * Get agent-created skills for curator review (excludes pinned and non-agent).
	 */
	getAgentCreatedForReview(): SkillTelemetryRecord[] {
		return this.getAll().filter((record) => record.createdBy === "agent" && !record.pinned)
	}

	/**
	 * Get agent-created pinned skills.
	 */
	getAgentCreatedPinned(): SkillTelemetryRecord[] {
		return this.getAll().filter((record) => record.createdBy === "agent" && record.pinned)
	}

	/**
	 * Move a skill record to archived state and stamp `archivedAt`.
	 * NOTE(review): unlike transitionState(), this does not skip pinned skills —
	 * confirm that explicit archiving is meant to bypass pinning.
	 */
	async archive(skillId: string): Promise<void> {
		const record = this.getRecord(skillId)
		if (!record) {
			return
		}

		const now = Date.now()
		record.state = "archived"
		record.archivedAt = now
		record.lastActivityAt = now
		await this.persist()
	}

	/**
	 * Restore an archived skill back to active.
	 */
	async restore(skillId: string): Promise<void> {
		const record = this.getRecord(skillId)
		if (!record) {
			return
		}

		record.state = "active"
		record.archivedAt = undefined
		record.lastActivityAt = Date.now()
		await this.persist()
	}

	/**
	 * Set the absorbedInto field on a skill record.
	 * Used by curator merge actions to mark skills as absorbed into an umbrella.
	 * Does not touch `lastActivityAt`.
	 */
	async setAbsorbedInto(skillId: string, umbrellaName: string): Promise<void> {
		const record = this.getRecord(skillId)
		if (!record) {
			return
		}

		record.absorbedInto = umbrellaName
		await this.persist()
	}

	/**
	 * Reset all telemetry and persist the empty state.
	 */
	async reset(): Promise<void> {
		this.records.clear()
		await this.persist()
	}
}
/**
 * Produce warning hints for a tool that has failed repeatedly.
 *
 * Hints are emitted only when at least two entries in `recentErrors` belong
 * to `toolName`; one hint is produced per distinct error category, using the
 * suggestion returned by the error classifier.
 *
 * @param toolName Tool about to be called.
 * @param recentErrors Recent failures across all tools.
 * @returns Hint strings, or an empty array when there are fewer than two failures.
 */
getPreventionHints(
	toolName: string,
	recentErrors: Array<{ toolName: string; category: ErrorCategory }>,
): string[] {
	const failuresForTool = recentErrors.filter((entry) => entry.toolName === toolName)
	if (failuresForTool.length < 2) {
		return []
	}

	const distinctCategories = new Set(failuresForTool.map((entry) => entry.category))
	return Array.from(distinctCategories, (category) => {
		const { suggestion } = this.errorClassifier.classify(category, toolName)
		return `⚠️ Previous ${toolName} calls failed: ${suggestion}`
	})
}
+ */ +const KNOWN_TOOL_REQUIREMENTS: Record = { + search_files: [ + { param: "regex", hint: "Provide a valid regex pattern for the search" }, + { param: "path", hint: "Specify the directory path to search in" }, + ], + read_file: [{ param: "path", hint: "Specify the file path to read" }], + write_to_file: [ + { param: "path", hint: "Specify the file path to write to" }, + { param: "content", hint: "Provide the content to write" }, + ], + apply_diff: [ + { param: "path", hint: "Specify the file path to edit" }, + { param: "diff", hint: "Provide the diff content to apply" }, + ], + execute_command: [{ param: "command", hint: "Provide the command to execute" }], + use_mcp_tool: [ + { param: "server_name", hint: "Specify the MCP server name" }, + { param: "tool_name", hint: "Specify the tool name to call" }, + { param: "arguments", hint: "Provide the tool arguments as JSON" }, + ], + access_mcp_resource: [ + { param: "server_name", hint: "Specify the MCP server name" }, + { param: "uri", hint: "Specify the resource URI" }, + ], + ask_followup_question: [{ param: "question", hint: "Provide the question to ask" }], + attempt_completion: [{ param: "result", hint: "Provide the completion result" }], + new_task: [ + { param: "mode", hint: "Specify the mode for the new task" }, + { param: "message", hint: "Provide the task message" }, + ], +} + +export class ToolErrorHealer { + private logger: Logger + private config: ToolErrorHealerConfig + private corrections: Map = new Map() + + constructor(logger: Logger, config?: Partial) { + this.logger = logger + this.config = { ...DEFAULT_CONFIG, ...config } + } + + getConfig(): ToolErrorHealerConfig { + return { ...this.config } + } + + updateConfig(updates: Partial): void { + this.config = { ...this.config, ...updates } + this.logger.appendLine(`[ToolErrorHealer] Config updated: ${JSON.stringify(updates)}`) + } + + /** + * Get a healing suggestion for a tool error message. 
+ * Parses the error message to extract the missing parameter name, + * then looks up the known fix for that tool+parameter combination. + * Returns a human-readable suggestion string or null if no fix known. + */ + getHealingSuggestion(toolName: string, errorMessage: string): string | null { + if (!this.config.enabled) { + return null + } + + // Try to extract missing parameter name from common error patterns + const paramMatch = errorMessage.match(/parameter\s+'([^']+)'/i) + const missingParam = paramMatch?.[1] + + if (!missingParam) { + return null + } + + const requirements = KNOWN_TOOL_REQUIREMENTS[toolName] + if (!requirements) { + return null + } + + const req = requirements.find((r) => r.param === missingParam) + if (!req) { + return null + } + + return `Suggestion: ${req.hint}` + } + + /** + * Handle a tool parameter error. Returns a fix suggestion or null. + */ + handleToolError(toolName: string, missingParam: string): { fix: string; autoCorrectable: boolean } | null { + if (!this.config.enabled) { + return null + } + + // Check if we have a known fix first (before recording, so unknown tools don't get learned fixes) + const knownFix = this.getKnownFix(toolName, missingParam) + if (knownFix) { + this.logger.appendLine(`[ToolErrorHealer] Known fix for ${toolName}.${missingParam}: ${knownFix.fix}`) + // Record the error for learning purposes + this.recordError(toolName, missingParam) + return knownFix + } + + // For unknown tools, only return a learned fix if we've seen this error before + // and have enough occurrences to auto-correct + const learnedFix = this.getLearnedFix(toolName, missingParam) + if (learnedFix && learnedFix.autoCorrectable) { + this.logger.appendLine(`[ToolErrorHealer] Learned fix for ${toolName}.${missingParam}: ${learnedFix.fix}`) + // Record the error for learning purposes + this.recordError(toolName, missingParam) + return learnedFix + } + + // Record the error for future learning + this.recordError(toolName, missingParam) + + 
return null + } + + /** + * Get all known parameter requirements for a tool. + */ + getToolRequirements(toolName: string): { param: string; hint: string }[] { + return KNOWN_TOOL_REQUIREMENTS[toolName] ?? [] + } + + /** + * Check if a tool has known parameter requirements. + */ + hasKnownRequirements(toolName: string): boolean { + return toolName in KNOWN_TOOL_REQUIREMENTS + } + + /** + * Get a summary of all recorded corrections for learning. + */ + getCorrectionSummary(): { toolName: string; missingParam: string; occurrences: number }[] { + const summary: { toolName: string; missingParam: string; occurrences: number }[] = [] + for (const [, corrections] of this.corrections) { + for (const c of corrections) { + summary.push({ + toolName: c.toolName, + missingParam: c.missingParam, + occurrences: c.occurrences, + }) + } + } + return summary + } + + private recordError(toolName: string, missingParam: string): void { + if (!this.config.learnFromCorrections) { + return + } + + const key = `${toolName}:${missingParam}` + let toolCorrections = this.corrections.get(key) + + if (toolCorrections) { + const existing = toolCorrections.find((c) => c.toolName === toolName && c.missingParam === missingParam) + if (existing) { + existing.occurrences++ + existing.lastSeen = new Date() + } else { + toolCorrections.push({ + toolName, + missingParam, + fixStrategy: `Ensure '${missingParam}' is provided when calling ${toolName}`, + occurrences: 1, + lastSeen: new Date(), + }) + } + } else { + this.corrections.set(key, [ + { + toolName, + missingParam, + fixStrategy: `Ensure '${missingParam}' is provided when calling ${toolName}`, + occurrences: 1, + lastSeen: new Date(), + }, + ]) + } + + // Trim to max + const allCorrections = this.corrections.get(key) ?? 
[] + if (allCorrections.length > this.config.maxCorrectionsPerTool) { + allCorrections.sort((a, b) => b.occurrences - a.occurrences) + this.corrections.set(key, allCorrections.slice(0, this.config.maxCorrectionsPerTool)) + } + } + + private getKnownFix(toolName: string, missingParam: string): { fix: string; autoCorrectable: boolean } | null { + const requirements = KNOWN_TOOL_REQUIREMENTS[toolName] + if (!requirements) { + return null + } + + const req = requirements.find((r) => r.param === missingParam) + if (!req) { + return null + } + + return { + fix: req.hint, + autoCorrectable: this.config.autoCorrect, + } + } + + private getLearnedFix(toolName: string, missingParam: string): { fix: string; autoCorrectable: boolean } | null { + const key = `${toolName}:${missingParam}` + const toolCorrections = this.corrections.get(key) + if (!toolCorrections || toolCorrections.length === 0) { + return null + } + + const best = toolCorrections.reduce((a, b) => (a.occurrences > b.occurrences ? a : b)) + + return { + fix: best.fixStrategy, + autoCorrectable: this.config.autoCorrect && best.occurrences >= 2, + } + } + + getStatus(): Record { + return { + enabled: this.config.enabled, + autoCorrect: this.config.autoCorrect, + knownTools: Object.keys(KNOWN_TOOL_REQUIREMENTS).length, + recordedCorrections: this.getCorrectionSummary().length, + } + } +} diff --git a/src/services/self-improving/TranscriptRecall.ts b/src/services/self-improving/TranscriptRecall.ts new file mode 100644 index 0000000000..92c36c8061 --- /dev/null +++ b/src/services/self-improving/TranscriptRecall.ts @@ -0,0 +1,189 @@ +import * as fs from "fs/promises" +import * as path from "path" + +import { safeWriteJson } from "../../utils/safeWriteJson" +import type { Logger } from "./types" + +/** + * Transcript entry — a single recorded event or task outcome + */ +export interface TranscriptEntry { + id: string + timestamp: number + taskId?: string + mode?: string + summary: string + signal: string + 
workspacePath?: string + toolNames?: string[] + errorKey?: string + success?: boolean +} + +/** + * TranscriptRecall — searchable transcript evidence store. + */ +export class TranscriptRecall { + private readonly filePath: string + private readonly logger: Logger + private entries: TranscriptEntry[] = [] + private initialized = false + private initializePromise: Promise | null = null + + private static readonly MAX_ENTRIES = 1000 + + constructor(baseDir: string, logger: Logger) { + this.filePath = path.join(baseDir, "self-improving", "transcript-recall.json") + this.logger = logger + } + + async initialize(): Promise { + if (this.initialized) { + return + } + + if (!this.initializePromise) { + this.initializePromise = (async () => { + try { + await fs.mkdir(path.dirname(this.filePath), { recursive: true }) + await this.loadFromDisk() + } catch (error) { + this.logger.appendLine( + `[TranscriptRecall] Initialization error: ${error instanceof Error ? error.message : String(error)}`, + ) + } finally { + this.initialized = true + this.initializePromise = null + } + })() + } + + await this.initializePromise + } + + async record(entry: TranscriptEntry): Promise { + if (!this.initialized) { + await this.initialize() + } + + this.entries.push({ + ...entry, + toolNames: entry.toolNames ? 
[...entry.toolNames] : undefined, + }) + + if (this.entries.length > TranscriptRecall.MAX_ENTRIES) { + this.entries = this.entries.slice(-TranscriptRecall.MAX_ENTRIES) + } + + await this.persist() + } + + search(query: string): TranscriptEntry[] { + const normalizedQuery = query.toLowerCase() + return this.entries + .filter((entry) => { + if (entry.summary.toLowerCase().includes(normalizedQuery)) { + return true + } + + if (entry.signal.toLowerCase().includes(normalizedQuery)) { + return true + } + + if (entry.errorKey?.toLowerCase().includes(normalizedQuery)) { + return true + } + + if (entry.mode?.toLowerCase().includes(normalizedQuery)) { + return true + } + + return entry.toolNames?.some((toolName) => toolName.toLowerCase().includes(normalizedQuery)) ?? false + }) + .slice(-20) + } + + searchBySignal(signal: string): TranscriptEntry[] { + return this.entries.filter((entry) => entry.signal === signal).slice(-50) + } + + searchByErrorKey(errorKey: string): TranscriptEntry[] { + return this.entries.filter((entry) => entry.errorKey === errorKey).slice(-20) + } + + getRecent(count = 10): TranscriptEntry[] { + return this.entries.slice(-count) + } + + get size(): number { + return this.entries.length + } + + async clear(): Promise { + this.entries = [] + await this.persist() + } + + private async loadFromDisk(): Promise { + try { + const raw = await fs.readFile(this.filePath, "utf-8") + const parsed = JSON.parse(raw) + if (Array.isArray(parsed)) { + this.entries = parsed + .map((entry) => this.sanitizeEntry(entry)) + .filter((entry): entry is TranscriptEntry => entry !== null) + .slice(-TranscriptRecall.MAX_ENTRIES) + } + } catch (error: unknown) { + const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined + if (errorCode !== "ENOENT") { + this.logger.appendLine( + `[TranscriptRecall] Load error: ${error instanceof Error ? 
error.message : String(error)}`, + ) + } + } + } + + private sanitizeEntry(value: unknown): TranscriptEntry | null { + if (!value || typeof value !== "object") { + return null + } + + const candidate = value as Partial + if ( + typeof candidate.id !== "string" || + typeof candidate.timestamp !== "number" || + typeof candidate.summary !== "string" || + typeof candidate.signal !== "string" + ) { + return null + } + + return { + id: candidate.id, + timestamp: candidate.timestamp, + taskId: typeof candidate.taskId === "string" ? candidate.taskId : undefined, + mode: typeof candidate.mode === "string" ? candidate.mode : undefined, + summary: candidate.summary, + signal: candidate.signal, + workspacePath: typeof candidate.workspacePath === "string" ? candidate.workspacePath : undefined, + toolNames: + Array.isArray(candidate.toolNames) && + candidate.toolNames.every((toolName) => typeof toolName === "string") + ? [...candidate.toolNames] + : undefined, + errorKey: typeof candidate.errorKey === "string" ? candidate.errorKey : undefined, + success: typeof candidate.success === "boolean" ? candidate.success : undefined, + } + } + + private async persist(): Promise { + try { + await safeWriteJson(this.filePath, this.entries, { prettyPrint: true }) + } catch (error) { + this.logger.appendLine( + `[TranscriptRecall] Persist error: ${error instanceof Error ? 
error.message : String(error)}`, + ) + } + } +} diff --git a/src/services/self-improving/TrustService.ts b/src/services/self-improving/TrustService.ts new file mode 100644 index 0000000000..cd328b5bf2 --- /dev/null +++ b/src/services/self-improving/TrustService.ts @@ -0,0 +1,243 @@ +import type { Logger, Experiments } from "./types" +import type { CodeIndexManager } from "../code-index/manager" + +export interface TrustConfig { + enabled: boolean + autoApproveRead: boolean // auto-approve read operations + autoApproveWrite: boolean // auto-approve write operations + autoApproveCommands: boolean // auto-approve command execution + autoApproveMcp: boolean // auto-approve MCP tool calls + autoApproveModeSwitch: boolean // auto-approve mode switches + maxConsecutiveActions: number // max actions before requiring confirmation (0 = unlimited) + trustedCommands: string[] // specific commands to auto-approve (e.g., ["npm test", "git status"]) + trustedPaths: string[] // file path patterns to auto-approve writes to +} + +const DEFAULT_CONFIG: TrustConfig = { + enabled: false, + autoApproveRead: true, + autoApproveWrite: false, + autoApproveCommands: false, + autoApproveMcp: false, + autoApproveModeSwitch: false, + maxConsecutiveActions: 0, + trustedCommands: [], + trustedPaths: [], +} + +export class TrustService { + private logger: Logger + private config: TrustConfig + private consecutiveActions: number = 0 + /** Tracks whether the current task has already completed, preventing auto-approval of attempt_completion */ + public taskCompleted: boolean = false + private codeIndexManager: CodeIndexManager | undefined + private getExperiments: (() => Experiments | undefined) | undefined + + constructor(logger: Logger, config?: Partial) { + this.logger = logger + this.config = { ...DEFAULT_CONFIG, ...config } + } + + /** + * Set the CodeIndexManager instance for vector-search-based pattern retrieval. 
+ */ + setCodeIndexManager(manager: CodeIndexManager | undefined): void { + this.codeIndexManager = manager + } + + /** + * Set the experiments accessor for feature gating. + */ + setExperimentsAccessor(getExperiments: () => Experiments | undefined): void { + this.getExperiments = getExperiments + } + + getConfig(): TrustConfig { + return { ...this.config } + } + + updateConfig(updates: Partial): void { + this.config = { ...this.config, ...updates } + this.logger.appendLine(`[TrustService] Config updated: ${JSON.stringify(updates)}`) + } + + /** + * Check if a tool/command should be auto-approved. + */ + shouldAutoApprove(toolName: string, params?: { command?: string; path?: string; mode?: string }): boolean { + if (!this.config.enabled) { + return false + } + + // Check max consecutive actions limit + if (this.config.maxConsecutiveActions > 0 && this.consecutiveActions >= this.config.maxConsecutiveActions) { + this.logger.appendLine( + `[TrustService] Max consecutive actions (${this.config.maxConsecutiveActions}) reached, requiring confirmation`, + ) + return false + } + + let approved = false + + switch (toolName) { + case "read_file": + case "search_files": + case "list_files": + approved = this.config.autoApproveRead + break + + case "write_file": + case "edit_file": + case "create_file": + case "delete_file": + if (this.config.autoApproveWrite && params?.path) { + approved = this.isPathTrusted(params.path) + } else { + approved = this.config.autoApproveWrite + } + break + + case "execute_command": + case "bash": + case "powershell": + if (this.config.autoApproveCommands && params?.command) { + approved = this.isCommandTrusted(params.command) + } else { + approved = this.config.autoApproveCommands + } + break + + case "use_mcp_tool": + case "access_mcp_resource": + approved = this.config.autoApproveMcp + break + + case "switch_mode": + approved = this.config.autoApproveModeSwitch + break + + case "ask_followup_question": + // These are always auto-approved 
(they're user-facing, not dangerous) + approved = true + break + + case "attempt_completion": + // Check if task already completed before auto-approving + if (this.taskCompleted) { + this.logger.appendLine(`[TrustService] Blocked attempt_completion: task already completed`) + return false + } + approved = true + break + + default: + approved = false + } + + if (approved) { + this.consecutiveActions++ + this.logger.appendLine( + `[TrustService] Auto-approved: ${toolName} (consecutive: ${this.consecutiveActions})`, + ) + } + + return approved + } + + /** + * Reset the consecutive action counter (call after user interaction). + */ + resetConsecutiveCounter(): void { + this.consecutiveActions = 0 + } + + /** + * Check if a command is in the trusted list. + */ + private isCommandTrusted(command: string): boolean { + if (this.config.trustedCommands.length === 0) { + return true + } // if autoApproveCommands is on, all commands trusted + + return this.config.trustedCommands.some((trusted) => { + if (trusted.endsWith("*")) { + return command.startsWith(trusted.slice(0, -1)) + } + return command === trusted || command.startsWith(trusted) + }) + } + + /** + * Check if a file path matches trusted patterns. + */ + private isPathTrusted(filePath: string): boolean { + if (this.config.trustedPaths.length === 0) { + return true + } // if autoApproveWrite is on, all paths trusted + + return this.config.trustedPaths.some((pattern) => { + if (pattern.endsWith("*")) { + return filePath.startsWith(pattern.slice(0, -1)) + } + if (pattern.endsWith("/")) { + return filePath.startsWith(pattern) + } + return filePath === pattern || filePath.startsWith(pattern + "/") + }) + } + + /** + * Search for similar patterns using vector search. + * Uses CodeIndexManager to find past successful/failed patterns similar to current context. + * Informs trust scoring decisions. + * Gated behind selfImprovingCodeIndex experiment flag. 
+ */ + async searchSimilarPatterns( + context: string, + ): Promise> { + const experiments = this.getExperiments?.() + if (experiments?.selfImprovingCodeIndex === false) { + return [] + } + + if (!this.codeIndexManager) { + return [] + } + + try { + const results = await this.codeIndexManager.searchIndex(context) + if (!results || results.length === 0) { + return [] + } + + return results + .filter((r) => r.payload?.codeChunk) + .map((r) => ({ + summary: r.payload?.codeChunk?.slice(0, 200) ?? "", + score: r.score, + filePath: r.payload?.filePath, + })) + } catch (error) { + this.logger.appendLine( + `[TrustService] searchSimilarPatterns error: ${error instanceof Error ? error.message : String(error)}`, + ) + return [] + } + } + + getStatus(): Record { + return { + enabled: this.config.enabled, + autoApproveRead: this.config.autoApproveRead, + autoApproveWrite: this.config.autoApproveWrite, + autoApproveCommands: this.config.autoApproveCommands, + autoApproveMcp: this.config.autoApproveMcp, + autoApproveModeSwitch: this.config.autoApproveModeSwitch, + maxConsecutiveActions: this.config.maxConsecutiveActions, + trustedCommands: this.config.trustedCommands.length, + trustedPaths: this.config.trustedPaths.length, + consecutiveActions: this.consecutiveActions, + } + } +} diff --git a/src/services/self-improving/VerificationEngine.test.ts b/src/services/self-improving/VerificationEngine.test.ts new file mode 100644 index 0000000000..8ae0aaa402 --- /dev/null +++ b/src/services/self-improving/VerificationEngine.test.ts @@ -0,0 +1,195 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { VerificationEngine } from "./VerificationEngine" + +describe("VerificationEngine", () => { + let engine: VerificationEngine + let logger: { appendLine: ReturnType } + + beforeEach(() => { + logger = { appendLine: vi.fn() } + engine = new VerificationEngine(logger, { + checkBuild: false, + checkLint: false, + checkTypes: false, + checkTests: false, + gateTimeoutMs: 
5000, + mandatory: true, + }) + }) + + describe("config", () => { + it("should use defaults when no config provided", () => { + const e = new VerificationEngine() + const config = e.getConfig() + expect(config.checkBuild).toBe(false) + expect(config.checkLint).toBe(false) + expect(config.checkTypes).toBe(false) + expect(config.checkTests).toBe(false) + expect(config.gateTimeoutMs).toBe(60000) + expect(config.mandatory).toBe(true) + }) + + it("should merge partial config with defaults", () => { + const e = new VerificationEngine(undefined, { + checkBuild: true, + buildCommand: "npm run build", + }) + const config = e.getConfig() + expect(config.checkBuild).toBe(true) + expect(config.buildCommand).toBe("npm run build") + expect(config.checkLint).toBe(false) + expect(config.gateTimeoutMs).toBe(60000) + }) + + it("should update config via updateConfig", () => { + engine.updateConfig({ checkLint: true, lintCommand: "npm run lint" }) + const config = engine.getConfig() + expect(config.checkLint).toBe(true) + expect(config.lintCommand).toBe("npm run lint") + }) + }) + + describe("verify with no gates", () => { + it("should return passed=true with no gates configured", async () => { + const result = await engine.verify() + expect(result.passed).toBe(true) + expect(result.gates).toHaveLength(0) + expect(result.summary).toBe("No verification gates configured") + }) + }) + + describe("verify with gates", () => { + it("should pass when all gates pass", async () => { + engine.updateConfig({ + checkBuild: true, + buildCommand: "echo build-ok", + checkLint: true, + lintCommand: "echo lint-ok", + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.passed).toBe(true) + expect(result.gates).toHaveLength(2) + expect(result.gates[0].name).toBe("build") + expect(result.gates[0].passed).toBe(true) + expect(result.gates[1].name).toBe("lint") + expect(result.gates[1].passed).toBe(true) + expect(result.summary).toBe("All 2 verification gates passed") + }) + + 
it("should fail when build fails", async () => { + engine.updateConfig({ + checkBuild: true, + buildCommand: "false", + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.passed).toBe(false) + expect(result.gates).toHaveLength(1) + expect(result.gates[0].name).toBe("build") + expect(result.gates[0].passed).toBe(false) + expect(result.gates[0].error).toBeTruthy() + expect(result.summary).toContain("1/1 gates failed") + }) + + it("should fail when lint fails", async () => { + engine.updateConfig({ + checkLint: true, + lintCommand: "false", + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.passed).toBe(false) + expect(result.gates).toHaveLength(1) + expect(result.gates[0].name).toBe("lint") + expect(result.gates[0].passed).toBe(false) + }) + + it("should fail when type check fails", async () => { + engine.updateConfig({ + checkTypes: true, + typeCheckCommand: "false", + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.passed).toBe(false) + expect(result.gates).toHaveLength(1) + expect(result.gates[0].name).toBe("type-check") + expect(result.gates[0].passed).toBe(false) + }) + + it("should fail when tests fail", async () => { + engine.updateConfig({ + checkTests: true, + testCommand: "false", + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.passed).toBe(false) + expect(result.gates).toHaveLength(1) + expect(result.gates[0].name).toBe("tests") + expect(result.gates[0].passed).toBe(false) + }) + + it("should report multiple failures", async () => { + engine.updateConfig({ + checkBuild: true, + buildCommand: "false", + checkLint: true, + lintCommand: "false", + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.passed).toBe(false) + expect(result.gates).toHaveLength(2) + expect(result.gates.every((g) => !g.passed)).toBe(true) + expect(result.summary).toContain("2/2 gates failed") + }) + + it("should record duration for each gate", async () => { 
+ engine.updateConfig({ + checkBuild: true, + buildCommand: "echo build-ok", + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.gates).toHaveLength(1) + expect(result.gates[0].durationMs).toBeGreaterThanOrEqual(0) + }) + + it("should timeout when command exceeds gateTimeoutMs", async () => { + engine.updateConfig({ + checkBuild: true, + buildCommand: "sleep 10", + gateTimeoutMs: 100, + cwd: "/tmp", + }) + + const result = await engine.verify() + expect(result.passed).toBe(false) + expect(result.gates[0].name).toBe("build") + expect(result.gates[0].passed).toBe(false) + expect(result.gates[0].error).toBeTruthy() + // Gate should have failed due to timeout — error is present + expect(result.gates[0].durationMs).toBeGreaterThanOrEqual(0) + }) + }) + + describe("mandatory flag", () => { + it("should return mandatory=true by default", () => { + expect(engine.getConfig().mandatory).toBe(true) + }) + + it("should allow setting mandatory=false", () => { + engine.updateConfig({ mandatory: false }) + expect(engine.getConfig().mandatory).toBe(false) + }) + }) +}) diff --git a/src/services/self-improving/VerificationEngine.ts b/src/services/self-improving/VerificationEngine.ts new file mode 100644 index 0000000000..5bd73efe32 --- /dev/null +++ b/src/services/self-improving/VerificationEngine.ts @@ -0,0 +1,156 @@ +import type { Logger } from "./types" + +export interface VerificationResult { + passed: boolean + gates: Array<{ + name: string + passed: boolean + output?: string + error?: string + durationMs: number + }> + summary: string +} + +export interface VerificationConfig { + /** Whether to run build check */ + checkBuild: boolean + /** Whether to run lint check */ + checkLint: boolean + /** Whether to run type check */ + checkTypes: boolean + /** Whether to run tests */ + checkTests: boolean + /** Build command (e.g., "npm run build") */ + buildCommand?: string + /** Lint command (e.g., "npm run lint") */ + lintCommand?: string + /** Type check 
command (e.g., "npm run typecheck") */ + typeCheckCommand?: string + /** Test command (e.g., "npm test") */ + testCommand?: string + /** Working directory for verification commands */ + cwd?: string + /** Timeout per gate in ms */ + gateTimeoutMs: number + /** Whether verification is mandatory (blocks completion) */ + mandatory: boolean +} + +const DEFAULT_CONFIG: VerificationConfig = { + checkBuild: false, + checkLint: false, + checkTypes: false, + checkTests: false, + gateTimeoutMs: 60_000, + mandatory: true, +} + +export class VerificationEngine { + private config: VerificationConfig + private lastVerifyAt?: number + private lastResult?: VerificationResult + + constructor( + private readonly logger?: Logger, + config?: Partial, + ) { + this.config = { ...DEFAULT_CONFIG, ...config } + } + + updateConfig(config: Partial): void { + this.config = { ...this.config, ...config } + this.logger?.appendLine( + `[VerificationEngine] Config updated: ${JSON.stringify(config)}`, + ) + } + + getConfig(): VerificationConfig { + return { ...this.config } + } + + getStatus(): Record { + return { + enabled: true, + lastVerifyAt: this.lastVerifyAt, + lastResult: this.lastResult, + } + } + + async verify(): Promise { + const gates: VerificationResult["gates"] = [] + + if (this.config.checkBuild && this.config.buildCommand) { + gates.push(await this.runGate("build", this.config.buildCommand)) + } + + if (this.config.checkLint && this.config.lintCommand) { + gates.push(await this.runGate("lint", this.config.lintCommand)) + } + + if (this.config.checkTypes && this.config.typeCheckCommand) { + gates.push(await this.runGate("type-check", this.config.typeCheckCommand)) + } + + if (this.config.checkTests && this.config.testCommand) { + gates.push(await this.runGate("tests", this.config.testCommand)) + } + + const passed = gates.every((g) => g.passed) + const failedGates = gates.filter((g) => !g.passed) + + let summary: string + if (gates.length === 0) { + summary = "No verification gates 
configured" + } else if (passed) { + summary = `All ${gates.length} verification gates passed` + } else { + summary = `${failedGates.length}/${gates.length} gates failed: ${failedGates.map((g) => g.name).join(", ")}` + } + + this.logger?.appendLine(`[VerificationEngine] ${summary}`) + + this.lastVerifyAt = Date.now() + this.lastResult = { passed, gates, summary } + + return { passed, gates, summary } + } + + private async runGate( + name: string, + command: string, + ): Promise { + const start = Date.now() + + try { + // Use dynamic import for child_process to avoid issues in webview context + const { execSync } = await import("child_process") + + const output = execSync(command, { + cwd: this.config.cwd, + timeout: this.config.gateTimeoutMs, + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + }) + + const durationMs = Date.now() - start + this.logger?.appendLine( + `[VerificationEngine] Gate "${name}" passed (${durationMs}ms)`, + ) + return { name, passed: true, output: output.slice(0, 1000), durationMs } + } catch (error: any) { + const durationMs = Date.now() - start + const errorMsg = + error?.stderr || error?.stdout || error?.message || String(error) + this.logger?.appendLine( + `[VerificationEngine] Gate "${name}" FAILED (${durationMs}ms): ${errorMsg.slice(0, 200)}`, + ) + return { + name, + passed: false, + error: errorMsg.slice(0, 1000), + durationMs, + } + } + } +} diff --git a/src/services/self-improving/__tests__/ActionExecutor.spec.ts b/src/services/self-improving/__tests__/ActionExecutor.spec.ts new file mode 100644 index 0000000000..aee7a9c804 --- /dev/null +++ b/src/services/self-improving/__tests__/ActionExecutor.spec.ts @@ -0,0 +1,242 @@ +import { ActionExecutor } from "../ActionExecutor" +import type { ImprovementAction } from "../types" + +describe("ActionExecutor", () => { + const logger = { appendLine: vi.fn() } + + beforeEach(() => { + logger.appendLine.mockReset() + }) + + it("writes prompt, error, and tool guidance into memory", async 
() => { + const memoryStore = { + store: vi.fn().mockResolvedValue({ id: "mem-1" }), + } as any + const skillUsageStore = { getOrCreate: vi.fn() } as any + const executor = new ActionExecutor(memoryStore, skillUsageStore, logger) + + const actions: ImprovementAction[] = [ + { + id: "action-1", + actionType: "PROMPT_ENRICHMENT", + target: "system-prompt", + payload: { summary: "Prefer semantic search before regex search" }, + timestamp: 1, + }, + { + id: "action-2", + actionType: "ERROR_AVOIDANCE", + target: "task-execution", + payload: { summary: "Handle ENOENT before retry", errorKeys: ["ENOENT"] }, + timestamp: 2, + }, + { + id: "action-3", + actionType: "TOOL_PREFERENCE", + target: "task-execution", + payload: { summary: "Use codebase_search before search_files", toolNames: ["codebase_search"] }, + timestamp: 3, + }, + ] + + const succeeded = await executor.executeBatch(actions) + + expect(succeeded).toEqual(new Set(["action-1", "action-2", "action-3"])) + expect(memoryStore.store).toHaveBeenNthCalledWith(1, { + content: "Prefer semantic search before regex search", + source: "learning", + tags: ["learned", "prompt"], + }) + expect(memoryStore.store).toHaveBeenNthCalledWith(2, { + content: "Handle ENOENT before retry", + source: "learning", + tags: ["error-avoidance", "error:ENOENT"], + }) + expect(memoryStore.store).toHaveBeenNthCalledWith(3, { + content: "Use codebase_search before search_files", + source: "learning", + tags: ["tool-preference", "tool:codebase_search"], + }) + }) + + it("records skill suggestions in the telemetry sidecar", async () => { + const memoryStore = { store: vi.fn() } as any + const skillUsageStore = { getOrCreate: vi.fn() } as any + const executor = new ActionExecutor(memoryStore, skillUsageStore, logger) + + const action: ImprovementAction = { + id: "action-skill", + actionType: "SKILL_SUGGESTION", + target: "skills-manager", + payload: { + summary: "Create a self-improving review skill", + skillName: "Self Improving Review Skill", 
+ }, + timestamp: 1, + } + + await expect(executor.execute(action)).resolves.toBe(true) + expect(skillUsageStore.getOrCreate).toHaveBeenCalledWith( + expect.stringMatching(/^suggested:/), + "Self Improving Review Skill", + "agent", + ) + expect(logger.appendLine).toHaveBeenCalledWith( + "[ActionExecutor] Skill suggestion recorded: Create a self-improving review skill", + ) + }) + + it("creates agent-managed skills from mutation actions", async () => { + const memoryStore = { store: vi.fn() } as any + const skillUsageStore = { getOrCreate: vi.fn() } as any + const skillsManager = { + createSkillFromContent: vi + .fn() + .mockResolvedValue("/tmp/.roo/skills/workflow-read-file-search-files/SKILL.md"), + } as any + const executor = new ActionExecutor(memoryStore, skillUsageStore, logger, skillsManager) + + const action: ImprovementAction = { + id: "action-skill-create", + actionType: "SKILL_CREATE", + target: "skills-manager", + payload: { + skillName: "workflow-read-file-search-files", + description: "Use when tasks repeatedly succeed with read_file and search_files.", + content: + "---\nname: workflow-read-file-search-files\ndescription: Use when tasks repeatedly succeed with read_file and search_files.\n---\n\n# Workflow\n", + source: "project", + modeSlugs: ["code"], + }, + timestamp: 1, + } + + await expect(executor.execute(action)).resolves.toBe(true) + expect(skillsManager.createSkillFromContent).toHaveBeenCalledWith( + "workflow-read-file-search-files", + "project", + "Use when tasks repeatedly succeed with read_file and search_files.", + expect.stringContaining("name: workflow-read-file-search-files"), + ["code"], + ) + expect(skillUsageStore.getOrCreate).toHaveBeenCalledWith( + "skill:project:workflow-read-file-search-files", + "workflow-read-file-search-files", + "agent", + ) + }) + + it("creates specialized skills from scratch via SKILL_CREATE_FROM_SCRATCH", async () => { + const memoryStore = { store: vi.fn() } as any + const skillUsageStore = { 
getOrCreate: vi.fn() } as any + const skillsManager = { + createSkillFromContent: vi.fn().mockResolvedValue("/tmp/.roo/skills/react-component-builder/SKILL.md"), + } as any + const executor = new ActionExecutor(memoryStore, skillUsageStore, logger, skillsManager) + + const action: ImprovementAction = { + id: "action-specialized", + actionType: "SKILL_CREATE_FROM_SCRATCH", + target: "skills-manager", + payload: { + name: "react-component-builder", + description: "Specialized skill for building React components", + instructions: "# React Component Builder\n\n## Instructions\n\n1. Create component file\n2. Add TypeScript types\n3. Write tests", + source: "project", + modeSlugs: ["code"], + tools: ["read_file", "write_to_file", "search_files"], + assets: [ + { relativePath: "scripts/validate.sh", content: "#!/bin/bash\necho validate" }, + ], + }, + timestamp: 1, + } + + await expect(executor.execute(action)).resolves.toBe(true) + expect(skillsManager.createSkillFromContent).toHaveBeenCalledWith( + "react-component-builder", + "project", + "Specialized skill for building React components", + expect.stringContaining("name: react-component-builder"), + ["code"], + ) + expect(skillUsageStore.getOrCreate).toHaveBeenCalledWith( + expect.stringMatching(/^skill:project:react-component-builder/), + "react-component-builder", + "agent", + ) + }) + + it("rejects SKILL_CREATE_FROM_SCRATCH with missing required fields", async () => { + const memoryStore = { store: vi.fn() } as any + const skillUsageStore = { getOrCreate: vi.fn() } as any + const skillsManager = { + createSkillFromContent: vi.fn(), + } as any + const executor = new ActionExecutor(memoryStore, skillUsageStore, logger, skillsManager) + + const action: ImprovementAction = { + id: "action-incomplete", + actionType: "SKILL_CREATE_FROM_SCRATCH", + target: "skills-manager", + payload: { + name: "incomplete-skill", + // missing description and instructions + }, + timestamp: 1, + } + + await 
expect(executor.execute(action)).resolves.toBe(false) + expect(skillsManager.createSkillFromContent).not.toHaveBeenCalled() + }) + + it("updates existing agent-managed skills from mutation actions", async () => { + const memoryStore = { store: vi.fn() } as any + const skillUsageStore = { + getOrCreate: vi.fn(), + bumpPatch: vi.fn().mockResolvedValue(undefined), + } as any + const skillsManager = { + updateSkillContent: vi.fn().mockResolvedValue(undefined), + } as any + const executor = new ActionExecutor(memoryStore, skillUsageStore, logger, skillsManager) + + const action: ImprovementAction = { + id: "action-skill-update", + actionType: "SKILL_UPDATE", + target: "skills-manager", + payload: { + skillId: "skill:project:workflow-read-file-search-files", + skillName: "workflow-read-file-search-files", + content: + "---\nname: workflow-read-file-search-files\ndescription: Updated workflow\n---\n\n# Workflow\nUpdated\n", + source: "project", + mode: "code", + }, + timestamp: 1, + } + + await expect(executor.execute(action)).resolves.toBe(true) + expect(skillsManager.updateSkillContent).toHaveBeenCalledWith( + "workflow-read-file-search-files", + "project", + expect.stringContaining("Updated workflow"), + "code", + ) + expect(skillUsageStore.bumpPatch).toHaveBeenCalledWith("skill:project:workflow-read-file-search-files") + }) + + it("keeps invalid actions pending by reporting failure", async () => { + const executor = new ActionExecutor({ store: vi.fn() } as any, { getOrCreate: vi.fn() } as any, logger) + + await expect( + executor.execute({ + id: "invalid", + actionType: "PROMPT_ENRICHMENT", + target: "system-prompt", + payload: {}, + timestamp: 1, + }), + ).resolves.toBe(false) + }) +}) diff --git a/src/services/self-improving/__tests__/AgentMemoryAdapter.spec.ts b/src/services/self-improving/__tests__/AgentMemoryAdapter.spec.ts new file mode 100644 index 0000000000..5009c3630b --- /dev/null +++ b/src/services/self-improving/__tests__/AgentMemoryAdapter.spec.ts @@ 
-0,0 +1,134 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" + +import { AgentMemoryAdapter } from "../AgentMemoryAdapter" + +describe("AgentMemoryAdapter", () => { + const logger = { appendLine: vi.fn() } + const adapters: AgentMemoryAdapter[] = [] + + beforeEach(() => { + logger.appendLine.mockReset() + }) + + afterEach(async () => { + await Promise.all(adapters.splice(0).map((adapter) => adapter.dispose())) + vi.unstubAllGlobals() + vi.restoreAllMocks() + }) + + function createAdapter(): AgentMemoryAdapter { + const adapter = new AgentMemoryAdapter(logger) + adapters.push(adapter) + return adapter + } + + it("should report unavailable when server is not running", async () => { + vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("Connection refused"))) + + const adapter = createAdapter() + await adapter.initialize() + + const stats = await adapter.getStats() + expect(stats.backend).toContain("unavailable") + expect(stats.entryCount).toBe(0) + }) + + it("should return null from store when unavailable", async () => { + vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("Connection refused"))) + + const adapter = createAdapter() + await adapter.initialize() + + const result = await adapter.store({ + content: "test memory", + source: "learning", + }) + + expect(result).toBeNull() + }) + + it("should return empty array from search when unavailable", async () => { + vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("Connection refused"))) + + const adapter = createAdapter() + await adapter.initialize() + + const results = await adapter.search("test") + expect(results).toEqual([]) + }) + + it("should return empty array from recall when unavailable", async () => { + vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("Connection refused"))) + + const adapter = createAdapter() + await adapter.initialize() + + const results = await adapter.recall() + expect(results).toEqual([]) + }) + + it("should ignore empty 
forgetByContent queries", async () => { + const fetchSpy = vi.fn() + vi.stubGlobal("fetch", fetchSpy) + + const adapter = createAdapter() + + await expect(adapter.forgetByContent(" ")).resolves.toBe(0) + expect(fetchSpy).not.toHaveBeenCalled() + }) + + it("should trim forgetByContent queries before searching", async () => { + const fetchSpy = vi.fn(async (input: string, init?: RequestInit) => { + if (input.endsWith("/agentmemory/livez")) { + return { ok: true } as Response + } + + if (input.endsWith("/agentmemory/search")) { + const body = JSON.parse(String(init?.body)) as { query: string } + expect(body.query).toBe("Foo") + return { + ok: true, + json: async () => ({ + results: [ + { + observation: { id: "memory-1", content: "foo memory" }, + score: 1, + }, + ], + }), + } as Response + } + + if (input.endsWith("/agentmemory/forget")) { + return { ok: true, json: async () => ({ success: true }) } as Response + } + + throw new Error(`Unexpected fetch call: ${input}`) + }) + vi.stubGlobal("fetch", fetchSpy) + + const adapter = createAdapter() + await adapter.initialize() + + await expect(adapter.forgetByContent(" Foo ")).resolves.toBe(1) + }) + + it("should clean up health check interval on dispose", async () => { + vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("Connection refused"))) + + const adapter = createAdapter() + await adapter.initialize() + await adapter.dispose() + + const result = await adapter.store({ + content: "test", + source: "learning", + }) + expect(result).toBeNull() + }) + + it("should have correct backend type", () => { + const adapter = createAdapter() + expect(adapter.backendType).toBe("agentmemory") + }) +}) diff --git a/src/services/self-improving/__tests__/AutoModeOrchestrator.spec.ts b/src/services/self-improving/__tests__/AutoModeOrchestrator.spec.ts new file mode 100644 index 0000000000..0c36741bea --- /dev/null +++ b/src/services/self-improving/__tests__/AutoModeOrchestrator.spec.ts @@ -0,0 +1,302 @@ +import { describe, it, 
expect, vi, beforeEach, afterEach } from "vitest" +import { AutoModeOrchestrator } from "../AutoModeOrchestrator" +import type { ToolErrorHealer } from "../ToolErrorHealer" +import type { ResilienceService } from "../ResilienceService" + +describe("AutoModeOrchestrator", () => { + let orchestrator: AutoModeOrchestrator + let mockLogger: { appendLine: ReturnType } + let mockHealer: Partial + let mockResilience: Partial + + beforeEach(() => { + mockLogger = { appendLine: vi.fn() } + mockHealer = { + handleToolError: vi.fn(), + } + mockResilience = { + onStreamingFailure: vi.fn(), + } + orchestrator = new AutoModeOrchestrator(mockLogger as any) + }) + + afterEach(() => { + orchestrator.stop() + }) + + describe("constructor", () => { + it("should use default config when no config provided", () => { + const config = orchestrator.getConfig() + expect(config.enabled).toBe(true) + expect(config.autoCreateModes).toBe(true) + expect(config.autoHeal).toBe(true) + expect(config.minPatternConfidence).toBe(0.3) + expect(config.minPatternFrequency).toBe(2) + expect(config.reviewIntervalMs).toBe(30000) + }) + + it("should merge provided config with defaults", () => { + const custom = new AutoModeOrchestrator(mockLogger as any, { + enabled: false, + reviewIntervalMs: 10000, + }) + const config = custom.getConfig() + expect(config.enabled).toBe(false) + expect(config.autoCreateModes).toBe(true) // from defaults + expect(config.reviewIntervalMs).toBe(10000) + custom.stop() + }) + + it("should accept optional ToolErrorHealer and ResilienceService", () => { + const custom = new AutoModeOrchestrator( + mockLogger as any, + undefined, + mockHealer as ToolErrorHealer, + mockResilience as ResilienceService, + ) + expect(custom).toBeInstanceOf(AutoModeOrchestrator) + custom.stop() + }) + }) + + describe("updateConfig", () => { + it("should update config values", () => { + orchestrator.updateConfig({ enabled: false, autoCreateModes: false }) + const config = orchestrator.getConfig() + 
expect(config.enabled).toBe(false) + expect(config.autoCreateModes).toBe(false) + expect(config.autoHeal).toBe(true) // unchanged + }) + }) + + describe("start/stop", () => { + it("should not start timer when disabled", async () => { + const disabled = new AutoModeOrchestrator(mockLogger as any, { enabled: false }) + await disabled.start() + disabled.stop() // should be safe + }) + + it("should start timer when enabled", async () => { + await orchestrator.start() + orchestrator.stop() + expect(mockLogger.appendLine).toHaveBeenCalledWith(expect.stringContaining("stopped")) + }) + }) + + describe("onTaskCompleted", () => { + it("should not process when disabled", async () => { + orchestrator.updateConfig({ enabled: false }) + await orchestrator.onTaskCompleted(true) + await orchestrator.onTaskCompleted(false) + }) + + it("should trigger auto-heal on failure", async () => { + orchestrator.setPatternAnalyzer({} as any) + + await orchestrator.onTaskCompleted(false) + expect(mockLogger.appendLine).toHaveBeenCalledWith(expect.stringContaining("failure #1 detected")) + }) + + it("should not trigger auto-heal on success", async () => { + // Spy on private autoHeal by checking logger output + await orchestrator.onTaskCompleted(true) + // Should not log failure-related messages + const failureLogs = mockLogger.appendLine.mock.calls.filter( + (call: any[]) => typeof call[0] === "string" && call[0].includes("failure"), + ) + expect(failureLogs.length).toBe(0) + }) + }) + + describe("autoHeal — ToolErrorHealer integration", () => { + it("should call ToolErrorHealer when missing parameter error detected", async () => { + const healer: Partial = { + handleToolError: vi.fn().mockReturnValue({ + fix: "Provide a valid regex pattern for the search", + autoCorrectable: true, + }), + } + const orch = new AutoModeOrchestrator(mockLogger as any, undefined, healer as ToolErrorHealer) + orch.setPatternAnalyzer({} as any) + orch.recordFailure("search_files", "Missing required parameter: 
regex") + + await orch.onTaskCompleted(false) + + expect(healer.handleToolError).toHaveBeenCalledWith("search_files", "regex") + expect(mockLogger.appendLine).toHaveBeenCalledWith( + expect.stringContaining("Auto-heal: applied tool error fix"), + ) + orch.stop() + }) + + it("should not call ToolErrorHealer when no missing parameter error", async () => { + const healer: Partial = { + handleToolError: vi.fn(), + } + const orch = new AutoModeOrchestrator(mockLogger as any, undefined, healer as ToolErrorHealer) + orch.setPatternAnalyzer({} as any) + orch.recordFailure("search_files", "Connection timeout") + + await orch.onTaskCompleted(false) + + expect(healer.handleToolError).not.toHaveBeenCalled() + orch.stop() + }) + + it("should fall through when ToolErrorHealer returns null", async () => { + const healer: Partial = { + handleToolError: vi.fn().mockReturnValue(null), + } + const orch = new AutoModeOrchestrator(mockLogger as any, undefined, healer as ToolErrorHealer) + orch.setPatternAnalyzer({} as any) + orch.recordFailure("search_files", "Missing required parameter: regex") + + await orch.onTaskCompleted(false) + + expect(healer.handleToolError).toHaveBeenCalled() + // Should fall through to pattern analysis fallback + expect(mockLogger.appendLine).toHaveBeenCalledWith(expect.stringContaining("queued for pattern analysis")) + orch.stop() + }) + }) + + describe("autoHeal — ResilienceService integration", () => { + it("should call ResilienceService for transient failures", async () => { + const resilience: Partial = { + onStreamingFailure: vi.fn().mockReturnValue(2000), + } + const orch = new AutoModeOrchestrator( + mockLogger as any, + undefined, + undefined, + resilience as ResilienceService, + ) + orch.setPatternAnalyzer({} as any) + orch.recordFailure("execute_command", "Network timeout") + + await orch.onTaskCompleted(false) + + expect(resilience.onStreamingFailure).toHaveBeenCalled() + expect(mockLogger.appendLine).toHaveBeenCalledWith( + 
expect.stringContaining("Auto-heal: scheduled retry in 2000ms"), + ) + orch.stop() + }) + + it("should handle max retries exceeded from ResilienceService", async () => { + const resilience: Partial = { + onStreamingFailure: vi.fn().mockReturnValue(-1), + } + const orch = new AutoModeOrchestrator( + mockLogger as any, + undefined, + undefined, + resilience as ResilienceService, + ) + orch.setPatternAnalyzer({} as any) + orch.recordFailure("execute_command", "Network timeout") + + await orch.onTaskCompleted(false) + + expect(resilience.onStreamingFailure).toHaveBeenCalled() + expect(mockLogger.appendLine).toHaveBeenCalledWith(expect.stringContaining("max retries exceeded")) + orch.stop() + }) + }) + + describe("autoHeal — setter injection", () => { + it("should accept ToolErrorHealer via setter", () => { + const orch = new AutoModeOrchestrator(mockLogger as any) + orch.setToolErrorHealer(mockHealer as ToolErrorHealer) + // No error means success + orch.stop() + }) + + it("should accept ResilienceService via setter", () => { + const orch = new AutoModeOrchestrator(mockLogger as any) + orch.setResilienceService(mockResilience as ResilienceService) + orch.stop() + }) + }) + + describe("getStatus", () => { + it("should return status object", () => { + const status = orchestrator.getStatus() + expect(status.autoModeEnabled).toBe(true) + expect(status.autoCreateModes).toBe(true) + expect(status.createdModes).toBe(0) + expect(status.createdModeSlugs).toEqual([]) + expect(status.lastModeCreation).toBe("never") + }) + }) + + describe("autoCreateModes", () => { + it("should create modes from candidate patterns", async () => { + const mockFactory = { + createModesFromPatterns: vi.fn().mockResolvedValue(["read_file-mode", "command-mode"]), + } + orchestrator.setModeFactory(mockFactory as any) + orchestrator.setPatternProvider(() => [ + { + id: "p1", + patternType: "tool", + state: "active", + summary: "pattern 1", + confidenceScore: 0.5, + frequency: 3, + successRate: 0.8, + 
firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + }, + { + id: "p2", + patternType: "tool", + state: "active", + summary: "pattern 2", + confidenceScore: 0.6, + frequency: 4, + successRate: 0.9, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["execute_command"] }, + }, + ]) + + // Trigger auto-create via onTaskCompleted (throttle bypass: first call always allowed) + await orchestrator.onTaskCompleted(true) + + expect(mockFactory.createModesFromPatterns).toHaveBeenCalled() + expect(mockLogger.appendLine).toHaveBeenCalledWith(expect.stringContaining("Created 2 custom modes")) + }) + + it("should skip patterns below confidence threshold", async () => { + const mockFactory = { + createModesFromPatterns: vi.fn().mockResolvedValue([]), + } + orchestrator.setModeFactory(mockFactory as any) + orchestrator.setPatternProvider(() => [ + { + id: "p-low", + patternType: "tool", + state: "active", + summary: "low confidence", + confidenceScore: 0.1, + frequency: 5, + successRate: 0.5, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + }, + ]) + + await orchestrator.onTaskCompleted(true) + + // No candidates → createModesFromPatterns should not be called + expect(mockFactory.createModesFromPatterns).not.toHaveBeenCalled() + }) + }) +}) diff --git a/src/services/self-improving/__tests__/CodeIndexAdapter.spec.ts b/src/services/self-improving/__tests__/CodeIndexAdapter.spec.ts new file mode 100644 index 0000000000..47e5896985 --- /dev/null +++ b/src/services/self-improving/__tests__/CodeIndexAdapter.spec.ts @@ -0,0 +1,73 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { CodeIndexAdapter } from "../CodeIndexAdapter" +import type { CodeIndexManager } from "../../code-index/manager" +import type { VectorStoreSearchResult } from "../../code-index/interfaces/vector-store" + 
+describe("CodeIndexAdapter", () => { + let adapter: CodeIndexAdapter + let mockManager: CodeIndexManager + + beforeEach(() => { + mockManager = { + getCurrentStatus: vi.fn().mockReturnValue({ + systemStatus: "Indexed", + fileStatuses: {}, + }), + searchIndex: vi.fn().mockResolvedValue([]), + } as unknown as CodeIndexManager + + adapter = new CodeIndexAdapter(undefined, mockManager) + }) + + describe("searchVectorStore", () => { + it("returns empty array when manager is not set", async () => { + const adapterWithoutManager = new CodeIndexAdapter() + const result = await adapterWithoutManager.searchVectorStore("test query") + expect(result).toEqual([]) + }) + + it("returns search results from manager", async () => { + const mockResults: VectorStoreSearchResult[] = [ + { + id: "file1", + score: 0.95, + payload: { + filePath: "src/test.ts", + startLine: 1, + endLine: 10, + codeChunk: "console.log('hello')", + }, + }, + ] + vi.mocked(mockManager.searchIndex).mockResolvedValue(mockResults) + + const result = await adapter.searchVectorStore("test query") + expect(result).toEqual(mockResults) + expect(mockManager.searchIndex).toHaveBeenCalledWith("test query", undefined) + }) + + it("passes directoryPrefix to manager", async () => { + vi.mocked(mockManager.searchIndex).mockResolvedValue([]) + + await adapter.searchVectorStore("test query", "src/utils") + expect(mockManager.searchIndex).toHaveBeenCalledWith("test query", "src/utils") + }) + + it("returns empty array on search error", async () => { + vi.mocked(mockManager.searchIndex).mockRejectedValue(new Error("search failed")) + + const result = await adapter.searchVectorStore("test query") + expect(result).toEqual([]) + }) + + it("returns empty array when manager is set via setCodeIndexManager", async () => { + const freshAdapter = new CodeIndexAdapter() + freshAdapter.setCodeIndexManager(mockManager) + vi.mocked(mockManager.searchIndex).mockResolvedValue([]) + + const result = await freshAdapter.searchVectorStore("test 
query") + expect(result).toEqual([]) + expect(mockManager.searchIndex).toHaveBeenCalledWith("test query", undefined) + }) + }) +}) diff --git a/src/services/self-improving/__tests__/CuratorService.spec.ts b/src/services/self-improving/__tests__/CuratorService.spec.ts new file mode 100644 index 0000000000..a6a5556b5a --- /dev/null +++ b/src/services/self-improving/__tests__/CuratorService.spec.ts @@ -0,0 +1,139 @@ +import * as fs from "fs/promises" +import * as os from "os" +import * as path from "path" + +import { CuratorService } from "../CuratorService" +import { SkillUsageStore } from "../SkillUsageStore" + +const DAY_MS = 24 * 60 * 60 * 1000 + +describe("CuratorService", () => { + let tempDir: string + let logger: { appendLine: ReturnType } + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "zoo-curator-")) + logger = { appendLine: vi.fn() } + }) + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }) + }) + + async function seedSkillUsage(records: unknown[]): Promise { + const targetDir = path.join(tempDir, "self-improving") + await fs.mkdir(targetDir, { recursive: true }) + await fs.writeFile(path.join(targetDir, "skill-usage.json"), JSON.stringify(records, null, 2), "utf-8") + } + + it("defers the first run once before applying transitions", async () => { + const now = Date.now() + await seedSkillUsage([ + { + skillId: "agent-skill", + skillName: "Agent Skill", + createdBy: "agent", + state: "active", + pinned: false, + viewCount: 1, + useCount: 1, + patchCount: 0, + createdAt: now - 30 * DAY_MS, + lastActivityAt: now - 20 * DAY_MS, + }, + ]) + + const skillUsageStore = new SkillUsageStore(tempDir, logger) + await skillUsageStore.initialize() + + const service = new CuratorService(tempDir, skillUsageStore, logger, { + intervalMs: 0, + minIdleMs: 0, + firstRunDeferred: true, + backupsEnabled: false, + }) + await service.initialize() + + const deferred = await service.run(now) + 
expect(deferred.error).toBe("Skipped: first-run deferral") + expect(deferred.transitions).toHaveLength(0) + + const applied = await service.run(now + 1) + expect(applied.transitions).toHaveLength(1) + expect(applied.transitions[0]).toMatchObject({ + skillId: "agent-skill", + fromState: "active", + toState: "stale", + }) + expect(skillUsageStore.get("agent-skill")?.state).toBe("stale") + }) + + it("writes backups and reports while protecting pinned and non-agent skills", async () => { + const now = Date.now() + await seedSkillUsage([ + { + skillId: "agent-skill", + skillName: "Agent Skill", + createdBy: "agent", + state: "active", + pinned: false, + viewCount: 1, + useCount: 1, + patchCount: 0, + createdAt: now - 30 * DAY_MS, + lastActivityAt: now - 20 * DAY_MS, + }, + { + skillId: "user-skill", + skillName: "User Skill", + createdBy: "user", + state: "active", + pinned: false, + viewCount: 1, + useCount: 1, + patchCount: 0, + createdAt: now - 30 * DAY_MS, + lastActivityAt: now - 20 * DAY_MS, + }, + { + skillId: "pinned-skill", + skillName: "Pinned Skill", + createdBy: "agent", + state: "active", + pinned: true, + viewCount: 1, + useCount: 1, + patchCount: 0, + createdAt: now - 30 * DAY_MS, + lastActivityAt: now - 20 * DAY_MS, + }, + ]) + + const skillUsageStore = new SkillUsageStore(tempDir, logger) + await skillUsageStore.initialize() + + const service = new CuratorService(tempDir, skillUsageStore, logger, { + intervalMs: 0, + minIdleMs: 0, + firstRunDeferred: false, + backupsEnabled: true, + }) + await service.initialize() + + const report = await service.run(now) + expect(report.backupPath).toBeTruthy() + expect(report.transitions).toHaveLength(1) + expect(skillUsageStore.get("agent-skill")?.state).toBe("stale") + expect(skillUsageStore.get("user-skill")?.state).toBe("active") + expect(skillUsageStore.get("pinned-skill")?.state).toBe("active") + + const runDir = path.join(tempDir, "self-improving", "curator", "reports", report.runId) + const runJson = 
JSON.parse(await fs.readFile(path.join(runDir, "run.json"), "utf-8")) + const markdown = await fs.readFile(path.join(runDir, "REPORT.md"), "utf-8") + const latest = await service.getLatestReport() + + expect(runJson.runId).toBe(report.runId) + expect(markdown).toContain("# Curator Run Report") + expect(latest?.runId).toBe(report.runId) + }) +}) diff --git a/src/services/self-improving/__tests__/ImprovementApplier.spec.ts b/src/services/self-improving/__tests__/ImprovementApplier.spec.ts new file mode 100644 index 0000000000..19796cd31a --- /dev/null +++ b/src/services/self-improving/__tests__/ImprovementApplier.spec.ts @@ -0,0 +1,278 @@ +import { ImprovementApplier } from "../ImprovementApplier" +import type { LearnedPattern } from "../types" + +function createToolPattern(): LearnedPattern { + return { + id: "pattern-tool", + patternType: "tool", + state: "active", + summary: "Effective tool combination: read_file,search_files", + confidenceScore: 0.82, + frequency: 4, + successRate: 0.9, + firstSeenAt: 1, + lastSeenAt: 2, + sourceSignals: ["TASK_SUCCESS"], + context: { + toolNames: ["read_file", "search_files"], + modes: ["code"], + }, + } +} + +describe("ImprovementApplier", () => { + it("creates agent skill actions for repeated tool workflows", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => true, + }) + + const actions = applier.generateActions([createToolPattern()]) + const skillAction = actions.find((action) => action.actionType === "SKILL_CREATE") + + expect(skillAction).toBeDefined() + expect(skillAction?.payload.skillName).toBe("workflow-read-file-search-files") + expect(skillAction?.payload.source).toBe("project") + expect(skillAction?.payload.description).toContain("read_file") + expect(skillAction?.payload.content).toContain("name: workflow-read-file-search-files") + expect(skillAction?.payload.content).toContain("`read_file`") + }) + + it("updates existing 
agent-created workflow skills instead of recreating them", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => ["workflow-read-file-search-files"], + getSkillProvenance: () => "agent", + isAutoSkillsEnabled: () => true, + }) + + const actions = applier.generateActions([createToolPattern()]) + + expect(actions.some((action) => action.actionType === "SKILL_UPDATE")).toBe(true) + expect(actions.some((action) => action.actionType === "SKILL_CREATE")).toBe(false) + }) + + it("does not emit skill mutation actions when auto-skills are disabled", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => false, + }) + + const actions = applier.generateActions([createToolPattern()]) + + expect(actions.some((action) => action.actionType === "SKILL_CREATE")).toBe(false) + expect(actions.some((action) => action.actionType === "SKILL_UPDATE")).toBe(false) + }) + + it("creates global skills when auto-skills scope is global", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => true, + getAutoSkillsScope: () => "global", + }) + + const actions = applier.generateActions([createToolPattern()]) + const skillAction = actions.find((action) => action.actionType === "SKILL_CREATE") + + expect(skillAction?.payload.source).toBe("global") + expect(skillAction?.payload.skillId).toBe("skill:global:workflow-read-file-search-files") + }) + + it("creates a global skill when only a project-scoped skill with the same name exists", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => ["workflow-read-file-search-files"], + getSkillProvenance: () => "agent", + isAutoSkillsEnabled: () => true, + getAutoSkillsScope: () => "global", + ...({ + hasSkill: (name: string, source: "global" | "project") => + name === "workflow-read-file-search-files" && source === "project", + 
getSkillProvenanceForSource: (_name: string, source: "global" | "project") => + source === "project" ? "agent" : "unknown", + } as any), + } as any) + + const actions = applier.generateActions([createToolPattern()]) + + expect(actions.some((action) => action.actionType === "SKILL_CREATE")).toBe(true) + expect(actions.some((action) => action.actionType === "SKILL_UPDATE")).toBe(false) + }) + + it("defaults to source-aware skill existence checks when only getSkillNames is provided", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => ["workflow-read-file-search-files"], + getSkillProvenance: () => "agent", + isAutoSkillsEnabled: () => true, + getAutoSkillsScope: () => "global", + }) + + const actions = applier.generateActions([createToolPattern()]) + + expect(actions.some((action) => action.actionType === "SKILL_CREATE")).toBe(true) + expect(actions.some((action) => action.actionType === "SKILL_UPDATE")).toBe(false) + }) + + describe("specialized skills (SKILL_CREATE_FROM_SCRATCH)", () => { + function createReactPattern(): LearnedPattern { + return { + id: "pattern-react", + patternType: "tool", + state: "active", + summary: "Building React components with TypeScript", + confidenceScore: 0.85, + frequency: 5, + successRate: 0.95, + firstSeenAt: 1, + lastSeenAt: 10, + sourceSignals: ["TASK_SUCCESS"], + context: { + toolNames: ["write_to_file", "read_file", "search_files"], + modes: ["code"], + }, + } + } + + function createApiPattern(): LearnedPattern { + return { + id: "pattern-api", + patternType: "tool", + state: "active", + summary: "Creating REST API endpoints with Express", + confidenceScore: 0.78, + frequency: 4, + successRate: 0.88, + firstSeenAt: 2, + lastSeenAt: 8, + sourceSignals: ["TASK_SUCCESS"], + context: { + toolNames: ["write_to_file", "read_file", "search_files"], + modes: ["code"], + }, + } + } + + function createLowConfidencePattern(): LearnedPattern { + return { + id: "pattern-low", + patternType: "tool", + state: "active", + 
summary: "Building React components with TypeScript", + confidenceScore: 0.4, + frequency: 2, + successRate: 0.6, + firstSeenAt: 1, + lastSeenAt: 2, + sourceSignals: ["TASK_SUCCESS"], + context: { + toolNames: ["write_to_file"], + modes: ["code"], + }, + } + } + + it("generates SKILL_CREATE_FROM_SCRATCH for high-confidence domain patterns", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => true, + getExperiments: () => ({ selfImprovingSpecializedSkills: true } as any), + }) + + const actions = applier.generateActions([createReactPattern()]) + const specializedAction = actions.find( + (action) => action.actionType === "SKILL_CREATE_FROM_SCRATCH", + ) + + expect(specializedAction).toBeDefined() + expect(specializedAction?.payload.name).toContain("react-component") + expect(specializedAction?.payload.description).toContain("react-component") + expect(specializedAction?.payload.instructions).toContain("React Component") + expect(specializedAction?.payload.tools).toEqual(["write_to_file", "read_file", "search_files"]) + expect(specializedAction?.payload.modeSlugs).toEqual(["code"]) + }) + + it("generates SKILL_CREATE_FROM_SCRATCH for API domain patterns", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => true, + getExperiments: () => ({ selfImprovingSpecializedSkills: true } as any), + }) + + const actions = applier.generateActions([createApiPattern()]) + const specializedAction = actions.find( + (action) => action.actionType === "SKILL_CREATE_FROM_SCRATCH", + ) + + expect(specializedAction).toBeDefined() + expect(specializedAction?.payload.name).toContain("api-endpoint") + }) + + it("does not generate specialized skills for low-confidence patterns", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + 
isAutoSkillsEnabled: () => true, + getExperiments: () => ({ selfImprovingSpecializedSkills: true } as any), + }) + + const actions = applier.generateActions([createLowConfidencePattern()]) + const specializedAction = actions.find( + (action) => action.actionType === "SKILL_CREATE_FROM_SCRATCH", + ) + + expect(specializedAction).toBeUndefined() + }) + + it("does not generate specialized skills when experiment is disabled", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => true, + getExperiments: () => ({ selfImprovingSpecializedSkills: false } as any), + }) + + const actions = applier.generateActions([createReactPattern()]) + const specializedAction = actions.find( + (action) => action.actionType === "SKILL_CREATE_FROM_SCRATCH", + ) + + expect(specializedAction).toBeUndefined() + }) + + it("does not generate specialized skills when auto-skills are disabled", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => false, + getExperiments: () => ({ selfImprovingSpecializedSkills: true } as any), + }) + + const actions = applier.generateActions([createReactPattern()]) + const specializedAction = actions.find( + (action) => action.actionType === "SKILL_CREATE_FROM_SCRATCH", + ) + + expect(specializedAction).toBeUndefined() + }) + + it("skips specialized skill creation when skill already exists", () => { + const applier = new ImprovementApplier({ + getSkillNames: () => [], + getSkillProvenance: () => "unknown", + isAutoSkillsEnabled: () => true, + getExperiments: () => ({ selfImprovingSpecializedSkills: true } as any), + hasSkill: (name: string) => name.includes("react-component"), + }) + + const actions = applier.generateActions([createReactPattern()]) + const specializedAction = actions.find( + (action) => action.actionType === "SKILL_CREATE_FROM_SCRATCH", + ) + + 
expect(specializedAction).toBeUndefined() + }) + }) +}) diff --git a/src/services/self-improving/__tests__/LearningStore.spec.ts b/src/services/self-improving/__tests__/LearningStore.spec.ts new file mode 100644 index 0000000000..e5a931fd79 --- /dev/null +++ b/src/services/self-improving/__tests__/LearningStore.spec.ts @@ -0,0 +1,186 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest" +import * as fs from "fs/promises" +import * as path from "path" +import os from "os" +import crypto from "crypto" + +import { LearningStore } from "../LearningStore" + +describe("LearningStore", () => { + let testDir: string + const logger = { appendLine: () => {} } + + beforeEach(async () => { + testDir = path.join(os.tmpdir(), `learning-store-test-${crypto.randomUUID()}`) + await fs.mkdir(testDir, { recursive: true }) + }) + + afterEach(async () => { + await fs.rm(testDir, { recursive: true, force: true }) + }) + + it("should initialize with empty state when no files exist", async () => { + const store = new LearningStore(testDir, logger) + + await store.initialize() + + expect(store.getPatterns()).toHaveLength(0) + expect(store.getRecentEvents()).toHaveLength(0) + }) + + it("should fall back to empty state on corrupted JSON", async () => { + const stateDir = path.join(testDir, "self-improving") + await fs.mkdir(stateDir, { recursive: true }) + await fs.writeFile(path.join(stateDir, "state.json"), "not valid json{{{", "utf-8") + + const store = new LearningStore(testDir, logger) + + await store.initialize() + + expect(store.getPatterns()).toHaveLength(0) + expect(store.getRecentEvents()).toHaveLength(0) + }) + + it("should load valid state correctly", async () => { + const store = new LearningStore(testDir, logger) + await store.initialize() + + store.addEvent({ + id: "test-event", + signal: "TASK_SUCCESS", + timestamp: Date.now(), + context: {}, + outcome: { success: true }, + }) + + await store.persist() + + const store2 = new LearningStore(testDir, logger) + 
await store2.initialize() + + expect(store2.getRecentEvents()).toHaveLength(1) + }) + + it("does not commit state.json when earlier pattern persistence fails", async () => { + const store = new LearningStore(testDir, logger) + await store.initialize() + + store.addEvent({ + id: "baseline-event", + signal: "TASK_SUCCESS", + timestamp: 1, + context: {}, + outcome: { success: true }, + }) + await store.persist() + + const statePath = path.join(testDir, "self-improving", "state.json") + const baselineState = await fs.readFile(statePath, "utf-8") + + store.addPattern({ + id: "pattern-1", + patternType: "prompt", + state: "active", + summary: "Pattern 1", + confidenceScore: 0.8, + frequency: 1, + successRate: 1, + firstSeenAt: 1, + lastSeenAt: 1, + sourceSignals: ["TASK_SUCCESS"], + context: {}, + }) + store.addEvent({ + id: "new-event", + signal: "TASK_SUCCESS", + timestamp: 2, + context: {}, + outcome: { success: true }, + }) + + vi.spyOn(store as any, "persistPatternFiles").mockImplementation(async () => { + await new Promise((resolve) => setTimeout(resolve, 50)) + throw new Error("pattern persist failed") + }) + + await store.persist() + + const stateAfterFailedPersist = await fs.readFile(statePath, "utf-8") + expect(stateAfterFailedPersist).toBe(baselineState) + }) + + it("ignores uncommitted pattern files that are not referenced by state.json", async () => { + const store = new LearningStore(testDir, logger) + await store.initialize() + await store.persist() + + const baseDir = path.join(testDir, "self-improving") + await fs.mkdir(path.join(baseDir, "patterns"), { recursive: true }) + await fs.writeFile( + path.join(baseDir, "patterns", "stray-pattern.json"), + JSON.stringify({ + id: "stray-pattern", + patternType: "prompt", + state: "active", + summary: "Should stay invisible until state.json commits it", + confidenceScore: 0.9, + frequency: 2, + successRate: 1, + firstSeenAt: 1, + lastSeenAt: 1, + sourceSignals: ["TASK_SUCCESS"], + context: {}, + }), + "utf-8", + 
) + + const reloadedStore = new LearningStore(testDir, logger) + await reloadedStore.initialize() + + expect(reloadedStore.getPatterns()).toEqual([]) + }) + + it("should enforce max patterns bound", async () => { + const store = new LearningStore(testDir, logger) + await store.initialize() + + for (let i = 0; i < 120; i++) { + store.addPattern({ + id: `pattern-${i}`, + patternType: "prompt", + state: "active", + summary: `Pattern ${i}`, + confidenceScore: i / 120, + frequency: 1, + successRate: 1, + firstSeenAt: i, + lastSeenAt: i, + sourceSignals: ["TASK_SUCCESS"], + context: {}, + }) + } + + await store.persist() + + expect(store.getPatterns().length).toBeLessThanOrEqual(100) + }) + + it("should enforce max events bound", async () => { + const store = new LearningStore(testDir, logger) + await store.initialize() + + for (let i = 0; i < 600; i++) { + store.addEvent({ + id: `event-${i}`, + signal: "TASK_SUCCESS", + timestamp: Date.now(), + context: {}, + outcome: { success: true }, + }) + } + + await store.persist() + + expect(store.getRecentEvents().length).toBeLessThanOrEqual(500) + }) +}) diff --git a/src/services/self-improving/__tests__/MemoryBackendFactory.spec.ts b/src/services/self-improving/__tests__/MemoryBackendFactory.spec.ts new file mode 100644 index 0000000000..3ee8cabaae --- /dev/null +++ b/src/services/self-improving/__tests__/MemoryBackendFactory.spec.ts @@ -0,0 +1,30 @@ +import { describe, expect, it, vi } from "vitest" + +import { AgentMemoryAdapter } from "../AgentMemoryAdapter" +import { MemoryBackendFactory } from "../MemoryBackendFactory" +import { MemoryStore } from "../MemoryStore" + +describe("MemoryBackendFactory", () => { + const logger = { appendLine: vi.fn() } + const baseDir = "/tmp/test" + + it("should create built-in backend by default", () => { + const backend = MemoryBackendFactory.create("builtin", baseDir, logger) + expect(backend).toBeInstanceOf(MemoryStore) + }) + + it("should create agentmemory backend when specified", () => 
{ + const backend = MemoryBackendFactory.create("agentmemory", baseDir, logger) + expect(backend).toBeInstanceOf(AgentMemoryAdapter) + }) + + it("should create built-in backend for unknown type", () => { + const backend = MemoryBackendFactory.create("unknown-backend" as any, baseDir, logger) + expect(backend).toBeInstanceOf(MemoryStore) + }) + + it("should pass agentMemoryUrl to AgentMemoryAdapter", () => { + const backend = MemoryBackendFactory.create("agentmemory", baseDir, logger, "http://custom:5000") + expect(backend).toBeInstanceOf(AgentMemoryAdapter) + }) +}) diff --git a/src/services/self-improving/__tests__/MemoryStore.spec.ts b/src/services/self-improving/__tests__/MemoryStore.spec.ts new file mode 100644 index 0000000000..dbae6df073 --- /dev/null +++ b/src/services/self-improving/__tests__/MemoryStore.spec.ts @@ -0,0 +1,107 @@ +import * as fs from "fs/promises" +import * as os from "os" +import * as path from "path" + +import { MemoryStore } from "../MemoryStore" + +describe("MemoryStore", () => { + let tempDir: string + const logger = { appendLine: vi.fn() } + + beforeEach(async () => { + logger.appendLine.mockReset() + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "memory-store-")) + }) + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }) + }) + + it("deduplicates persisted entries and keeps the active snapshot frozen", async () => { + const memoryDir = path.join(tempDir, "self-improving", "memory") + await fs.mkdir(memoryDir, { recursive: true }) + await fs.writeFile( + path.join(memoryDir, "environment.json"), + JSON.stringify([ + { + id: "env-1", + content: "Prefer semantic search first", + source: "learning", + createdAt: 1, + updatedAt: 1, + }, + { + id: "env-2", + content: "prefer semantic search first", + source: "learning", + createdAt: 2, + updatedAt: 2, + }, + { + id: "env-3", + content: "Check existing tests before edits", + source: "learning", + createdAt: 3, + updatedAt: 3, + }, + ]), + "utf8", + ) + 
await fs.writeFile( + path.join(memoryDir, "user-profile.json"), + JSON.stringify([ + { id: "user-1", content: "User prefers concise summaries", source: "user", createdAt: 4, updatedAt: 4 }, + ]), + "utf8", + ) + + const store = new MemoryStore(tempDir, logger) + await store.initialize() + + expect(await store.getStats()).toEqual({ entryCount: 3, backend: "builtin" }) + await expect(store.recall(2)).resolves.toMatchObject([{ id: "user-1" }, { id: "env-3" }]) + expect(store.getSnapshotString()).toContain("Prefer semantic search first") + expect(store.getSnapshotString()).not.toContain("prefer semantic search first") + + await store.addEnvironmentEntry("Live write should not appear until next snapshot", { + tags: ["live"], + }) + + expect((await store.getStats()).entryCount).toBe(4) + expect(store.getSnapshotString()).not.toContain("Live write should not appear until next snapshot") + + store.takeSnapshot() + expect(store.getSnapshotString()).toContain("Live write should not appear until next snapshot") + }) + + it("supports duplicate rejection, substring replace/remove, and bounded persistence", async () => { + const store = new MemoryStore(tempDir, logger) + await store.initialize() + + await expect(store.addEnvironmentEntry("Alpha guidance")).resolves.toMatchObject({ content: "Alpha guidance" }) + await expect(store.addEnvironmentEntry("alpha guidance")).resolves.toBeNull() + + await store.addEnvironmentEntry("Beta guidance") + await store.addUserProfileEntry("User prefers short answers") + await store.replaceEnvironmentEntry("beta", "Gamma guidance", { tags: ["replacement"] }) + await expect(store.removeEnvironmentEntry("alpha")).resolves.toBe(true) + await expect(store.forgetByContent(" ")).resolves.toBe(0) + + for (let index = 0; index < 55; index += 1) { + await store.addEnvironmentEntry(`Fact ${index}`) + } + + const persisted = JSON.parse( + await fs.readFile(path.join(tempDir, "self-improving", "memory", "environment.json"), "utf8"), + ) as Array<{ content: 
string }> + + expect((await store.getStats()).entryCount).toBe(51) + expect(persisted).toHaveLength(50) + expect(persisted.some((entry) => entry.content === "Gamma guidance")).toBe(false) + expect(persisted.some((entry) => entry.content === "Alpha guidance")).toBe(false) + expect(persisted.some((entry) => entry.content === "Fact 4")).toBe(false) + expect(persisted.some((entry) => entry.content === "Fact 5")).toBe(true) + expect(persisted.some((entry) => entry.content === "Fact 54")).toBe(true) + expect(store.getSnapshotContext().entries.length).toBeLessThanOrEqual(10) + }) +}) diff --git a/src/services/self-improving/__tests__/ModeFactoryService.spec.ts b/src/services/self-improving/__tests__/ModeFactoryService.spec.ts new file mode 100644 index 0000000000..0f0b48fe5e --- /dev/null +++ b/src/services/self-improving/__tests__/ModeFactoryService.spec.ts @@ -0,0 +1,191 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { ModeFactoryService } from "../ModeFactoryService" +import type { LearnedPattern } from "../types" + +describe("ModeFactoryService", () => { + let factory: ModeFactoryService + + beforeEach(() => { + factory = new ModeFactoryService({ + appendLine: vi.fn(), + } as any) + }) + + describe("deriveModeFromPattern", () => { + it("should return null for pattern with no tool names", () => { + const pattern: LearnedPattern = { + id: "test-1", + patternType: "tool", + state: "active", + summary: "test pattern", + confidenceScore: 0.5, + frequency: 3, + successRate: 0.8, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: {}, + } + expect(factory.deriveModeFromPattern(pattern)).toBeNull() + }) + + it("should create a mode config from a tool pattern", () => { + const pattern: LearnedPattern = { + id: "test-2", + patternType: "tool", + state: "active", + summary: "read then edit pattern", + confidenceScore: 0.7, + frequency: 5, + successRate: 0.9, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + 
sourceSignals: [], + context: { + toolNames: ["read_file", "apply_diff"], + }, + } + const config = factory.deriveModeFromPattern(pattern) + expect(config).not.toBeNull() + expect(config!.slug).toBe("read-file-apply-diff") + expect(config!.name).toContain("Read File") + expect(config!.groups.length).toBeGreaterThan(0) + expect(config!.source).toBe("project") + }) + + it("should derive correct groups from tool names", () => { + const pattern: LearnedPattern = { + id: "test-3", + patternType: "tool", + state: "active", + summary: "command pattern", + confidenceScore: 0.6, + frequency: 4, + successRate: 0.7, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { + toolNames: ["execute_command", "run_slash_command"], + }, + } + const config = factory.deriveModeFromPattern(pattern) + expect(config).not.toBeNull() + const commandGroups = config!.groups.filter((g) => g === "command") + expect(commandGroups.length).toBeGreaterThan(0) + }) + + it("should include error keys in role definition for error patterns", () => { + const pattern: LearnedPattern = { + id: "test-4", + patternType: "error", + state: "active", + summary: "api error pattern", + confidenceScore: 0.4, + frequency: 3, + successRate: 0.3, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { + toolNames: ["read_file"], + errorKeys: ["streaming_failed", "api_timeout"], + }, + } + const config = factory.deriveModeFromPattern(pattern) + expect(config).not.toBeNull() + expect(config!.roleDefinition).toContain("streaming_failed") + expect(config!.roleDefinition).toContain("api_timeout") + }) + }) + + describe("createModeFromPattern", () => { + it("should return null if CustomModesManager not set", async () => { + const pattern: LearnedPattern = { + id: "test-5", + patternType: "tool", + state: "active", + summary: "test", + confidenceScore: 0.5, + frequency: 3, + successRate: 0.8, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: 
[], + context: { + toolNames: ["read_file"], + }, + } + const result = await factory.createModeFromPattern(pattern) + expect(result).toBeNull() + }) + + it("should call updateCustomMode when manager is set", async () => { + const mockManager = { + updateCustomMode: vi.fn().mockResolvedValue(undefined), + } + factory.setCustomModesManager(mockManager as any) + + const pattern: LearnedPattern = { + id: "test-6", + patternType: "tool", + state: "active", + summary: "test", + confidenceScore: 0.5, + frequency: 3, + successRate: 0.8, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { + toolNames: ["read_file"], + }, + } + const result = await factory.createModeFromPattern(pattern) + expect(result).toBe("read-file") + expect(mockManager.updateCustomMode).toHaveBeenCalledTimes(1) + }) + }) + + describe("createModesFromPatterns", () => { + it("should create modes from multiple patterns", async () => { + const mockManager = { + updateCustomMode: vi.fn().mockResolvedValue(undefined), + } + factory.setCustomModesManager(mockManager as any) + + const patterns: LearnedPattern[] = [ + { + id: "p1", + patternType: "tool", + state: "active", + summary: "pattern 1", + confidenceScore: 0.5, + frequency: 3, + successRate: 0.8, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + }, + { + id: "p2", + patternType: "tool", + state: "active", + summary: "pattern 2", + confidenceScore: 0.6, + frequency: 4, + successRate: 0.9, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["execute_command"] }, + }, + ] + const results = await factory.createModesFromPatterns(patterns) + expect(results).toHaveLength(2) + expect(results).toContain("read-file") + expect(results).toContain("execute-command") + }) + }) +}) diff --git a/src/services/self-improving/__tests__/PatternAnalyzer.spec.ts b/src/services/self-improving/__tests__/PatternAnalyzer.spec.ts 
new file mode 100644 index 0000000000..7a248db5fc --- /dev/null +++ b/src/services/self-improving/__tests__/PatternAnalyzer.spec.ts @@ -0,0 +1,94 @@ +import { describe, expect, it } from "vitest" + +import { PatternAnalyzer } from "../PatternAnalyzer" +import type { LearnedPattern, LearningEvent } from "../types" + +function createEvent(id: string, signal: LearningEvent["signal"], toolNames: string[]): LearningEvent { + return { + id, + signal, + timestamp: Number(id.replace(/\D/g, "")) || 1, + context: { + toolNames, + }, + outcome: {}, + } +} + +function createPattern(overrides: Partial<LearnedPattern>): LearnedPattern { + return { + id: "pattern-1", + patternType: "tool", + state: "active", + summary: "Effective tool combination: browser,search", + confidenceScore: 0.5, + frequency: 4, + successRate: 0.8, + firstSeenAt: 1, + lastSeenAt: 1, + sourceSignals: ["TASK_SUCCESS"], + context: { + toolNames: ["browser", "search"], + }, + ...overrides, + } +} + +describe("PatternAnalyzer", () => { + it("does not merge tool-combination patterns by summary substring alone", async () => { + const analyzer = new PatternAnalyzer() + const existingPatterns = [createPattern({})] + const events = [ + createEvent("event-1", "TASK_SUCCESS", ["search"]), + createEvent("event-2", "TASK_SUCCESS", ["search"]), + createEvent("event-3", "TASK_SUCCESS", ["search"]), + ] + + const patterns = await analyzer.analyze(events, existingPatterns) + const toolPatterns = patterns.filter((pattern) => pattern.patternType === "tool") + + expect(toolPatterns).toHaveLength(1) + expect(toolPatterns[0]).toMatchObject({ + id: expect.not.stringMatching(/^pattern-1$/), + summary: "Effective tool combination: search", + frequency: 3, + context: { + toolNames: ["search"], + }, + }) + }) + + it("preserves cumulative frequency for existing tool-preference patterns", async () => { + const analyzer = new PatternAnalyzer() + const existingPatterns = [ + createPattern({ + id: "prompt-pattern", + patternType: "prompt", + summary:
"Prefer terminal for reliable results", + frequency: 5, + successRate: 0.8, + context: { + toolNames: ["terminal"], + }, + }), + ] + const events = [ + createEvent("event-1", "TASK_SUCCESS", ["terminal"]), + createEvent("event-2", "TASK_SUCCESS", ["terminal"]), + createEvent("event-3", "TASK_FAILURE", ["terminal"]), + ] + + const patterns = await analyzer.analyze(events, existingPatterns) + const promptPatterns = patterns.filter((pattern) => pattern.patternType === "prompt") + + expect(promptPatterns).toHaveLength(1) + expect(promptPatterns[0]).toMatchObject({ + id: "prompt-pattern", + frequency: 8, + successRate: 0.75, + context: { + toolNames: ["terminal"], + }, + }) + }) +}) diff --git a/src/services/self-improving/__tests__/PreventionEngine.spec.ts b/src/services/self-improving/__tests__/PreventionEngine.spec.ts new file mode 100644 index 0000000000..6f6d42058d --- /dev/null +++ b/src/services/self-improving/__tests__/PreventionEngine.spec.ts @@ -0,0 +1,125 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { PreventionEngine } from "../PreventionEngine" +import type { CodeIndexAdapter } from "../CodeIndexAdapter" +import type { VectorStoreSearchResult } from "../../code-index/interfaces/vector-store" + +describe("PreventionEngine", () => { + let engine: PreventionEngine + let mockAdapter: CodeIndexAdapter + + beforeEach(() => { + mockAdapter = { + isAvailable: vi.fn().mockReturnValue(true), + searchVectorStore: vi.fn().mockResolvedValue([]), + getInfo: vi.fn().mockReturnValue({ available: true, hits: 1 }), + setCodeIndexManager: vi.fn(), + search: vi.fn().mockResolvedValue([]), + startIndexing: vi.fn().mockResolvedValue(undefined), + stopIndexing: vi.fn(), + clearIndex: vi.fn().mockResolvedValue(undefined), + } as unknown as CodeIndexAdapter + + engine = new PreventionEngine(mockAdapter) + }) + + describe("enrichContextWithCodeIndex", () => { + it("returns original message when adapter is not available", async () => { + 
vi.mocked(mockAdapter.isAvailable).mockReturnValue(false) + const result = await engine.enrichContextWithCodeIndex("write a function") + expect(result).toBe("write a function") + }) + + it("returns original message when adapter returns empty results", async () => { + vi.mocked(mockAdapter.searchVectorStore).mockResolvedValue([]) + const result = await engine.enrichContextWithCodeIndex("write a function") + expect(result).toBe("write a function") + }) + + it("enriches message with search results", async () => { + const mockResults: VectorStoreSearchResult[] = [ + { + id: "file1", + score: 0.95, + payload: { + filePath: "src/utils/helper.ts", + startLine: 10, + endLine: 20, + codeChunk: "export function helper() { return 42 }", + }, + }, + ] + vi.mocked(mockAdapter.searchVectorStore).mockResolvedValue(mockResults) + + const result = await engine.enrichContextWithCodeIndex("write a helper function") + expect(result).toContain("write a helper function") + expect(result).toContain("Relevant existing code from codebase:") + expect(result).toContain("src/utils/helper.ts") + expect(result).toContain("lines 10-20") + expect(result).toContain("export function helper() { return 42 }") + }) + + it("handles search results without line numbers", async () => { + const mockResults: VectorStoreSearchResult[] = [ + { + id: "file2", + score: 0.85, + payload: { + filePath: "src/config.ts", + codeChunk: "const API_URL = 'https://api.example.com'", + startLine: 10, + endLine: 10, + }, + }, + ] + vi.mocked(mockAdapter.searchVectorStore).mockResolvedValue(mockResults) + + const result = await engine.enrichContextWithCodeIndex("find API URL") + expect(result).toContain("src/config.ts") + expect(result).not.toContain("lines") + }) + + it("gracefully falls back on search error", async () => { + vi.mocked(mockAdapter.searchVectorStore).mockRejectedValue(new Error("search failed")) + const result = await engine.enrichContextWithCodeIndex("write a function") + expect(result).toBe("write a 
function") + }) + + it("returns original message when adapter is undefined", async () => { + const engineWithoutAdapter = new PreventionEngine() + const result = await engineWithoutAdapter.enrichContextWithCodeIndex("write a function") + expect(result).toBe("write a function") + }) + + it("truncates long code snippets to 200 chars", async () => { + const longSnippet = "x".repeat(500) + const mockResults: VectorStoreSearchResult[] = [ + { + id: "file3", + score: 0.9, + payload: { + filePath: "src/long.ts", + startLine: 1, + endLine: 100, + codeChunk: longSnippet, + }, + }, + ] + vi.mocked(mockAdapter.searchVectorStore).mockResolvedValue(mockResults) + + const result = await engine.enrichContextWithCodeIndex("find long file") + // Should contain truncated snippet (200 chars with newlines replaced) + expect(result).toContain("src/long.ts") + expect(result).toContain("lines 1-100") + }) + }) + + describe("getPreventionContext", () => { + it("returns prevention context without code index enrichment", () => { + const context = engine.getPreventionContext("write_to_file", { path: "/test.ts" }) + expect(context).toHaveProperty("preValidation") + expect(context).toHaveProperty("cascadeWarning") + expect(context).toHaveProperty("preventionHints") + expect(context).toHaveProperty("recentErrors") + }) + }) +}) diff --git a/src/services/self-improving/__tests__/QuestionEvaluatorService.spec.ts b/src/services/self-improving/__tests__/QuestionEvaluatorService.spec.ts new file mode 100644 index 0000000000..3878c35ece --- /dev/null +++ b/src/services/self-improving/__tests__/QuestionEvaluatorService.spec.ts @@ -0,0 +1,90 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { QuestionEvaluatorService } from "../QuestionEvaluatorService" + +describe("QuestionEvaluatorService", () => { + let service: QuestionEvaluatorService + + beforeEach(() => { + service = new QuestionEvaluatorService({ appendLine: vi.fn() } as any) + }) + + describe("evaluateBestChoice", () => 
{ + it("should fallback to first choice when disabled", async () => { + const disabled = new QuestionEvaluatorService({ appendLine: vi.fn() } as any, { enabled: false }) + const result = await disabled.evaluateBestChoice("test question", [ + { text: "first", mode: null }, + { text: "second", mode: null }, + ]) + expect(result.selectedIndex).toBe(0) + expect(result.evaluatedBy).toBe("fallback") + }) + + it("should fallback to first choice with single choice", async () => { + const result = await service.evaluateBestChoice("test", [{ text: "only choice", mode: null }]) + expect(result.selectedIndex).toBe(0) + expect(result.evaluatedBy).toBe("fallback") + }) + + it("should use contextual analysis for multiple choices", async () => { + const result = await service.evaluateBestChoice("Should I implement the feature?", [ + { text: "No, don't do it", mode: null }, + { text: "Yes, implement the feature now with full test coverage", mode: null }, + ]) + // Contextual analysis should prefer the actionable, specific answer + expect(result.evaluatedBy).toBe("contextual") + expect(result.selectedIndex).toBe(1) // Should pick the actionable answer + }) + + it("should prefer choices with mode switches for delegation questions", async () => { + const result = await service.evaluateBestChoice("Which mode should I switch to for this task?", [ + { text: "Just do it here", mode: null }, + { text: "Switch to code mode", mode: "code" }, + { text: "Switch to architect mode", mode: "architect" }, + ]) + // Should prefer a choice with a mode switch + expect(result.selectedIndex).toBeGreaterThan(0) + expect(result.choices[result.selectedIndex].mode).not.toBeNull() + }) + + it("should use Full Team when available", async () => { + const mockReviewTeam = { + reviewPattern: vi.fn().mockResolvedValue({ + approved: true, + score: 0.8, + summary: "Good choice", + innovatorVote: { approved: true, confidence: 0.8, reasoning: "good" }, + contrarianVote: { approved: true, confidence: 0.7, reasoning: 
"ok" }, + devilsAdvocateVote: { approved: true, confidence: 0.9, reasoning: "great" }, + deciderVote: { approved: true, confidence: 0.8, reasoning: "approved" }, + timestamp: new Date(), + }), + } + service.setReviewTeam(mockReviewTeam as any) + + const result = await service.evaluateBestChoice("test", [ + { text: "option A", mode: null }, + { text: "option B", mode: null }, + ]) + expect(result.evaluatedBy).toBe("full-team") + expect(mockReviewTeam.reviewPattern).toHaveBeenCalledTimes(2) + }) + }) + + describe("getStatus", () => { + it("should return status object", () => { + const status = service.getStatus() + expect(status.enabled).toBe(true) + expect(status.useFullTeam).toBe(true) + }) + }) + + describe("updateConfig", () => { + it("should update config values", () => { + service.updateConfig({ enabled: false, useContextualAnalysis: false }) + const config = service.getConfig() + expect(config.enabled).toBe(false) + expect(config.useContextualAnalysis).toBe(false) + expect(config.useFullTeam).toBe(true) // unchanged + }) + }) +}) diff --git a/src/services/self-improving/__tests__/ResilienceService.spec.ts b/src/services/self-improving/__tests__/ResilienceService.spec.ts new file mode 100644 index 0000000000..229f372ec3 --- /dev/null +++ b/src/services/self-improving/__tests__/ResilienceService.spec.ts @@ -0,0 +1,97 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { ResilienceService } from "../ResilienceService" + +describe("ResilienceService", () => { + let service: ResilienceService + + beforeEach(() => { + service = new ResilienceService({ appendLine: vi.fn() } as any) + }) + + describe("onStreamingFailure", () => { + it("should return backoff delay on first failure", () => { + const delay = service.onStreamingFailure() + expect(delay).toBeGreaterThanOrEqual(2000) + expect(delay).toBeLessThanOrEqual(2200) // base + jitter + }) + + it("should return -1 when max retries exceeded", () => { + for (let i = 0; i < 5; i++) { + 
service.onStreamingFailure() + } + const delay = service.onStreamingFailure() // 6th call + expect(delay).toBe(-1) + }) + + it("should return -1 when disabled", () => { + const disabled = new ResilienceService({ appendLine: vi.fn() } as any, { enabled: false }) + expect(disabled.onStreamingFailure()).toBe(-1) + }) + + it("should increase delay exponentially", () => { + const d1 = service.onStreamingFailure() + const d2 = service.onStreamingFailure() + const d3 = service.onStreamingFailure() + expect(d2).toBeGreaterThan(d1) + expect(d3).toBeGreaterThan(d2) + }) + }) + + describe("onToolParameterError", () => { + it("should return retry action on first error", () => { + const result = service.onToolParameterError("search_files", "regex") + expect(result).not.toBeNull() + expect(result!.action).toBe("retry") + expect(result!.delay).toBeGreaterThanOrEqual(2000) + }) + + it("should return abort action when max retries exceeded", () => { + for (let i = 0; i < 5; i++) { + service.onToolParameterError("search_files", "regex") + } + const result = service.onToolParameterError("search_files", "regex") + expect(result!.action).toBe("abort") + }) + + it("should include suggestion in result", () => { + const result = service.onToolParameterError("search_files", "regex") + expect(result!.suggestion).toContain("regex") + }) + }) + + describe("onTaskSuccess", () => { + it("should reset consecutive failures", () => { + service.onStreamingFailure() + service.onStreamingFailure() + service.onTaskSuccess() + const delay = service.onStreamingFailure() + expect(delay).toBeGreaterThanOrEqual(2000) // back to base delay + }) + }) + + describe("getRecoverySuggestion", () => { + it("should return empty string when not in recovery mode", () => { + expect(service.getRecoverySuggestion()).toBe("") + }) + + it("should return recovery command when in recovery mode", () => { + service.onStreamingFailure() + service.onStreamingFailure() + service.onStreamingFailure() + service.onStreamingFailure() 
+ service.onStreamingFailure() + service.onStreamingFailure() // triggers recovery mode + const suggestion = service.getRecoverySuggestion() + expect(suggestion.length).toBeGreaterThan(0) + }) + }) + + describe("getStatus", () => { + it("should return status object", () => { + const status = service.getStatus() + expect(status.enabled).toBe(true) + expect(status.consecutiveFailures).toBe(0) + expect(status.isInRecoveryMode).toBe(false) + }) + }) +}) diff --git a/src/services/self-improving/__tests__/ReviewPromptFactory.spec.ts b/src/services/self-improving/__tests__/ReviewPromptFactory.spec.ts new file mode 100644 index 0000000000..bf278c5ece --- /dev/null +++ b/src/services/self-improving/__tests__/ReviewPromptFactory.spec.ts @@ -0,0 +1,27 @@ +import { ReviewPromptFactory } from "../ReviewPromptFactory" + +describe("ReviewPromptFactory", () => { + it("creates rubric-driven memory and skill prompts", () => { + const factory = new ReviewPromptFactory() + + const memoryPrompt = factory.createMemoryReviewPrompt("Conversation summary") + const skillPrompt = factory.createSkillReviewPrompt("Conversation summary") + + expect(memoryPrompt.type).toBe("memory") + expect(memoryPrompt.systemPrompt).toContain("FACT:") + expect(memoryPrompt.userPrompt).toContain("Conversation summary") + expect(skillPrompt.type).toBe("skill") + expect(skillPrompt.systemPrompt).toContain("ACTION:") + expect(skillPrompt.systemPrompt).toContain("Priority Order") + }) + + it("creates a combined prompt with memory and skill output sections", () => { + const factory = new ReviewPromptFactory() + const prompt = factory.createCombinedReviewPrompt("Combined summary") + + expect(prompt.type).toBe("combined") + expect(prompt.systemPrompt).toContain("MEMORY_FACT:") + expect(prompt.systemPrompt).toContain("SKILL_ACTION:") + expect(prompt.userPrompt).toContain("Combined summary") + }) +}) diff --git a/src/services/self-improving/__tests__/ReviewTeamService.spec.ts 
b/src/services/self-improving/__tests__/ReviewTeamService.spec.ts new file mode 100644 index 0000000000..1c59f63d26 --- /dev/null +++ b/src/services/self-improving/__tests__/ReviewTeamService.spec.ts @@ -0,0 +1,230 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { ReviewTeamService } from "../ReviewTeamService" +import type { LearnedPattern } from "../../../../packages/types/src/learning" + +describe("ReviewTeamService", () => { + let service: ReviewTeamService + + beforeEach(() => { + service = new ReviewTeamService({ appendLine: vi.fn() } as any) + }) + + describe("reviewPattern", () => { + it("should approve high-confidence patterns", async () => { + const pattern: LearnedPattern = { + id: "test-1", + patternType: "tool", + state: "active", + summary: "high confidence pattern", + confidenceScore: 0.8, + frequency: 10, + successRate: 0.9, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file", "edit_file"] }, + } + const verdict = await service.reviewPattern(pattern) + expect(verdict.approved).toBe(true) + expect(verdict.score).toBeGreaterThanOrEqual(0.5) + }) + + it("should reject low-confidence patterns", async () => { + service.setApprovedPatternCount(1) // Bypass first-pattern boost + const pattern: LearnedPattern = { + id: "test-2", + patternType: "tool", + state: "active", + summary: "low confidence pattern", + confidenceScore: 0.3, // Above minConfidenceForReview (0.2) so it gets reviewed + frequency: 1, + successRate: 0.5, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + } + const verdict = await service.reviewPattern(pattern) + expect(verdict.approved).toBe(false) + }) + + it("should pass through when disabled", async () => { + const disabled = new ReviewTeamService({ appendLine: vi.fn() } as any, { enabled: false }) + const pattern: LearnedPattern = { + id: "test-3", + patternType: "tool", + state: 
"active", + summary: "test", + confidenceScore: 0.5, + frequency: 3, + successRate: 0.8, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + } + const verdict = await disabled.reviewPattern(pattern) + expect(verdict.approved).toBe(true) + expect(verdict.score).toBe(1.0) + }) + + it("should include all 4 persona votes", async () => { + const pattern: LearnedPattern = { + id: "test-4", + patternType: "tool", + state: "active", + summary: "test", + confidenceScore: 0.6, + frequency: 5, + successRate: 0.8, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file", "edit_file", "execute_command"] }, + } + const verdict = await service.reviewPattern(pattern) + expect(verdict.innovatorVote).toBeDefined() + expect(verdict.contrarianVote).toBeDefined() + expect(verdict.devilsAdvocateVote).toBeDefined() + expect(verdict.deciderVote).toBeDefined() + }) + }) + + describe("reviewPatterns", () => { + it("should return approved and rejected lists", async () => { + service.setApprovedPatternCount(1) // Bypass first-pattern boost + const patterns: LearnedPattern[] = [ + { + id: "p1", + patternType: "tool", + state: "active", + summary: "good", + confidenceScore: 0.8, + frequency: 10, + successRate: 0.9, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file", "edit_file"] }, + }, + { + id: "p2", + patternType: "tool", + state: "active", + summary: "bad", + confidenceScore: 0.3, // Above minConfidenceForReview (0.2) so it gets reviewed + frequency: 1, + successRate: 0.3, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + }, + ] + const result = await service.reviewPatterns(patterns) + expect(result.approved).toHaveLength(1) + expect(result.rejected).toHaveLength(1) + expect(result.verdicts).toHaveLength(2) + }) + it("should auto-approve first 
pattern when approvedPatternCount is 0 (cold start boost)", async () => { + const pattern: LearnedPattern = { + id: "cold-start-1", + patternType: "tool", + state: "active", + summary: "cold start pattern", + confidenceScore: 0.3, + frequency: 1, + successRate: 0.3, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + } + // approvedPatternCount starts at 0 + const verdict = await service.reviewPattern(pattern) + expect(verdict.approved).toBe(true) + expect(verdict.score).toBeGreaterThanOrEqual(0.3) + }) + + it("should still reject low-confidence pattern after first pattern is approved", async () => { + // Seed: approve first pattern to bump count + const firstPattern: LearnedPattern = { + id: "seed-1", + patternType: "tool", + state: "active", + summary: "seed", + confidenceScore: 0.3, + frequency: 1, + successRate: 0.3, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + } + const firstVerdict = await service.reviewPattern(firstPattern) + expect(firstVerdict.approved).toBe(true) + + // Now approvedPatternCount is 1, so boost no longer applies + const secondPattern: LearnedPattern = { + id: "cold-start-2", + patternType: "tool", + state: "active", + summary: "second pattern", + confidenceScore: 0.3, + frequency: 1, + successRate: 0.3, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file"] }, + } + const secondVerdict = await service.reviewPattern(secondPattern) + expect(secondVerdict.approved).toBe(false) + }) + + it("should increment approvedPatternCount via reviewPatterns", async () => { + expect(service.getApprovedPatternCount()).toBe(0) + + const patterns: LearnedPattern[] = [ + { + id: "boost-test-1", + patternType: "tool", + state: "active", + summary: "first", + confidenceScore: 0.3, + frequency: 1, + successRate: 0.3, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + 
sourceSignals: [], + context: { toolNames: ["read_file"] }, + }, + { + id: "boost-test-2", + patternType: "tool", + state: "active", + summary: "second", + confidenceScore: 0.8, + frequency: 10, + successRate: 0.9, + firstSeenAt: Date.now(), + lastSeenAt: Date.now(), + sourceSignals: [], + context: { toolNames: ["read_file", "edit_file"] }, + }, + ] + const result = await service.reviewPatterns(patterns) + expect(result.approved).toHaveLength(2) + expect(service.getApprovedPatternCount()).toBe(2) + }) + }) + + describe("updateConfig", () => { + it("should update config values", () => { + service.updateConfig({ innovatorWeight: 0.5, deciderThreshold: 0.8 }) + const config = service.getConfig() + expect(config.innovatorWeight).toBe(0.5) + expect(config.deciderThreshold).toBe(0.8) + }) + }) +}) diff --git a/src/services/self-improving/__tests__/SelfImprovingManager.spec.ts b/src/services/self-improving/__tests__/SelfImprovingManager.spec.ts new file mode 100644 index 0000000000..e23cd40600 --- /dev/null +++ b/src/services/self-improving/__tests__/SelfImprovingManager.spec.ts @@ -0,0 +1,585 @@ +const mockState = vi.hoisted(() => ({ + stores: [] as any[], + storeBaseDirs: [] as string[], + collectors: [] as any[], + analyzers: [] as any[], + appliers: [] as any[], + adapters: [] as any[], + memoryStores: [] as any[], + memoryStoreBaseDirs: [] as string[], + skillUsageStores: [] as any[], + skillUsageStoreBaseDirs: [] as string[], + actionExecutors: [] as any[], + curatorServices: [] as any[], + curatorBaseDirs: [] as string[], + reviewPromptFactories: [] as any[], + transcriptRecalls: [] as any[], + transcriptRecallBaseDirs: [] as string[], +})) + +const DEFAULT_CURATOR_STATUS = { + lastRunAt: 0, + firstRunDone: false, + config: { + intervalMs: 3_600_000, + minIdleMs: 300_000, + firstRunDeferred: true, + staleAfterDays: 14, + archiveAfterDays: 60, + backupsEnabled: true, + maxBackups: 5, + }, +} + +function createStoreMock() { + return { + initialize: 
vi.fn().mockResolvedValue(undefined), + persist: vi.fn().mockResolvedValue(undefined), + reset: vi.fn().mockResolvedValue(undefined), + getConfig: vi.fn().mockReturnValue({ + reviewOnTurnCount: 10, + reviewOnToolIterationCount: 2, + maxPromptPatterns: 5, + curatorEnabled: true, + curatorIntervalMs: 5_000, + staleAfterDays: 14, + archiveAfterDays: 60, + codeIndexCorrelationEnabled: true, + }), + getPatterns: vi.fn().mockReturnValue([]), + getRecentEvents: vi.fn().mockReturnValue([]), + getPendingActions: vi.fn().mockReturnValue([]), + getTelemetry: vi.fn().mockReturnValue({ + promptEnrichmentUses: 0, + toolPreferenceUses: 0, + errorAvoidanceUses: 0, + skillSuggestionCount: 0, + }), + getCounters: vi.fn().mockReturnValue({ userTurnsSinceReview: 0, toolIterationsSinceReview: 0 }), + addEvent: vi.fn(), + addPattern: vi.fn(), + addAction: vi.fn(), + removeAction: vi.fn(), + incrementToolIterations: vi.fn(), + incrementUserTurns: vi.fn(), + resetCounters: vi.fn(), + updateTelemetry: vi.fn(), + updatePattern: vi.fn(), + archivePattern: vi.fn(), + } +} + +function createMemoryStoreMock() { + return { + backendType: "builtin", + initialize: vi.fn().mockResolvedValue(undefined), + getSnapshotString: vi.fn().mockReturnValue(""), + getStats: vi.fn().mockResolvedValue({ entryCount: 0, backend: "builtin" }), + takeSnapshot: vi.fn(), + dispose: vi.fn().mockResolvedValue(undefined), + } +} + +function createSkillUsageStoreMock() { + return { + initialize: vi.fn().mockResolvedValue(undefined), + getStats: vi.fn().mockReturnValue({ total: 0, active: 0, stale: 0, archived: 0, pinned: 0, agentCreated: 0 }), + } +} + +function createActionExecutorMock() { + return { + executeBatch: vi.fn().mockResolvedValue(new Set()), + } +} + +function createCuratorServiceMock() { + return { + initialize: vi.fn().mockResolvedValue(undefined), + run: vi.fn().mockResolvedValue({ + runId: "curator-run-1", + timestamp: 1, + durationMs: 1, + transitions: [], + stats: { + totalSkills: 0, + activeSkills: 0, 
+ staleSkills: 0, + archivedSkills: 0, + pinnedSkills: 0, + transitionsApplied: 0, + }, + }), + getStatus: vi.fn().mockReturnValue({ ...DEFAULT_CURATOR_STATUS, config: { ...DEFAULT_CURATOR_STATUS.config } }), + } +} + +function createTranscriptRecallMock() { + return { + initialize: vi.fn().mockResolvedValue(undefined), + record: vi.fn().mockResolvedValue(undefined), + } +} + +vi.mock("../LearningStore", () => ({ + LearningStore: vi.fn().mockImplementation((baseDir: string) => { + const store = createStoreMock() + mockState.storeBaseDirs.push(baseDir) + mockState.stores.push(store) + return store + }), +})) + +vi.mock("../FeedbackCollector", () => ({ + FeedbackCollector: vi.fn().mockImplementation(() => { + const collector = { + createTaskEvent: vi.fn().mockImplementation((info) => ({ + id: "evt-task", + signal: info.success ? "TASK_SUCCESS" : "TASK_FAILURE", + timestamp: 1, + taskId: info.taskId, + context: { toolNames: info.toolNames }, + outcome: { success: info.success }, + })), + createCorrectionEvent: vi.fn().mockReturnValue({ + id: "evt-correction", + signal: "USER_CORRECTION", + timestamp: 1, + context: {}, + outcome: { corrected: true }, + }), + createCodeIndexEvent: vi.fn().mockReturnValue({ + id: "evt-code-index", + signal: "CODE_INDEX_HIT", + timestamp: 1, + context: {}, + outcome: {}, + }), + } + mockState.collectors.push(collector) + return collector + }), +})) + +vi.mock("../PatternAnalyzer", () => ({ + PatternAnalyzer: vi.fn().mockImplementation(() => { + const analyzer = { analyze: vi.fn().mockReturnValue([]) } + mockState.analyzers.push(analyzer) + return analyzer + }), +})) + +vi.mock("../ImprovementApplier", () => ({ + ImprovementApplier: vi.fn().mockImplementation(() => { + const applier = { + generateActions: vi.fn().mockReturnValue([]), + getPromptContext: vi.fn().mockReturnValue({ entries: [], revision: 0 }), + } + mockState.appliers.push(applier) + return applier + }), +})) + +vi.mock("../CodeIndexAdapter", () => ({ + CodeIndexAdapter: 
vi.fn().mockImplementation(() => { + const adapter = { + getInfo: vi.fn().mockReturnValue({ available: true, hits: 3, topScore: 0.9 }), + isAvailable: vi.fn().mockReturnValue(true), + searchVectorStore: vi.fn().mockResolvedValue([]), + setCodeIndexManager: vi.fn(), + } + mockState.adapters.push(adapter) + return adapter + }), +})) + +vi.mock("../MemoryStore", () => ({ + MemoryStore: vi.fn().mockImplementation(function (this: Record, baseDir: string) { + Object.assign(this, createMemoryStoreMock()) + mockState.memoryStoreBaseDirs.push(baseDir) + mockState.memoryStores.push(this) + }), +})) + +vi.mock("../SkillUsageStore", () => ({ + SkillUsageStore: vi.fn().mockImplementation((baseDir: string) => { + const store = createSkillUsageStoreMock() + mockState.skillUsageStoreBaseDirs.push(baseDir) + mockState.skillUsageStores.push(store) + return store + }), +})) + +vi.mock("../ActionExecutor", () => ({ + ActionExecutor: vi.fn().mockImplementation(() => { + const executor = createActionExecutorMock() + mockState.actionExecutors.push(executor) + return executor + }), +})) + +vi.mock("../CuratorService", () => ({ + CuratorService: vi.fn().mockImplementation((baseDir: string) => { + const service = createCuratorServiceMock() + mockState.curatorBaseDirs.push(baseDir) + mockState.curatorServices.push(service) + return service + }), +})) + +vi.mock("../ReviewPromptFactory", () => ({ + ReviewPromptFactory: vi.fn().mockImplementation(() => { + const factory = {} + mockState.reviewPromptFactories.push(factory) + return factory + }), +})) + +vi.mock("../TranscriptRecall", () => ({ + TranscriptRecall: vi.fn().mockImplementation((baseDir: string) => { + const store = createTranscriptRecallMock() + mockState.transcriptRecallBaseDirs.push(baseDir) + mockState.transcriptRecalls.push(store) + return store + }), +})) + +import { SelfImprovingManager } from "../SelfImprovingManager" + +describe("SelfImprovingManager", () => { + let experiments: Record | undefined + let memoryBackend: 
"builtin" | "agentmemory" | undefined + let agentMemoryUrl: string | undefined + let selfImprovingScope: "workspace" | "global" | undefined + let selfImprovingAutoSkillsScope: "workspace" | "global" | undefined + let workspacePath: string | undefined + let logger: { appendLine: ReturnType } + + const createManager = () => + new SelfImprovingManager({ + globalStoragePath: "/tmp/zoo-code-tests", + logger, + getExperiments: () => experiments, + getMemoryBackend: () => memoryBackend, + getAgentMemoryUrl: () => agentMemoryUrl, + getSelfImprovingScope: () => selfImprovingScope, + getAutoSkillsScope: () => selfImprovingAutoSkillsScope, + getWorkspacePath: () => workspacePath, + }) + + beforeEach(() => { + vi.clearAllMocks() + vi.useFakeTimers() + mockState.stores.length = 0 + mockState.storeBaseDirs.length = 0 + mockState.collectors.length = 0 + mockState.analyzers.length = 0 + mockState.appliers.length = 0 + mockState.adapters.length = 0 + mockState.memoryStores.length = 0 + mockState.memoryStoreBaseDirs.length = 0 + mockState.skillUsageStores.length = 0 + mockState.skillUsageStoreBaseDirs.length = 0 + mockState.actionExecutors.length = 0 + mockState.curatorServices.length = 0 + mockState.curatorBaseDirs.length = 0 + mockState.reviewPromptFactories.length = 0 + mockState.transcriptRecalls.length = 0 + mockState.transcriptRecallBaseDirs.length = 0 + experiments = undefined + memoryBackend = undefined + agentMemoryUrl = undefined + selfImprovingScope = undefined + selfImprovingAutoSkillsScope = undefined + workspacePath = "/tmp/workspace-one" + logger = { appendLine: vi.fn() } + }) + + afterEach(() => { + vi.useRealTimers() + }) + + it("requires self-improving to be enabled before auto-skills activates", () => { + expect(SelfImprovingManager.isAutoSkillsEnabled(undefined)).toBe(false) + expect(SelfImprovingManager.isAutoSkillsEnabled({ selfImprovingAutoSkills: true } as any)).toBe(false) + expect( + SelfImprovingManager.isAutoSkillsEnabled({ selfImproving: true, 
selfImprovingAutoSkills: false } as any), + ).toBe(false) + expect( + SelfImprovingManager.isAutoSkillsEnabled({ selfImproving: true, selfImprovingAutoSkills: true } as any), + ).toBe(true) + }) + + it("has zero runtime overhead when disabled", async () => { + const manager = createManager() + + await manager.initialize() + await manager.recordTaskCompletion({ taskId: "task-1", success: true, toolNames: ["read_file"] }) + + // getOrCreateRuntime is called during initialize, so store is created + expect(mockState.stores).toHaveLength(1) + // but no timers are started since experiment is disabled + expect(vi.getTimerCount()).toBe(0) + expect(await manager.getStatus()).toMatchObject({ + enabled: false, + started: false, + patternCount: 0, + eventCount: 0, + actionCount: 0, + memoryEntries: 0, + skillRecords: 0, + curatorStatus: DEFAULT_CURATOR_STATUS, + autoMode: expect.objectContaining({ autoModeEnabled: true }), + reviewTeam: { + enabled: true, + innovatorWeight: 0.3, + contrarianWeight: 0.3, + devilsAdvocateWeight: 0.3, + deciderThreshold: 0.6, + }, + questionEvaluator: expect.objectContaining({ enabled: true }), + resilience: expect.objectContaining({ enabled: true, consecutiveFailures: 0 }), + toolErrorHealer: expect.objectContaining({ enabled: true, knownTools: 10 }), + preventionEngine: { initialized: true }, + verificationEngine: { enabled: true, lastVerifyAt: undefined, lastResult: undefined }, + requirementsVerification: { enabled: true, requirementCount: 0, activeCount: 0, supersededCount: 0, lastVerifyResult: undefined }, + }) + }) + + it("initializes store state and schedules timers when enabled", async () => { + experiments = { selfImproving: true } + const manager = createManager() + + await manager.initialize() + + expect(mockState.stores).toHaveLength(1) + expect(mockState.stores[0].initialize).toHaveBeenCalledTimes(1) + expect(mockState.memoryStores[0].initialize).toHaveBeenCalledTimes(1) + 
expect(mockState.skillUsageStores[0].initialize).toHaveBeenCalledTimes(1) + expect(mockState.transcriptRecalls[0].initialize).toHaveBeenCalledTimes(1) + expect(mockState.curatorServices[0].initialize).toHaveBeenCalledTimes(1) + // 2 existing timers + 1 auto mode review timer + expect(vi.getTimerCount()).toBe(3) + expect(await manager.getStatus()).toMatchObject({ enabled: true, started: true }) + }) + + it("runs a review cycle from task completion triggers", async () => { + experiments = { selfImproving: true } + const manager = createManager() + await manager.initialize() + + const store = mockState.stores[0] + const analyzer = mockState.analyzers[0] + const applier = mockState.appliers[0] + const executor = mockState.actionExecutors[0] + const transcriptRecall = mockState.transcriptRecalls[0] + const pattern = { + id: "pattern-1", + patternType: "prompt", + state: "active", + summary: "Prefer semantic search before regex search", + confidenceScore: 0.9, + frequency: 3, + successRate: 0.8, + firstSeenAt: 1, + lastSeenAt: 1, + sourceSignals: ["TASK_SUCCESS"], + context: {}, + } + const action = { + id: "action-1", + actionType: "PROMPT_ENRICHMENT", + target: "system-prompt", + payload: { summary: "Prefer semantic search before regex search" }, + timestamp: 1, + } + + store.getCounters.mockReturnValue({ userTurnsSinceReview: 0, toolIterationsSinceReview: 2 }) + store.getRecentEvents.mockReturnValue([ + { id: "evt-1", signal: "TASK_SUCCESS", timestamp: 1, context: {}, outcome: {} }, + ]) + store.getPatterns.mockReturnValueOnce([]).mockReturnValue([pattern]) + store.getPendingActions.mockReturnValue([action]) + analyzer.analyze.mockReturnValue([pattern]) + applier.generateActions.mockReturnValue([action]) + executor.executeBatch.mockResolvedValue(new Set(["action-1"])) + + await manager.recordTaskCompletion({ taskId: "task-1", success: true, toolNames: ["search_files"] }) + + expect(store.addEvent).toHaveBeenCalledTimes(1) + 
expect(transcriptRecall.record).toHaveBeenCalledWith({ + id: "evt-task", + timestamp: 1, + taskId: "task-1", + mode: undefined, + summary: "Task completed: unknown", + signal: "TASK_SUCCESS", + workspacePath: undefined, + toolNames: ["search_files"], + errorKey: undefined, + success: true, + }) + expect(store.incrementToolIterations).toHaveBeenCalledWith(1) + expect(analyzer.analyze).toHaveBeenCalledTimes(1) + expect(store.addPattern).toHaveBeenCalledWith(pattern) + expect(store.addAction).toHaveBeenCalledWith(action) + expect(executor.executeBatch).toHaveBeenCalledWith([action]) + expect(store.removeAction).toHaveBeenCalledWith("action-1") + expect(store.persist).toHaveBeenCalled() + }) + + it("formats prompt context and disposes cleanly on experiment disable", async () => { + experiments = { selfImproving: true } + const manager = createManager() + await manager.initialize() + + const memoryStore = mockState.memoryStores[0] + memoryStore.getSnapshotString.mockReturnValue("\n## Learned Context\n- Search relevant code before editing\n") + memoryStore.getStats.mockResolvedValue({ entryCount: 3, backend: "builtin" }) + mockState.skillUsageStores[0].getStats.mockReturnValue({ + total: 4, + active: 3, + stale: 1, + archived: 0, + pinned: 1, + agentCreated: 2, + }) + + expect(manager.getPromptContextString()).toBe("\n## Learned Context\n- Search relevant code before editing\n") + expect(await manager.getStatus()).toMatchObject({ memoryEntries: 3, memoryBackend: "builtin", skillRecords: 4 }) + + experiments = { selfImproving: false } + await manager.handleExperimentChange(false) + + expect(mockState.stores[0].persist).toHaveBeenCalledTimes(1) + expect(memoryStore.takeSnapshot).toHaveBeenCalledTimes(1) + expect(memoryStore.dispose).toHaveBeenCalledTimes(1) + expect(vi.getTimerCount()).toBe(0) + expect(await manager.getStatus()).toMatchObject({ + enabled: false, + started: false, + patternCount: 0, + eventCount: 0, + actionCount: 0, + memoryEntries: 0, + skillRecords: 0, + 
curatorStatus: DEFAULT_CURATOR_STATUS, + autoMode: expect.objectContaining({ autoModeEnabled: true }), + reviewTeam: { + enabled: true, + innovatorWeight: 0.3, + contrarianWeight: 0.3, + devilsAdvocateWeight: 0.3, + deciderThreshold: 0.6, + }, + questionEvaluator: expect.objectContaining({ enabled: true }), + resilience: expect.objectContaining({ enabled: true, consecutiveFailures: 0 }), + toolErrorHealer: expect.objectContaining({ enabled: true, knownTools: 10 }), + preventionEngine: { initialized: true }, + verificationEngine: { enabled: true, lastVerifyAt: undefined, lastResult: undefined }, + requirementsVerification: { enabled: true, requirementCount: 0, activeCount: 0, supersededCount: 0, lastVerifyResult: undefined }, + }) + }) + + it("rebuilds the memory backend when settings change", async () => { + experiments = { selfImproving: true } + const manager = createManager() + await manager.initialize() + + const originalStore = mockState.memoryStores[0] + memoryBackend = "agentmemory" + agentMemoryUrl = "http://agentmemory.internal:4001" + + await manager.onSettingsChanged(experiments as any) + + expect(originalStore.takeSnapshot).toHaveBeenCalledTimes(1) + expect(originalStore.dispose).toHaveBeenCalledTimes(1) + expect((manager.memoryStore as any).backendType).toBe("agentmemory") + expect(logger.appendLine).toHaveBeenCalledWith( + "[SelfImprovingManager] Memory backend configured: agentmemory (http://agentmemory.internal:4001)", + ) + }) + + it("rebuilds self-improving stores when scope changes to workspace", async () => { + experiments = { selfImproving: true } + const manager = createManager() + await manager.initialize() + + selfImprovingScope = "workspace" + selfImprovingAutoSkillsScope = "global" + workspacePath = "/tmp/workspace-two" + + await manager.onSettingsChanged(experiments as any) + + expect(mockState.storeBaseDirs.at(-1)).toContain("workspace-scopes") + expect(mockState.memoryStoreBaseDirs.at(-1)).toContain("workspace-scopes") + 
expect(mockState.skillUsageStoreBaseDirs.at(-1)).toContain("workspace-scopes") + expect(mockState.curatorBaseDirs.at(-1)).toContain("workspace-scopes") + expect(mockState.transcriptRecallBaseDirs.at(-1)).toContain("workspace-scopes") + expect(mockState.stores.length).toBeGreaterThanOrEqual(2) + expect(mockState.memoryStores.length).toBeGreaterThanOrEqual(2) + }) + + it("runs curator cycles through the curator service", async () => { + experiments = { selfImproving: true } + const manager = createManager() + await manager.initialize() + + const report = { + runId: "curator-run-2", + timestamp: 123, + durationMs: 5, + transitions: [ + { + skillId: "skill-1", + skillName: "Skill 1", + fromState: "active", + toState: "stale", + reason: "No activity for 14 days", + }, + ], + stats: { + totalSkills: 1, + activeSkills: 0, + staleSkills: 1, + archivedSkills: 0, + pinnedSkills: 0, + transitionsApplied: 1, + }, + } + mockState.curatorServices[0].run.mockResolvedValue(report) + + const result = await manager.runCuratorCycle() + + expect(mockState.curatorServices[0].run).toHaveBeenCalledTimes(1) + expect(mockState.stores[0].updateTelemetry).toHaveBeenCalledWith({ lastCuratorRunAt: 123 }) + expect(result).toEqual(report) + }) + + it("records code index events with explicit search hit details", async () => { + experiments = { selfImproving: true } + const manager = createManager() + await manager.initialize() + + const store = mockState.stores[0] + const collector = mockState.collectors[0] + store.getConfig.mockReturnValue({ + reviewOnTurnCount: 10, + reviewOnToolIterationCount: 2, + maxPromptPatterns: 5, + curatorEnabled: true, + curatorIntervalMs: 5_000, + staleAfterDays: 14, + archiveAfterDays: 60, + codeIndexCorrelationEnabled: true, + }) + + await manager.recordCodeIndexEvent("task-1", { available: true, hits: 4, topScore: 0.91 }) + + expect(collector.createCodeIndexEvent).toHaveBeenCalledWith( + { available: true, hits: 4, topScore: 0.91 }, + "task-1", + ) + 
expect(store.addEvent).toHaveBeenCalledWith(expect.objectContaining({ signal: "CODE_INDEX_HIT" })) + }) +}) diff --git a/src/services/self-improving/__tests__/SkillUsageStore.spec.ts b/src/services/self-improving/__tests__/SkillUsageStore.spec.ts new file mode 100644 index 0000000000..864d7ab811 --- /dev/null +++ b/src/services/self-improving/__tests__/SkillUsageStore.spec.ts @@ -0,0 +1,103 @@ +import * as fs from "fs/promises" +import * as os from "os" +import * as path from "path" + +import { SkillUsageStore } from "../SkillUsageStore" + +describe("SkillUsageStore", () => { + let tempDir: string + const logger = { appendLine: vi.fn() } + + beforeEach(async () => { + logger.appendLine.mockReset() + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "skill-usage-store-")) + }) + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }) + }) + + it("creates and persists telemetry sidecar records", async () => { + const store = new SkillUsageStore(tempDir, logger) + await store.initialize() + + const record = store.getOrCreate("skill-1", "Generated Skill", "agent") + const persistedPath = path.join(tempDir, "self-improving", "skill-usage.json") + const deadline = Date.now() + 1000 + let persisted: Array<{ skillId: string; skillName: string }> = [] + + while (Date.now() < deadline) { + try { + persisted = JSON.parse(await fs.readFile(persistedPath, "utf8")) as Array<{ + skillId: string + skillName: string + }> + if (persisted.some((entry) => entry.skillId === "skill-1")) { + break + } + } catch { + // Persist is async; retry until the file is ready. 
+ } + + await new Promise((resolve) => setTimeout(resolve, 25)) + } + + expect(record).toMatchObject({ skillId: "skill-1", skillName: "Generated Skill", createdBy: "agent" }) + expect(persisted).toContainEqual(expect.objectContaining({ skillId: "skill-1", skillName: "Generated Skill" })) + }) + + it("tracks counters, pinning, and lifecycle candidates", async () => { + const filePath = path.join(tempDir, "self-improving", "skill-usage.json") + await fs.mkdir(path.dirname(filePath), { recursive: true }) + await fs.writeFile( + filePath, + JSON.stringify([ + { + skillId: "skill-1", + skillName: "Dormant Skill", + createdBy: "bundled", + state: "active", + pinned: false, + viewCount: 0, + useCount: 0, + patchCount: 0, + createdAt: 1, + lastActivityAt: 1, + }, + ]), + "utf8", + ) + + const store = new SkillUsageStore(tempDir, logger) + await store.initialize() + + await store.bumpView("skill-1") + await store.bumpUse("skill-1") + await store.bumpPatch("skill-1") + await store.pin("skill-1") + await store.transitionState("skill-1", "stale") + + expect(store.get("skill-1")).toMatchObject({ + pinned: true, + state: "active", + viewCount: 1, + useCount: 1, + patchCount: 1, + }) + + await store.unpin("skill-1") + await store.transitionState("skill-1", "stale") + + expect(store.getByState("stale")).toHaveLength(1) + expect(store.getArchiveCandidates(0)).toHaveLength(1) + expect(store.getStats()).toEqual({ + total: 1, + active: 0, + stale: 1, + archived: 0, + pinned: 0, + agentCreated: 0, + pinnedAgentCreated: 0, + }) + }) +}) diff --git a/src/services/self-improving/__tests__/ToolErrorHealer.spec.ts b/src/services/self-improving/__tests__/ToolErrorHealer.spec.ts new file mode 100644 index 0000000000..a5e7258e93 --- /dev/null +++ b/src/services/self-improving/__tests__/ToolErrorHealer.spec.ts @@ -0,0 +1,69 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { ToolErrorHealer } from "../ToolErrorHealer" + +describe("ToolErrorHealer", () => { + let healer: 
ToolErrorHealer + + beforeEach(() => { + healer = new ToolErrorHealer({ appendLine: vi.fn() } as any) + }) + + describe("handleToolError", () => { + it("should return known fix for search_files regex", () => { + const result = healer.handleToolError("search_files", "regex") + expect(result).not.toBeNull() + expect(result!.fix).toContain("regex") + expect(result!.autoCorrectable).toBe(true) + }) + + it("should return null for unknown tool", () => { + const result = healer.handleToolError("unknown_tool", "param") + expect(result).toBeNull() + }) + + it("should return null when disabled", () => { + const disabled = new ToolErrorHealer({ appendLine: vi.fn() } as any, { enabled: false }) + expect(disabled.handleToolError("search_files", "regex")).toBeNull() + }) + + it("should learn from repeated errors", () => { + healer.handleToolError("search_files", "regex") + healer.handleToolError("search_files", "regex") + const summary = healer.getCorrectionSummary() + const searchEntry = summary.find((s) => s.toolName === "search_files" && s.missingParam === "regex") + expect(searchEntry).toBeDefined() + expect(searchEntry!.occurrences).toBe(2) + }) + }) + + describe("getToolRequirements", () => { + it("should return requirements for known tools", () => { + const reqs = healer.getToolRequirements("search_files") + expect(reqs.length).toBeGreaterThan(0) + expect(reqs.some((r) => r.param === "regex")).toBe(true) + }) + + it("should return empty array for unknown tools", () => { + expect(healer.getToolRequirements("unknown_tool")).toEqual([]) + }) + }) + + describe("hasKnownRequirements", () => { + it("should return true for known tools", () => { + expect(healer.hasKnownRequirements("search_files")).toBe(true) + expect(healer.hasKnownRequirements("read_file")).toBe(true) + }) + + it("should return false for unknown tools", () => { + expect(healer.hasKnownRequirements("unknown_tool")).toBe(false) + }) + }) + + describe("getStatus", () => { + it("should return status object", () => 
{ + const status = healer.getStatus() + expect(status.enabled).toBe(true) + expect(status.knownTools).toBeGreaterThan(0) + }) + }) +}) diff --git a/src/services/self-improving/__tests__/TranscriptRecall.spec.ts b/src/services/self-improving/__tests__/TranscriptRecall.spec.ts new file mode 100644 index 0000000000..4cd0acdc91 --- /dev/null +++ b/src/services/self-improving/__tests__/TranscriptRecall.spec.ts @@ -0,0 +1,166 @@ +import * as fs from "fs/promises" +import * as os from "os" +import * as path from "path" + +import { TranscriptRecall } from "../TranscriptRecall" + +describe("TranscriptRecall", () => { + let tempDir: string + let logger: { appendLine: ReturnType } + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "zoo-transcript-")) + logger = { appendLine: vi.fn() } + }) + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }) + }) + + it("records, persists, and searches transcript evidence", async () => { + const recall = new TranscriptRecall(tempDir, logger) + await recall.initialize() + + await recall.record({ + id: "entry-1", + timestamp: 1, + taskId: "task-1", + mode: "code", + summary: "Task failed while writing file", + signal: "TASK_FAILURE", + toolNames: ["write_to_file"], + errorKey: "EACCES", + success: false, + }) + await recall.record({ + id: "entry-2", + timestamp: 2, + taskId: "task-2", + mode: "code", + summary: "Task completed after search", + signal: "TASK_SUCCESS", + toolNames: ["search_files"], + success: true, + }) + + expect(recall.size).toBe(2) + expect(recall.search("search_files")).toHaveLength(1) + expect(recall.searchBySignal("TASK_FAILURE")).toHaveLength(1) + expect(recall.searchByErrorKey("EACCES")).toHaveLength(1) + + const reloaded = new TranscriptRecall(tempDir, logger) + await reloaded.initialize() + expect(reloaded.getRecent(1)[0].id).toBe("entry-2") + }) + + it("initializes lazily when recording before initialize", async () => { + const filePath = path.join(tempDir, 
"self-improving", "transcript-recall.json") + await fs.mkdir(path.dirname(filePath), { recursive: true }) + await fs.writeFile( + filePath, + JSON.stringify([ + { + id: "entry-0", + timestamp: 0, + summary: "Existing transcript entry", + signal: "TASK_SUCCESS", + }, + ]), + "utf8", + ) + + const recall = new TranscriptRecall(tempDir, logger) + await recall.record({ + id: "entry-1", + timestamp: 1, + summary: "Recorded without explicit initialize", + signal: "TASK_SUCCESS", + }) + + expect(recall.getRecent(2).map((entry) => entry.id)).toEqual(["entry-0", "entry-1"]) + }) + + it("serializes concurrent lazy initialization before recording", async () => { + const filePath = path.join(tempDir, "self-improving", "transcript-recall.json") + await fs.mkdir(path.dirname(filePath), { recursive: true }) + await fs.writeFile( + filePath, + JSON.stringify([ + { + id: "entry-0", + timestamp: 0, + summary: "Existing transcript entry", + signal: "TASK_SUCCESS", + }, + ]), + "utf8", + ) + + const recall = new TranscriptRecall(tempDir, logger) + await Promise.all([ + recall.record({ + id: "entry-1", + timestamp: 1, + summary: "Concurrent record one", + signal: "TASK_SUCCESS", + }), + recall.record({ + id: "entry-2", + timestamp: 2, + summary: "Concurrent record two", + signal: "TASK_SUCCESS", + }), + ]) + + expect(recall.size).toBe(3) + expect( + recall + .getRecent(3) + .map((entry) => entry.id) + .sort(), + ).toEqual(["entry-0", "entry-1", "entry-2"]) + }) + + it("ignores malformed persisted entries", async () => { + const filePath = path.join(tempDir, "self-improving", "transcript-recall.json") + await fs.mkdir(path.dirname(filePath), { recursive: true }) + await fs.writeFile( + filePath, + JSON.stringify([ + { + id: "entry-1", + timestamp: 1, + summary: "Valid transcript entry", + signal: "TASK_SUCCESS", + }, + { id: "entry-2", timestamp: "bad", summary: 1, signal: null }, + "not-an-entry", + ]), + "utf8", + ) + + const recall = new TranscriptRecall(tempDir, logger) + await 
recall.initialize() + + expect(recall.size).toBe(1) + expect(recall.search("valid")).toHaveLength(1) + }) + + it("clears persisted entries", async () => { + const recall = new TranscriptRecall(tempDir, logger) + await recall.initialize() + await recall.record({ + id: "entry-1", + timestamp: 1, + summary: "Task completed", + signal: "TASK_SUCCESS", + }) + + await recall.clear() + expect(recall.size).toBe(0) + + const reloaded = new TranscriptRecall(tempDir, logger) + await reloaded.initialize() + expect(reloaded.size).toBe(0) + }) +}) diff --git a/src/services/self-improving/__tests__/TrustService.spec.ts b/src/services/self-improving/__tests__/TrustService.spec.ts new file mode 100644 index 0000000000..e0251a558c --- /dev/null +++ b/src/services/self-improving/__tests__/TrustService.spec.ts @@ -0,0 +1,98 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { TrustService } from "../TrustService" + +describe("TrustService", () => { + let service: TrustService + + beforeEach(() => { + service = new TrustService({ appendLine: vi.fn() } as any) + }) + + describe("shouldAutoApprove", () => { + it("should return false when disabled", () => { + const disabled = new TrustService({ appendLine: vi.fn() } as any, { enabled: false }) + expect(disabled.shouldAutoApprove("read_file")).toBe(false) + }) + + it("should auto-approve read operations when enabled", () => { + service.updateConfig({ enabled: true, autoApproveRead: true }) + expect(service.shouldAutoApprove("read_file")).toBe(true) + expect(service.shouldAutoApprove("search_files")).toBe(true) + expect(service.shouldAutoApprove("list_files")).toBe(true) + }) + + it("should auto-approve write operations when enabled", () => { + service.updateConfig({ enabled: true, autoApproveWrite: true }) + expect(service.shouldAutoApprove("write_file")).toBe(true) + expect(service.shouldAutoApprove("edit_file")).toBe(true) + }) + + it("should auto-approve commands when enabled", () => { + service.updateConfig({ 
enabled: true, autoApproveCommands: true }) + expect(service.shouldAutoApprove("execute_command")).toBe(true) + expect(service.shouldAutoApprove("bash")).toBe(true) + }) + + it("should auto-approve MCP when enabled", () => { + service.updateConfig({ enabled: true, autoApproveMcp: true }) + expect(service.shouldAutoApprove("use_mcp_tool")).toBe(true) + expect(service.shouldAutoApprove("access_mcp_resource")).toBe(true) + }) + + it("should auto-approve mode switch when enabled", () => { + service.updateConfig({ enabled: true, autoApproveModeSwitch: true }) + expect(service.shouldAutoApprove("switch_mode")).toBe(true) + }) + + it("should always auto-approve user-facing tools", () => { + service.updateConfig({ enabled: true }) + expect(service.shouldAutoApprove("ask_followup_question")).toBe(true) + expect(service.shouldAutoApprove("attempt_completion")).toBe(true) + }) + + it("should respect max consecutive actions limit", () => { + service.updateConfig({ enabled: true, autoApproveRead: true, maxConsecutiveActions: 2 }) + expect(service.shouldAutoApprove("read_file")).toBe(true) + expect(service.shouldAutoApprove("read_file")).toBe(true) + expect(service.shouldAutoApprove("read_file")).toBe(false) // limit reached + }) + + it("should reset consecutive counter", () => { + service.updateConfig({ enabled: true, autoApproveRead: true, maxConsecutiveActions: 2 }) + service.shouldAutoApprove("read_file") + service.shouldAutoApprove("read_file") + service.resetConsecutiveCounter() + expect(service.shouldAutoApprove("read_file")).toBe(true) // counter reset + }) + + it("should check trusted commands", () => { + service.updateConfig({ + enabled: true, + autoApproveCommands: true, + trustedCommands: ["npm test", "git status"], + }) + expect(service.shouldAutoApprove("execute_command", { command: "npm test" })).toBe(true) + expect(service.shouldAutoApprove("execute_command", { command: "rm -rf /" })).toBe(false) + }) + + it("should check trusted paths", () => { + 
service.updateConfig({ + enabled: true, + autoApproveWrite: true, + trustedPaths: ["/home/project/src"], + }) + expect(service.shouldAutoApprove("write_file", { path: "/home/project/src/index.ts" })).toBe(true) + expect(service.shouldAutoApprove("write_file", { path: "/etc/passwd" })).toBe(false) + }) + }) + + describe("getStatus", () => { + it("should return status object", () => { + service.updateConfig({ enabled: true, autoApproveRead: true }) + const status = service.getStatus() + expect(status.enabled).toBe(true) + expect(status.autoApproveRead).toBe(true) + expect(status.consecutiveActions).toBe(0) + }) + }) +}) diff --git a/src/services/self-improving/index.ts b/src/services/self-improving/index.ts new file mode 100644 index 0000000000..0250b813d6 --- /dev/null +++ b/src/services/self-improving/index.ts @@ -0,0 +1,53 @@ +/** + * Self-Improving Module + * + * A standalone, experiment-gated subsystem that learns from task outcomes + * to improve prompt guidance, tool preferences, and error avoidance over time. + * + * Architecture: Hermes-agent symbolic learning loop adapted to Zoo-Code patterns. + * See ARCHITECTURE.md for full design documentation. 
+ */ + +export { SelfImprovingManager } from "./SelfImprovingManager" +export { LearningStore } from "./LearningStore" +export { FeedbackCollector } from "./FeedbackCollector" +export { PatternAnalyzer } from "./PatternAnalyzer" +export { ImprovementApplier } from "./ImprovementApplier" +export { CodeIndexAdapter } from "./CodeIndexAdapter" +export { MemoryBackendFactory } from "./MemoryBackendFactory" +export { AgentMemoryAdapter } from "./AgentMemoryAdapter" +export { MemoryStore } from "./MemoryStore" +export { SkillUsageStore } from "./SkillUsageStore" +export { ActionExecutor } from "./ActionExecutor" +export { CuratorService } from "./CuratorService" +export { ReviewPromptFactory } from "./ReviewPromptFactory" +export { TranscriptRecall } from "./TranscriptRecall" +export { InsightsEngine } from "./InsightsEngine" +export { AutoModeOrchestrator } from "./AutoModeOrchestrator" +export type { AutoModeConfig } from "./AutoModeOrchestrator" +export { ModeFactoryService } from "./ModeFactoryService" +export { VerificationEngine } from "./VerificationEngine" +export type { VerificationResult, VerificationConfig } from "./VerificationEngine" +export { RequirementsVerifier } from "./RequirementsVerifier" +export type { RequirementsVerifierConfig } from "./RequirementsVerifier" +export { KeywordConflictResolver } from "./KeywordConflictResolver" +export { LLMConflictResolver } from "./LLMConflictResolver" + +export type { CodeIndexInfo, Logger, PromptContext, SelfImprovingManagerOptions, TaskEventInfo, Requirement, RequirementsVerificationResult, ConflictResolver, ConflictResolution } from "./types" +export type { MemoryBackend, MemoryBackendType } from "./MemoryBackend" +export type { MemoryStoreType } from "./MemoryStore" +export type { SkillTelemetryRecord, SkillProvenance, SkillLifecycleState } from "./SkillUsageStore" +export type { CuratorConfig, CuratorReport } from "./CuratorService" +export type { ReviewType, ReviewPrompt } from "./ReviewPromptFactory" 
+export type { TranscriptEntry } from "./TranscriptRecall" +export type { InsightsReport } from "./InsightsEngine" + +export { DEFAULT_CONFIG, EMPTY_STATE } from "./types" + +export { ErrorClassifier, ErrorCategory } from "./ErrorClassifier" +export type { ClassifiedError } from "./ErrorClassifier" +export { ToolCallValidator } from "./ToolCallValidator" +export type { ValidationResult } from "./ToolCallValidator" +export { CascadeTracker } from "./CascadeTracker" +export { PreventionEngine } from "./PreventionEngine" +export type { PreventionContext } from "./PreventionEngine" diff --git a/src/services/self-improving/tarUtils.ts b/src/services/self-improving/tarUtils.ts new file mode 100644 index 0000000000..782879c650 --- /dev/null +++ b/src/services/self-improving/tarUtils.ts @@ -0,0 +1,183 @@ +import * as fs from "fs/promises" +import * as fsSync from "fs" +import { createGzip, createGunzip } from "zlib" +import { pipeline } from "stream/promises" +import { Readable, Writable } from "stream" +import * as path from "path" + +/** + * Tar entry in memory: relative path + buffer content. + */ +export interface TarEntry { + path: string + content: Buffer +} + +/** + * Create a tar.gz file from an array of in-memory entries. 
/**
 * Write a gzip-compressed tar archive built from in-memory entries.
 *
 * The archive is assembled by hand (no external libraries):
 * - every entry is a 512-byte header followed by its content, zero-padded
 *   up to the next multiple of 512 bytes
 * - the archive is terminated by two all-zero 512-byte blocks
 * - the whole tar image is then gzip-compressed and streamed to disk
 *
 * @param entries Array of {path, content} objects
 * @param outputPath File path for the .tar.gz output
 */
export async function createTarGzip(
	entries: Array<{ path: string; content: Buffer }>,
	outputPath: string,
): Promise<void> {
	const TAR_BLOCK = 512

	// One record per entry: header, raw content, then optional zero padding.
	const parts: Buffer[] = entries.flatMap(({ path: entryPath, content }) => {
		const record: Buffer[] = [buildTarHeader(entryPath, content.length), content]
		const remainder = content.length % TAR_BLOCK
		if (remainder !== 0) {
			record.push(Buffer.alloc(TAR_BLOCK - remainder, 0))
		}
		return record
	})

	// End-of-archive marker: two empty blocks.
	parts.push(Buffer.alloc(2 * TAR_BLOCK, 0))

	// Compress the assembled tar image and stream it to the output file.
	await pipeline(Readable.from([Buffer.concat(parts)]), createGzip(), fsSync.createWriteStream(outputPath))
}
/**
 * Extract a tar.gz file into a target directory.
 *
 * Only regular-file and directory entries are materialised; every other entry
 * type (symlinks, devices, pax/GNU extension headers, ...) is skipped so that
 * an archive cannot plant links that later writes would follow. Parsing stops
 * at the first all-zero header block, or at a header whose size or name cannot
 * be read.
 *
 * @param archivePath Path to .tar.gz file
 * @param targetDir Directory to extract into (will be created)
 * @throws Error if an entry would be written outside `targetDir`, or if the
 *   archive ends before an entry's declared content does.
 */
export async function extractTarGzip(archivePath: string, targetDir: string): Promise<void> {
	const BLOCK_SIZE = 512

	// Header fields are NUL-terminated; anything after the first NUL is padding.
	const readField = (header: Buffer, start: number, end: number): string => {
		const field = header.subarray(start, end)
		const nul = field.indexOf(0)
		return field.toString("utf8", 0, nul === -1 ? field.length : nul).trim()
	}

	const rootDir = path.resolve(targetDir)
	await fs.mkdir(rootDir, { recursive: true })

	// Read the archive and gunzip it fully into memory.
	const compressed = await fs.readFile(archivePath)
	const chunks: Buffer[] = []
	await pipeline(
		Readable.from([compressed]),
		createGunzip(),
		new Writable({
			write(chunk: Buffer, _encoding, callback) {
				chunks.push(chunk)
				callback()
			},
		}),
	)
	const tarData = Buffer.concat(chunks)

	let offset = 0
	while (offset + BLOCK_SIZE <= tarData.length) {
		const header = tarData.subarray(offset, offset + BLOCK_SIZE)
		offset += BLOCK_SIZE

		// End-of-archive marker (zero block).
		if (header[0] === 0) {
			break
		}

		// File size: octal text at bytes 124-135.
		const fileSize = parseInt(readField(header, 124, 136), 8)
		if (Number.isNaN(fileSize) || fileSize < 0) {
			break
		}

		let entryName = readField(header, 0, 100)
		if (!entryName) {
			break
		}

		// POSIX ustar stores the leading part of long paths in the prefix field
		// (bytes 345-499). GNU headers reuse that area, so require the exact
		// POSIX magic "ustar\0" before trusting it.
		if (header.toString("latin1", 257, 263) === "ustar\0") {
			const prefix = readField(header, 345, 500)
			if (prefix) {
				entryName = `${prefix}/${entryName}`
			}
		}

		// Never write a silently truncated file.
		if (offset + fileSize > tarData.length) {
			throw new Error(
				`Truncated tar archive: entry "${entryName}" declares ${fileSize} bytes but only ${tarData.length - offset} remain`,
			)
		}

		const content = tarData.subarray(offset, offset + fileSize)
		// Advance past the content and its padding to the next 512-byte boundary.
		offset += fileSize + ((BLOCK_SIZE - (fileSize % BLOCK_SIZE)) % BLOCK_SIZE)

		// Type flag at byte 156: '0', NUL (pre-POSIX) and '7' are regular files, '5' is a directory.
		const typeFlag = header[156]
		const isRegularFile = typeFlag === 0x30 || typeFlag === 0 || typeFlag === 0x37
		// Pre-POSIX archives mark directories with a trailing slash instead of a type flag.
		const isDirectory = typeFlag === 0x35 || (isRegularFile && entryName.endsWith("/"))
		if (!isDirectory && !isRegularFile) {
			continue
		}

		// Resolve the destination and refuse anything that escapes the target directory.
		const targetPath = path.join(rootDir, entryName)
		const relative = path.relative(rootDir, targetPath)
		if (relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)) {
			throw new Error(`Refusing to extract "${entryName}": path escapes target directory`)
		}

		if (isDirectory) {
			await fs.mkdir(targetPath, { recursive: true })
			continue
		}

		// Ensure parent dir exists, then write the file.
		await fs.mkdir(path.dirname(targetPath), { recursive: true })
		await fs.writeFile(targetPath, content)
	}
}
+ */ +function buildTarHeader(name: string, size: number): Buffer { + const buf = Buffer.alloc(512, 0) + + // File name (100 bytes) — truncate if too long + const nameBytes = Buffer.from(name, "ascii") + if (nameBytes.length > 99) { + nameBytes.copy(buf, 0, 0, 99) + } else { + nameBytes.copy(buf, 0) + } + + // File mode (8 bytes, octal) + buf.write("0000644", 100, 8, "ascii") + + // Owner UID (8 bytes, octal) + buf.write("0000000", 108, 8, "ascii") + + // Group GID (8 bytes, octal) + buf.write("0000000", 116, 8, "ascii") + + // File size (12 bytes, octal) + const sizeOctal = size.toString(8).padStart(11, "0") + buf.write(sizeOctal, 124, 12, "ascii") + + // Mtime (12 bytes, octal) + const mtime = Math.floor(Date.now() / 1000) + .toString(8) + .padStart(11, "0") + buf.write(mtime, 136, 12, "ascii") + + // Checksum placeholder (8 bytes of spaces) + buf.write(" ", 148, 8, "ascii") + + // Type flag: '0' = regular file + buf[156] = "0".charCodeAt(0) + + // Ustar indicator + buf.write("ustar", 257, 5, "ascii") + buf.write("00", 263, 2, "ascii") + + // Calculate and write checksum + let checksum = 0 + for (let i = 0; i < 512; i++) { + checksum += buf[i] + } + const checksumOctal = checksum.toString(8).padStart(6, "0") + buf.write(checksumOctal, 148, 7, "ascii") + buf[155] = 0x20 // trailing space in checksum field + + return buf +} diff --git a/src/services/self-improving/types.ts b/src/services/self-improving/types.ts new file mode 100644 index 0000000000..88d1bcce7a --- /dev/null +++ b/src/services/self-improving/types.ts @@ -0,0 +1,206 @@ +import { + DEFAULT_LEARNING_CONFIG, + EMPTY_LEARNING_STATE, + type ActionType, + type Experiments, + type FeedbackSignal, + type ImprovementAction, + type LearnedPattern, + type LearningConfig, + type LearningEvent, + type LearningState, + type LearningTelemetry, + type PatternState, + type PatternType, + type SelfImprovingScope, +} from "@roo-code/types" + +// Re-export shared types for convenience +export type { + ActionType, + 
Experiments, + FeedbackSignal, + ImprovementAction, + LearnedPattern, + LearningConfig, + LearningEvent, + LearningState, + LearningTelemetry, + PatternState, + PatternType, + SelfImprovingScope, +} + +/** + * Output channel logger interface - abstracts VS Code OutputChannel + */ +export interface Logger { + appendLine(message: string): void +} + +/** + * Code index adapter contract - read-only view of code index availability + */ +export interface CodeIndexInfo { + available: boolean + hits: number + topScore?: number +} + +/** + * Task lifecycle event adapter - normalizes task events into learning signals + */ +export interface TaskEventInfo { + taskId: string + mode?: string + workspacePath?: string + success?: boolean + corrected?: boolean + toolNames?: string[] + userTurnCount?: number + toolIterationCount?: number + errorKey?: string + promptFingerprint?: string +} + +/** + * Prompt context result - bounded set of learned guidance for prompt injection + */ +export interface PromptContext { + entries: Array<{ + type: PatternType + summary: string + confidence: number + }> + revision: number +} + +/** + * Manager options for construction + */ +export interface SelfImprovingManagerOptions { + globalStoragePath: string + logger: Logger + getExperiments: () => Experiments | undefined + getMemoryBackend?: () => "builtin" | "agentmemory" | undefined + getAgentMemoryUrl?: () => string | undefined + getSelfImprovingScope?: () => SelfImprovingScope | undefined + getAutoSkillsScope?: () => SelfImprovingScope | undefined + getWorkspacePath?: () => string | undefined + /** Memory backend type: "builtin" (default) or "agentmemory" */ + memoryBackend?: "builtin" | "agentmemory" + /** agentmemory server URL (default: http://localhost:3111) */ + agentMemoryUrl?: string + /** Optional curator configuration overrides */ + curatorConfig?: { + intervalMs?: number + minIdleMs?: number + firstRunDeferred?: boolean + staleAfterDays?: number + archiveAfterDays?: number + 
backupsEnabled?: boolean + maxBackups?: number + } + /** Optional SkillsManager reference for skill telemetry integration */ + skillsManager?: { + getSkillNames(): string[] + getSkillProvenance(name: string): string + getSkillProvenanceForSource?(name: string, source: "global" | "project"): string + hasSkill?(name: string, source: "global" | "project"): boolean + createSkillFromContent( + name: string, + source: "global" | "project", + description: string, + content: string, + modeSlugs?: string[], + ): Promise + updateSkillContent(name: string, source: "global" | "project", content: string, mode?: string): Promise + } +} + +/** + * Shared learning defaults re-exported for local convenience. + */ +export const DEFAULT_CONFIG: LearningConfig = DEFAULT_LEARNING_CONFIG + +/** + * Shared empty learning state re-exported for local convenience. + */ +export const EMPTY_STATE: LearningState = EMPTY_LEARNING_STATE + +/** + * A single requirement extracted from a user prompt. + */ +export interface Requirement { + id: string + /** The original requirement text extracted from user prompt */ + text: string + /** Category of requirement */ + category: "functional" | "non-functional" | "constraint" | "goal" | "edge-case" | "security" | "compliance" + /** Current verification status */ + status: "pending" | "verified" | "failed" | "skipped" | "superseded" + /** How this requirement was verified */ + verifiedBy?: "code-review" | "test" | "manual" | "build" | "lint" | "type-check" + /** Evidence that this requirement is fulfilled */ + evidence?: string + /** Timestamp when verified */ + verifiedAt?: number + /** Optional linked todo item ID */ + todoId?: string + /** Index of the user message this requirement was extracted from (0 = first message) */ + messageIndex: number + /** If superseded, the ID of the requirement that superseded this one */ + supersededBy?: string + /** If this requirement supersedes another, the ID of the superseded requirement */ + supersedes?: string +} + 
+/** + * Result of conflict resolution between a new requirement and existing ones. + */ +export interface ConflictResolution { + /** IDs of existing requirements that are superseded by the new requirement */ + supersedes: string[] + /** Confidence score 0-1 */ + confidence: number + /** Explanation of the decision */ + reason: string +} + +/** + * Pluggable conflict resolver that determines if a new requirement supersedes existing ones. + */ +export interface ConflictResolver { + readonly name: string + /** + * Determine if a new requirement supersedes any existing requirements. + * @param newRequirement The newly extracted requirement + * @param existingRequirements All currently active (non-superseded) requirements + * @param newMessageIndex The index of the message this requirement came from + * @param allMessages All user messages in the session (for context) + */ + resolve( + newRequirement: Requirement, + existingRequirements: Requirement[], + newMessageIndex: number, + allMessages: string[], + ): Promise +} + +/** + * Result of running requirements verification. 
+ */ +export interface RequirementsVerificationResult { + /** All requirements passed */ + passed: boolean + /** Total requirements extracted */ + total: number + /** Requirements that passed verification */ + verified: Requirement[] + /** Requirements that failed verification */ + failed: Requirement[] + /** Requirements not yet checked */ + pending: Requirement[] + /** Summary message */ + summary: string +} diff --git a/src/services/skills/SkillsManager.ts b/src/services/skills/SkillsManager.ts index 0959b977c9..5f63f99fe3 100644 --- a/src/services/skills/SkillsManager.ts +++ b/src/services/skills/SkillsManager.ts @@ -49,6 +49,23 @@ export class SkillsManager { } } + private async postSkillsUpdatedMessage(text: string): Promise { + const provider = this.providerRef.deref() + if (!provider) { + return + } + + await provider.postMessageToWebview({ + type: "skillsUpdated", + text, + skills: this.getSkillsMetadata(), + }) + } + + private getSkillNameFromUri(uri: vscode.Uri): string { + return path.basename(path.dirname(uri.fsPath)) || "skill" + } + /** * Scan a skills directory for skill subdirectories. * Handles two symlink cases: @@ -290,6 +307,30 @@ export class SkillsManager { return this.getAllSkills() } + /** + * Get the unique discovered skill names. + */ + getSkillNames(): string[] { + return Array.from(new Set(this.getAllSkills().map((skill) => skill.name))).sort() + } + + /** + * Infer skill provenance for autonomous mutation safety. + * Project and global discovered skills default to user-authored unless + * a higher-level caller tracks agent provenance separately. + */ + getSkillProvenance(name: string): "user" | "bundled" | "hub" | "unknown" { + return this.getAllSkills().some((skill) => skill.name === name) ? "user" : "unknown" + } + + getSkillProvenanceForSource(name: string, source: "global" | "project"): "user" | "bundled" | "hub" | "unknown" { + return this.findSkillByNameAndSource(name, source) ? 
"user" : "unknown" + } + + hasSkill(name: string, source: "global" | "project"): boolean { + return this.findSkillByNameAndSource(name, source) !== undefined + } + /** * Get a skill by name, source, and optionally mode */ @@ -323,6 +364,36 @@ export class SkillsManager { return { valid: true } } + private validateSkillDocumentStructure(name: string, content: string, expectedDescription?: string): void { + const { data: frontmatter } = matter(content) + const frontmatterName = typeof frontmatter.name === "string" ? frontmatter.name.trim() : "" + const frontmatterDescription = typeof frontmatter.description === "string" ? frontmatter.description.trim() : "" + + if (!frontmatterName || !frontmatterDescription) { + throw new Error( + t("skills:errors.invalid_structure", { + reason: "missing required frontmatter fields: name, description", + }), + ) + } + + if (frontmatterName !== name) { + throw new Error(t("skills:errors.invalid_structure", { reason: `frontmatter name must match \"${name}\"` })) + } + + if (frontmatterDescription.length < 1 || frontmatterDescription.length > 1024) { + throw new Error(t("skills:errors.description_length", { length: frontmatterDescription.length })) + } + + if (expectedDescription && frontmatterDescription !== expectedDescription.trim()) { + throw new Error( + t("skills:errors.invalid_structure", { + reason: "frontmatter description must match the provided description", + }), + ) + } + } + /** * Convert skill name validation error code to a user-friendly error message. 
*/ @@ -350,6 +421,43 @@ export class SkillsManager { source: "global" | "project", description: string, modeSlugs?: string[], + ): Promise { + const titleName = name + .split("-") + .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) + .join(" ") + + const frontmatterLines = [`name: ${name}`, `description: ${description.trim()}`] + if (modeSlugs && modeSlugs.length > 0) { + frontmatterLines.push(`modeSlugs:`) + for (const slug of modeSlugs) { + frontmatterLines.push(` - ${slug}`) + } + } + + const skillContent = `--- +${frontmatterLines.join("\n")} +--- + +# ${titleName} + +## Instructions + +Add your skill instructions here. +` + + return this.createSkillFromContent(name, source, description, skillContent, modeSlugs) + } + + /** + * Create a new skill from fully prepared SKILL.md content. + */ + async createSkillFromContent( + name: string, + source: "global" | "project", + description: string, + content: string, + modeSlugs?: string[], ): Promise { // Validate skill name const validation = this.validateSkillName(name) @@ -363,6 +471,11 @@ export class SkillsManager { throw new Error(t("skills:errors.description_length", { length: trimmedDescription.length })) } + if (!content.trim()) { + throw new Error(t("skills:errors.description_length", { length: 0 })) + } + this.validateSkillDocumentStructure(name, content, trimmedDescription) + // Determine base directory let baseDir: string if (source === "global") { @@ -375,52 +488,48 @@ export class SkillsManager { baseDir = path.join(provider.cwd, ".roo") } - // Always use the generic skills directory (mode info stored in frontmatter now) const skillsDir = path.join(baseDir, "skills") const skillDir = path.join(skillsDir, name) const skillMdPath = path.join(skillDir, "SKILL.md") - // Check if skill already exists if (await fileExists(skillMdPath)) { throw new Error(t("skills:errors.already_exists", { name, path: skillMdPath })) } - // Create the skill directory await fs.mkdir(skillDir, { recursive: true }) + 
await fs.writeFile(skillMdPath, content, "utf-8") + await this.discoverSkills() + await this.postSkillsUpdatedMessage(`Skill created: "${name}"`) - // Generate SKILL.md content with frontmatter - const titleName = name - .split("-") - .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) - .join(" ") + return skillMdPath + } - // Build frontmatter with optional modeSlugs - const frontmatterLines = [`name: ${name}`, `description: ${trimmedDescription}`] - if (modeSlugs && modeSlugs.length > 0) { - frontmatterLines.push(`modeSlugs:`) - for (const slug of modeSlugs) { - frontmatterLines.push(` - ${slug}`) - } + /** + * Update the full SKILL.md content for an existing skill. + */ + async updateSkillContent( + name: string, + source: "global" | "project", + content: string, + mode?: string, + ): Promise { + const skill = mode + ? Array.from(this.skills.values()).find( + (candidate) => + candidate.name === name && + candidate.source === source && + (candidate.mode === mode || candidate.modeSlugs?.includes(mode)), + ) + : this.findSkillByNameAndSource(name, source) + if (!skill) { + const modeInfo = mode ? ` (mode: ${mode})` : "" + throw new Error(t("skills:errors.not_found", { name, source, modeInfo })) } + this.validateSkillDocumentStructure(name, content) - const skillContent = `--- -${frontmatterLines.join("\n")} ---- - -# ${titleName} - -## Instructions - -Add your skill instructions here. -` - - // Write the SKILL.md file - await fs.writeFile(skillMdPath, skillContent, "utf-8") - - // Refresh skills list + await fs.writeFile(skill.path, content, "utf-8") await this.discoverSkills() - - return skillMdPath + await this.postSkillsUpdatedMessage(`Skill updated: "${name}"`) } /** @@ -445,6 +554,7 @@ Add your skill instructions here. // Refresh skills list await this.discoverSkills() + await this.postSkillsUpdatedMessage(`Skill deleted: "${name}"`) } /** @@ -516,6 +626,7 @@ Add your skill instructions here. 
// Refresh skills list await this.discoverSkills() + await this.postSkillsUpdatedMessage(`Skill updated: "${name}"`) } /** @@ -559,6 +670,7 @@ Add your skill instructions here. // Refresh skills list await this.discoverSkills() + await this.postSkillsUpdatedMessage(`Skill updated: "${name}"`) } /** @@ -694,17 +806,23 @@ Add your skill instructions here. watcher.onDidChange(async (uri) => { if (this.isDisposed) return + const skillName = this.getSkillNameFromUri(uri) await this.discoverSkills() + await this.postSkillsUpdatedMessage(`Skill updated: "${skillName}"`) }) watcher.onDidCreate(async (uri) => { if (this.isDisposed) return + const skillName = this.getSkillNameFromUri(uri) await this.discoverSkills() + await this.postSkillsUpdatedMessage(`Skill created: "${skillName}"`) }) watcher.onDidDelete(async (uri) => { if (this.isDisposed) return + const skillName = this.getSkillNameFromUri(uri) await this.discoverSkills() + await this.postSkillsUpdatedMessage(`Skill deleted: "${skillName}"`) }) this.disposables.push(watcher) diff --git a/src/services/skills/__tests__/SkillsManager.spec.ts b/src/services/skills/__tests__/SkillsManager.spec.ts index d36582d893..678267277d 100644 --- a/src/services/skills/__tests__/SkillsManager.spec.ts +++ b/src/services/skills/__tests__/SkillsManager.spec.ts @@ -101,6 +101,7 @@ vi.mock("../../../i18n", () => ({ "skills:errors.name_format": "Skill name must be lowercase letters/numbers/hyphens only (no leading/trailing hyphen, no consecutive hyphens)", "skills:errors.description_length": `Skill description must be 1-1024 characters (got ${params?.length})`, + "skills:errors.invalid_structure": `Invalid SKILL.md structure: ${params?.reason}`, "skills:errors.no_workspace": "Cannot create project skill: no workspace folder is open", "skills:errors.already_exists": `Skill "${params?.name}" already exists at ${params?.path}`, "skills:errors.not_found": `Skill "${params?.name}" not found in ${params?.source}${params?.modeInfo}`, @@ -115,6 
+116,7 @@ import { ClineProvider } from "../../../core/webview/ClineProvider" describe("SkillsManager", () => { let skillsManager: SkillsManager let mockProvider: Partial + let mockPostMessageToWebview: ReturnType // Pre-computed paths for tests const globalSkillsDir = p(GLOBAL_ROO_DIR, "skills") @@ -131,10 +133,12 @@ describe("SkillsManager", () => { beforeEach(() => { vi.clearAllMocks() mockHomedir.mockReturnValue(HOME_DIR) + mockPostMessageToWebview = vi.fn() // Create mock provider mockProvider = { cwd: PROJECT_DIR, + postMessageToWebview: mockPostMessageToWebview, customModesManager: { getCustomModes: vi.fn().mockResolvedValue([]), } as any, @@ -1297,6 +1301,212 @@ Instructions`) "already exists", ) }) + + it("should create a skill from provided full content", async () => { + mockDirectoryExists.mockResolvedValue(false) + mockRealpath.mockImplementation(async (p: string) => p) + mockReaddir.mockResolvedValue([]) + mockFileExists.mockResolvedValue(false) + mockMkdir.mockResolvedValue(undefined) + mockWriteFile.mockResolvedValue(undefined) + + const content = `---\nname: workflow-read-file-search-files\ndescription: Use when read_file and search_files succeed repeatedly.\n---\n\n# Workflow\n\nUse it.` + + const createdPath = await skillsManager.createSkillFromContent( + "workflow-read-file-search-files", + "project", + "Use when read_file and search_files succeed repeatedly.", + content, + ["code"], + ) + + expect(createdPath).toBe(p(PROJECT_DIR, ".roo", "skills", "workflow-read-file-search-files", "SKILL.md")) + expect(mockWriteFile).toHaveBeenCalledWith( + p(PROJECT_DIR, ".roo", "skills", "workflow-read-file-search-files", "SKILL.md"), + content, + "utf-8", + ) + }) + + it("rejects createSkillFromContent when SKILL.md frontmatter is missing required fields", async () => { + mockDirectoryExists.mockResolvedValue(false) + mockFileExists.mockResolvedValue(false) + + await expect( + skillsManager.createSkillFromContent( + "workflow-read-file-search-files", + 
"project", + "Use when read_file and search_files succeed repeatedly.", + "# Workflow\n\nUse it.", + ["code"], + ), + ).rejects.toThrow("Invalid SKILL.md structure") + expect(mockWriteFile).not.toHaveBeenCalled() + }) + + it("should push a live skills created message to the webview after creating a skill", async () => { + const newSkillDir = p(globalSkillsDir, "new-skill") + const newSkillMd = p(newSkillDir, "SKILL.md") + let created = false + + mockDirectoryExists.mockImplementation(async (dir: string) => created && dir === globalSkillsDir) + mockRealpath.mockImplementation(async (p: string) => p) + mockReaddir.mockImplementation(async (dir: string) => + created && dir === globalSkillsDir ? ["new-skill"] : [], + ) + mockStat.mockImplementation(async (pathArg: string) => { + if (created && pathArg === newSkillDir) { + return { isDirectory: () => true } + } + throw new Error("Not found") + }) + mockFileExists.mockImplementation(async (file: string) => created && file === newSkillMd) + mockMkdir.mockResolvedValue(undefined) + mockWriteFile.mockImplementation(async () => { + created = true + }) + mockReadFile.mockImplementation(async (file: string) => { + if (created && file === newSkillMd) { + return `---\nname: new-skill\ndescription: A new skill description\n---\n\n# New Skill` + } + throw new Error("File not found") + }) + + await skillsManager.createSkill("new-skill", "global", "A new skill description") + + expect(mockPostMessageToWebview).toHaveBeenCalledWith( + expect.objectContaining({ + type: "skillsUpdated", + text: expect.stringContaining("new-skill"), + skills: expect.arrayContaining([ + expect.objectContaining({ + name: "new-skill", + source: "global", + }), + ]), + }), + ) + }) + }) + + describe("updateSkillContent", () => { + it("should update an existing skill with new content", async () => { + const testSkillDir = p(globalSkillsDir, "test-skill") + const testSkillMd = p(testSkillDir, "SKILL.md") + + mockDirectoryExists.mockImplementation(async (dir: 
string) => dir === globalSkillsDir) + mockRealpath.mockImplementation(async (pathArg: string) => pathArg) + mockReaddir.mockImplementation(async (dir: string) => (dir === globalSkillsDir ? ["test-skill"] : [])) + mockStat.mockImplementation(async (pathArg: string) => { + if (pathArg === testSkillDir) { + return { isDirectory: () => true } + } + throw new Error("Not found") + }) + mockFileExists.mockImplementation(async (file: string) => file === testSkillMd) + mockReadFile.mockResolvedValue(`---\nname: test-skill\ndescription: A test skill\n---\n\nOriginal content`) + mockWriteFile.mockResolvedValue(undefined) + + await skillsManager.discoverSkills() + + const updatedContent = `---\nname: test-skill\ndescription: Updated test skill\n---\n\nUpdated content` + + await expect( + skillsManager.updateSkillContent("test-skill", "global", updatedContent), + ).resolves.toBeUndefined() + expect(mockWriteFile).toHaveBeenCalledWith(testSkillMd, updatedContent, "utf-8") + }) + + it("rejects updateSkillContent when SKILL.md frontmatter becomes invalid", async () => { + const testSkillDir = p(globalSkillsDir, "test-skill") + const testSkillMd = p(testSkillDir, "SKILL.md") + + mockDirectoryExists.mockImplementation(async (dir: string) => dir === globalSkillsDir) + mockRealpath.mockImplementation(async (pathArg: string) => pathArg) + mockReaddir.mockImplementation(async (dir: string) => (dir === globalSkillsDir ? 
["test-skill"] : [])) + mockStat.mockImplementation(async (pathArg: string) => { + if (pathArg === testSkillDir) { + return { isDirectory: () => true } + } + throw new Error("Not found") + }) + mockFileExists.mockImplementation(async (file: string) => file === testSkillMd) + mockReadFile.mockResolvedValue(`---\nname: test-skill\ndescription: A test skill\n---\n\nOriginal content`) + mockWriteFile.mockResolvedValue(undefined) + + await skillsManager.discoverSkills() + + await expect(skillsManager.updateSkillContent("test-skill", "global", "# Broken content")).rejects.toThrow( + "Invalid SKILL.md structure", + ) + expect(mockWriteFile).not.toHaveBeenCalled() + }) + + it("updates a multi-mode skill when addressed by a secondary mode slug", async () => { + const testSkillDir = p(globalSkillsDir, "test-skill") + const testSkillMd = p(testSkillDir, "SKILL.md") + + mockDirectoryExists.mockImplementation(async (dir: string) => dir === globalSkillsDir) + mockRealpath.mockImplementation(async (pathArg: string) => pathArg) + mockReaddir.mockImplementation(async (dir: string) => (dir === globalSkillsDir ? 
["test-skill"] : [])) + mockStat.mockImplementation(async (pathArg: string) => { + if (pathArg === testSkillDir) { + return { isDirectory: () => true } + } + throw new Error("Not found") + }) + mockFileExists.mockImplementation(async (file: string) => file === testSkillMd) + mockReadFile.mockResolvedValue( + `---\nname: test-skill\ndescription: A test skill\nmodeSlugs:\n - code\n - architect\n---\n\nOriginal content`, + ) + mockWriteFile.mockResolvedValue(undefined) + + await skillsManager.discoverSkills() + + const updatedContent = `---\nname: test-skill\ndescription: Updated test skill\nmodeSlugs:\n - code\n - architect\n---\n\nUpdated content` + + await expect( + skillsManager.updateSkillContent("test-skill", "global", updatedContent, "architect"), + ).resolves.toBeUndefined() + expect(mockWriteFile).toHaveBeenCalledWith(testSkillMd, updatedContent, "utf-8") + }) + + it("should push a live skills updated message to the webview after updating a skill", async () => { + const testSkillDir = p(globalSkillsDir, "test-skill") + const testSkillMd = p(testSkillDir, "SKILL.md") + + mockDirectoryExists.mockImplementation(async (dir: string) => dir === globalSkillsDir) + mockRealpath.mockImplementation(async (pathArg: string) => pathArg) + mockReaddir.mockImplementation(async (dir: string) => (dir === globalSkillsDir ? 
["test-skill"] : [])) + mockStat.mockImplementation(async (pathArg: string) => { + if (pathArg === testSkillDir) { + return { isDirectory: () => true } + } + throw new Error("Not found") + }) + mockFileExists.mockImplementation(async (file: string) => file === testSkillMd) + mockReadFile.mockResolvedValue(`---\nname: test-skill\ndescription: A test skill\n---\n\nOriginal content`) + mockWriteFile.mockResolvedValue(undefined) + + await skillsManager.discoverSkills() + + const updatedContent = `---\nname: test-skill\ndescription: Updated test skill\n---\n\nUpdated content` + + await skillsManager.updateSkillContent("test-skill", "global", updatedContent) + + expect(mockPostMessageToWebview).toHaveBeenCalledWith( + expect.objectContaining({ + type: "skillsUpdated", + text: expect.stringContaining("test-skill"), + skills: expect.arrayContaining([ + expect.objectContaining({ + name: "test-skill", + source: "global", + }), + ]), + }), + ) + }) }) describe("deleteSkill", () => { diff --git a/src/shared/__tests__/experiments-preventFocusDisruption.spec.ts b/src/shared/__tests__/experiments-preventFocusDisruption.spec.ts index e9f96c7ce7..e6e7c89bf0 100644 --- a/src/shared/__tests__/experiments-preventFocusDisruption.spec.ts +++ b/src/shared/__tests__/experiments-preventFocusDisruption.spec.ts @@ -7,11 +7,11 @@ describe("PREVENT_FOCUS_DISRUPTION experiment", () => { it("should have PREVENT_FOCUS_DISRUPTION in experimentConfigsMap", () => { expect(experimentConfigsMap.PREVENT_FOCUS_DISRUPTION).toBeDefined() - expect(experimentConfigsMap.PREVENT_FOCUS_DISRUPTION.enabled).toBe(false) + expect(experimentConfigsMap.PREVENT_FOCUS_DISRUPTION.enabled).toBe(true) }) it("should have PREVENT_FOCUS_DISRUPTION in experimentDefault", () => { - expect(experimentDefault.preventFocusDisruption).toBe(false) + expect(experimentDefault.preventFocusDisruption).toBe(true) }) it("should correctly check if PREVENT_FOCUS_DISRUPTION is enabled", () => { @@ -25,6 +25,6 @@ 
describe("PREVENT_FOCUS_DISRUPTION experiment", () => { // Test when experiment is not in config (should use default) const emptyConfig = {} - expect(experiments.isEnabled(emptyConfig, EXPERIMENT_IDS.PREVENT_FOCUS_DISRUPTION)).toBe(false) + expect(experiments.isEnabled(emptyConfig, EXPERIMENT_IDS.PREVENT_FOCUS_DISRUPTION)).toBe(true) }) }) diff --git a/src/shared/__tests__/experiments.spec.ts b/src/shared/__tests__/experiments.spec.ts index 92a7d7604f..c82696543c 100644 --- a/src/shared/__tests__/experiments.spec.ts +++ b/src/shared/__tests__/experiments.spec.ts @@ -9,7 +9,25 @@ describe("experiments", () => { it("is configured correctly", () => { expect(EXPERIMENT_IDS.PREVENT_FOCUS_DISRUPTION).toBe("preventFocusDisruption") expect(experimentConfigsMap.PREVENT_FOCUS_DISRUPTION).toMatchObject({ - enabled: false, + enabled: true, + }) + }) + }) + + describe("SELF_IMPROVING", () => { + it("is configured correctly", () => { + expect(EXPERIMENT_IDS.SELF_IMPROVING).toBe("selfImproving") + expect(experimentConfigsMap.SELF_IMPROVING).toMatchObject({ + enabled: true, + }) + }) + }) + + describe("SELF_IMPROVING_AUTO_SKILLS", () => { + it("is configured correctly", () => { + expect(EXPERIMENT_IDS.SELF_IMPROVING_AUTO_SKILLS).toBe("selfImprovingAutoSkills") + expect(experimentConfigsMap.SELF_IMPROVING_AUTO_SKILLS).toMatchObject({ + enabled: true, }) }) }) @@ -21,6 +39,27 @@ describe("experiments", () => { imageGeneration: false, runSlashCommand: false, customTools: false, + selfImproving: false, + selfImprovingAutoSkills: false, + selfImprovingAutoMode: false, + selfImprovingReviewTeam: false, + selfImprovingFullTrust: false, + selfImprovingQuestionEvaluation: false, + selfImprovingPromptQuality: false, + selfImprovingToolPreference: false, + selfImprovingSkillMerge: false, + selfImprovingPersistCounts: false, + selfImprovingCodeIndex: false, + verificationEngine: false, + requirementsVerification: false, + oneShotOrchestrator: false, + kaizenOrchestrator: false, + 
preventionEngine: false, + cascadeTracker: false, + resilienceService: false, + toolErrorHealer: false, + recoveryContext: false, + selfImprovingSpecializedSkills: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.PREVENT_FOCUS_DISRUPTION)).toBe(false) }) @@ -31,18 +70,60 @@ describe("experiments", () => { imageGeneration: false, runSlashCommand: false, customTools: false, + selfImproving: false, + selfImprovingAutoSkills: false, + selfImprovingAutoMode: false, + selfImprovingReviewTeam: false, + selfImprovingFullTrust: false, + selfImprovingQuestionEvaluation: false, + selfImprovingPromptQuality: false, + selfImprovingToolPreference: false, + selfImprovingSkillMerge: false, + selfImprovingPersistCounts: false, + selfImprovingCodeIndex: false, + verificationEngine: false, + requirementsVerification: false, + oneShotOrchestrator: false, + kaizenOrchestrator: false, + preventionEngine: false, + cascadeTracker: false, + resilienceService: false, + toolErrorHealer: false, + recoveryContext: false, + selfImprovingSpecializedSkills: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.PREVENT_FOCUS_DISRUPTION)).toBe(true) }) - it("returns false when experiment is not present", () => { + it("returns false when experiment is not in the map", () => { const experiments: Record = { preventFocusDisruption: false, imageGeneration: false, runSlashCommand: false, customTools: false, + selfImproving: false, + selfImprovingAutoSkills: false, + selfImprovingAutoMode: false, + selfImprovingReviewTeam: false, + selfImprovingFullTrust: false, + selfImprovingQuestionEvaluation: false, + selfImprovingPromptQuality: false, + selfImprovingToolPreference: false, + selfImprovingSkillMerge: false, + selfImprovingPersistCounts: false, + selfImprovingCodeIndex: false, + verificationEngine: false, + requirementsVerification: false, + oneShotOrchestrator: false, + kaizenOrchestrator: false, + preventionEngine: false, + cascadeTracker: false, + resilienceService: 
false, + toolErrorHealer: false, + recoveryContext: false, + selfImprovingSpecializedSkills: false, } - expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.PREVENT_FOCUS_DISRUPTION)).toBe(false) + expect(Experiments.isEnabled(experiments, "nonExistentExperiment" as ExperimentId)).toBeUndefined() }) }) }) diff --git a/src/shared/experiments.ts b/src/shared/experiments.ts index e189f99e23..3337c41c92 100644 --- a/src/shared/experiments.ts +++ b/src/shared/experiments.ts @@ -5,6 +5,27 @@ export const EXPERIMENT_IDS = { IMAGE_GENERATION: "imageGeneration", RUN_SLASH_COMMAND: "runSlashCommand", CUSTOM_TOOLS: "customTools", + SELF_IMPROVING: "selfImproving", + SELF_IMPROVING_AUTO_SKILLS: "selfImprovingAutoSkills", + SELF_IMPROVING_AUTO_MODE: "selfImprovingAutoMode", + SELF_IMPROVING_REVIEW_TEAM: "selfImprovingReviewTeam", + SELF_IMPROVING_FULL_TRUST: "selfImprovingFullTrust", + SELF_IMPROVING_QUESTION_EVALUATION: "selfImprovingQuestionEvaluation", + SELF_IMPROVING_PROMPT_QUALITY: "selfImprovingPromptQuality", + SELF_IMPROVING_TOOL_PREFERENCE: "selfImprovingToolPreference", + SELF_IMPROVING_SKILL_MERGE: "selfImprovingSkillMerge", + SELF_IMPROVING_PERSIST_COUNTS: "selfImprovingPersistCounts", + SELF_IMPROVING_CODE_INDEX: "selfImprovingCodeIndex", + ONE_SHOT_ORCHESTRATOR: "oneShotOrchestrator", + KAIZEN_ORCHESTRATOR: "kaizenOrchestrator", + PREVENTION_ENGINE: "preventionEngine", + CASCADE_TRACKER: "cascadeTracker", + RESILIENCE_SERVICE: "resilienceService", + TOOL_ERROR_HEALER: "toolErrorHealer", + VERIFICATION_ENGINE: "verificationEngine", + REQUIREMENTS_VERIFICATION: "requirementsVerification", + RECOVERY_CONTEXT: "recoveryContext", + SELF_IMPROVING_SPECIALIZED_SKILLS: "selfImprovingSpecializedSkills", } as const satisfies Record type _AssertExperimentIds = AssertEqual>> @@ -16,10 +37,31 @@ interface ExperimentConfig { } export const experimentConfigsMap: Record = { - PREVENT_FOCUS_DISRUPTION: { enabled: false }, - IMAGE_GENERATION: { enabled: false }, - 
RUN_SLASH_COMMAND: { enabled: false }, - CUSTOM_TOOLS: { enabled: false }, + PREVENT_FOCUS_DISRUPTION: { enabled: true }, + IMAGE_GENERATION: { enabled: true }, + RUN_SLASH_COMMAND: { enabled: true }, + CUSTOM_TOOLS: { enabled: true }, + SELF_IMPROVING: { enabled: true }, + SELF_IMPROVING_AUTO_SKILLS: { enabled: true }, + SELF_IMPROVING_AUTO_MODE: { enabled: true }, + SELF_IMPROVING_REVIEW_TEAM: { enabled: true }, + SELF_IMPROVING_FULL_TRUST: { enabled: true }, + SELF_IMPROVING_QUESTION_EVALUATION: { enabled: true }, + SELF_IMPROVING_PROMPT_QUALITY: { enabled: true }, + SELF_IMPROVING_TOOL_PREFERENCE: { enabled: true }, + SELF_IMPROVING_SKILL_MERGE: { enabled: true }, + SELF_IMPROVING_PERSIST_COUNTS: { enabled: true }, + SELF_IMPROVING_CODE_INDEX: { enabled: true }, + ONE_SHOT_ORCHESTRATOR: { enabled: true }, + KAIZEN_ORCHESTRATOR: { enabled: true }, + PREVENTION_ENGINE: { enabled: true }, + CASCADE_TRACKER: { enabled: true }, + RESILIENCE_SERVICE: { enabled: true }, + TOOL_ERROR_HEALER: { enabled: true }, + VERIFICATION_ENGINE: { enabled: true }, + REQUIREMENTS_VERIFICATION: { enabled: true }, + RECOVERY_CONTEXT: { enabled: true }, + SELF_IMPROVING_SPECIALIZED_SKILLS: { enabled: true }, } export const experimentDefault = Object.fromEntries( diff --git a/src/skills/react-component-builder/scripts/validate.sh b/src/skills/react-component-builder/scripts/validate.sh new file mode 100644 index 0000000000..e63e3123ae --- /dev/null +++ b/src/skills/react-component-builder/scripts/validate.sh @@ -0,0 +1,2 @@ +#!/bin/bash +echo validate \ No newline at end of file diff --git a/src/utils/git.ts b/src/utils/git.ts index 04c028c3d1..21819a6cc6 100644 --- a/src/utils/git.ts +++ b/src/utils/git.ts @@ -396,3 +396,65 @@ export async function getGitStatus(cwd: string, maxFiles: number = 20): Promise< return null } } + +/** + * Gets the current branch name from the git repository + * @param cwd The working directory of the git repository + * @returns The current branch name + */ 
+export async function getCurrentBranch(cwd: string): Promise { + const { stdout } = await execAsync("git rev-parse --abbrev-ref HEAD", { cwd }) + return stdout.trim() +} + +/** + * Stages all changes in the working directory + * @param cwd The working directory of the git repository + * @returns The git command output + */ +export async function gitAddAll(cwd: string): Promise { + const { stdout } = await execAsync("git add .", { cwd }) + return stdout.trim() +} + +/** + * Creates a git commit with the given message + * @param cwd The working directory of the git repository + * @param message The commit message + * @returns The git command output + */ +export async function gitCommit(cwd: string, message: string): Promise { + const { stdout } = await execAsync(`git commit -m "${message.replace(/"/g, '\\"')}"`, { cwd }) + return stdout.trim() +} + +/** + * Pushes changes to a remote repository + * @param cwd The working directory of the git repository + * @param remote The remote name (default: "origin") + * @param branch The branch to push (default: auto-detected from current branch) + * @returns The git command output + */ +export async function gitPush(cwd: string, remote: string = "origin", branch?: string): Promise { + const actualBranch = branch || (await getCurrentBranch(cwd)) + const { stdout } = await execAsync(`git push ${remote} ${actualBranch}`, { cwd }) + return stdout.trim() +} + +/** + * Performs the full git add, commit, push cycle in one call + * @param cwd The working directory of the git repository + * @param message The commit message + * @param remote The remote name (default: "origin") + * @returns Object containing outputs from each step + */ +export async function gitAddCommitPush( + cwd: string, + message: string, + remote: string = "origin", +): Promise<{ add: string; commit: string; push: string }> { + const add = await gitAddAll(cwd) + const commit = await gitCommit(cwd, message) + const push = await gitPush(cwd, remote) + return { add, 
commit, push } +} diff --git a/webview-ui/package.json b/webview-ui/package.json index 047ac73ad2..a632cc8c36 100644 --- a/webview-ui/package.json +++ b/webview-ui/package.json @@ -106,6 +106,7 @@ "identity-obj-proxy": "^3.0.0", "jsdom": "^26.0.0", "vite": "^8.0.13", + "vite-tsconfig-paths": "^6.1.1", "vitest": "^3.2.3" } } diff --git a/webview-ui/src/components/chat/ModeSelector.tsx b/webview-ui/src/components/chat/ModeSelector.tsx index b436d92225..58574e50cb 100644 --- a/webview-ui/src/components/chat/ModeSelector.tsx +++ b/webview-ui/src/components/chat/ModeSelector.tsx @@ -18,6 +18,27 @@ import { IconButton } from "./IconButton" const SEARCH_THRESHOLD = 6 +/** + * Checks whether ALL self-improving experiment flags are enabled. + * ONE-SHOT and KAIZEN orchestrator modes require every self-improving + * sub-feature to be active before they become available in the mode selector. + */ +const areAllSelfImprovingEnabled = (experiments: Record): boolean => { + const requiredFlags = [ + "selfImprovingPromptQuality", + "selfImprovingToolPreference", + "selfImprovingSkillMerge", + "selfImprovingPersistCounts", + "selfImprovingCodeIndex", + "selfImprovingReviewTeam", + "selfImprovingQuestionEvaluation", + "selfImprovingAutoSkills", + "selfImprovingAutoMode", + "selfImprovingFullTrust", + ] as const + return requiredFlags.every((flag) => experiments[flag] === true) +} + interface ModeSelectorProps { value: Mode onChange: (value: Mode) => void @@ -48,7 +69,7 @@ export const ModeSelector = ({ const scrollContainerRef = React.useRef(null) const lastNotifiedInvalidModeRef = React.useRef(null) const portalContainer = useRooPortal("roo-portal") - const { hasOpenedModeSelector, setHasOpenedModeSelector } = useExtensionState() + const { hasOpenedModeSelector, setHasOpenedModeSelector, experiments } = useExtensionState() const { t } = useAppTranslation() const trackModeSelectorOpened = React.useCallback(() => { @@ -63,14 +84,32 @@ export const ModeSelector = ({ }, 
[hasOpenedModeSelector, setHasOpenedModeSelector]) // Get all modes including custom modes and merge custom prompt descriptions. + // ONE-SHOT and KAIZEN orchestrator modes are gated behind ALL self-improving + // experiment flags being enabled + their own experiment flag. const modes = React.useMemo(() => { const allModes = getAllModes(customModes) - return allModes.map((mode) => ({ - ...mode, - description: customModePrompts?.[mode.slug]?.description ?? mode.description, - })) - }, [customModes, customModePrompts]) + return allModes + .map((mode) => ({ + ...mode, + description: customModePrompts?.[mode.slug]?.description ?? mode.description, + })) + .filter((mode) => { + if (mode.slug === "one-shot-orchestrator") { + return ( + experiments?.oneShotOrchestrator === true && + areAllSelfImprovingEnabled(experiments ?? {}) + ) + } + if (mode.slug === "kaizen-orchestrator") { + return ( + experiments?.kaizenOrchestrator === true && + areAllSelfImprovingEnabled(experiments ?? {}) + ) + } + return true // always show other modes + }) + }, [customModes, customModePrompts, experiments]) // Find the selected mode, falling back to default if current mode doesn't exist (e.g., after workspace switch) const selectedMode = React.useMemo(() => { diff --git a/webview-ui/src/components/settings/ExperimentalFeature.tsx b/webview-ui/src/components/settings/ExperimentalFeature.tsx index a96a00a426..8c427308a5 100644 --- a/webview-ui/src/components/settings/ExperimentalFeature.tsx +++ b/webview-ui/src/components/settings/ExperimentalFeature.tsx @@ -6,9 +6,10 @@ interface ExperimentalFeatureProps { onChange: (value: boolean) => void // Additional property to identify the experiment experimentKey?: string + checkboxTestId?: string } -export const ExperimentalFeature = ({ enabled, onChange, experimentKey }: ExperimentalFeatureProps) => { +export const ExperimentalFeature = ({ enabled, onChange, experimentKey, checkboxTestId }: ExperimentalFeatureProps) => { const { t } = 
useAppTranslation() // Generate translation keys based on experiment key @@ -18,7 +19,10 @@ export const ExperimentalFeature = ({ enabled, onChange, experimentKey }: Experi return (
- onChange(e.target.checked)}> + onChange(e.target.checked)} + data-testid={checkboxTestId}> {t(nameKey)}
diff --git a/webview-ui/src/components/settings/ExperimentalSettings.tsx b/webview-ui/src/components/settings/ExperimentalSettings.tsx index 23786ce0b9..0705edff20 100644 --- a/webview-ui/src/components/settings/ExperimentalSettings.tsx +++ b/webview-ui/src/components/settings/ExperimentalSettings.tsx @@ -5,6 +5,7 @@ import type { Experiments, ImageGenerationProvider } from "@roo-code/types" import { EXPERIMENT_IDS, experimentConfigsMap } from "@roo/experiments" import { useAppTranslation } from "@src/i18n/TranslationContext" +import { Input, Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui" import { cn } from "@src/lib/utils" import { SetExperimentEnabled } from "./types" @@ -14,6 +15,7 @@ import { SearchableSetting } from "./SearchableSetting" import { ExperimentalFeature } from "./ExperimentalFeature" import { ImageGenerationSettings } from "./ImageGenerationSettings" import { CustomToolsSettings } from "./CustomToolsSettings" +import { SelfImprovingStatus } from "./SelfImprovingStatus" type ExperimentalSettingsProps = HTMLAttributes & { experiments: Experiments @@ -23,9 +25,17 @@ type ExperimentalSettingsProps = HTMLAttributes & { imageGenerationProvider?: ImageGenerationProvider openRouterImageApiKey?: string openRouterImageGenerationSelectedModel?: string + memoryBackend?: "builtin" | "agentmemory" + agentMemoryUrl?: string + selfImprovingScope?: "workspace" | "global" + selfImprovingAutoSkillsScope?: "workspace" | "global" setImageGenerationProvider?: (provider: ImageGenerationProvider) => void setOpenRouterImageApiKey?: (apiKey: string) => void setImageGenerationSelectedModel?: (model: string) => void + setMemoryBackend?: (backend: "builtin" | "agentmemory") => void + setAgentMemoryUrl?: (url: string) => void + setSelfImprovingScope?: (scope: "workspace" | "global") => void + setSelfImprovingAutoSkillsScope?: (scope: "workspace" | "global") => void } export const ExperimentalSettings = ({ @@ -36,13 +46,35 @@ export const 
ExperimentalSettings = ({ imageGenerationProvider, openRouterImageApiKey, openRouterImageGenerationSelectedModel, + memoryBackend, + agentMemoryUrl, + selfImprovingScope, + selfImprovingAutoSkillsScope, setImageGenerationProvider, setOpenRouterImageApiKey, setImageGenerationSelectedModel, + setMemoryBackend, + setAgentMemoryUrl, + setSelfImprovingScope, + setSelfImprovingAutoSkillsScope, className, ...props }: ExperimentalSettingsProps) => { const { t } = useAppTranslation() + const autoSkillsVisible = experiments[EXPERIMENT_IDS.SELF_IMPROVING] ?? false + const autoSkillsEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_AUTO_SKILLS] ?? false + const autoModeEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_AUTO_MODE] ?? false + const reviewTeamEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_REVIEW_TEAM] ?? false + const fullTrustEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_FULL_TRUST] ?? false + const questionEvaluationEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_QUESTION_EVALUATION] ?? false + const promptQualityEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_PROMPT_QUALITY] ?? false + const toolPreferenceEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_TOOL_PREFERENCE] ?? false + const skillMergeEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_SKILL_MERGE] ?? false + const persistCountsEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_PERSIST_COUNTS] ?? false + const codeIndexEnabled = experiments[EXPERIMENT_IDS.SELF_IMPROVING_CODE_INDEX] ?? false + const currentMemoryBackend = memoryBackend ?? "builtin" + const currentSelfImprovingScope = selfImprovingScope ?? "global" + const currentAutoSkillsScope = selfImprovingAutoSkillsScope ?? "workspace" return (
@@ -50,9 +82,29 @@ export const ExperimentalSettings = ({
{Object.entries(experimentConfigsMap) - .filter(([key]) => key in EXPERIMENT_IDS) + .filter( + ([key]) => + key in EXPERIMENT_IDS && + key !== "SELF_IMPROVING_AUTO_SKILLS" && + key !== "SELF_IMPROVING_AUTO_MODE" && + key !== "SELF_IMPROVING_REVIEW_TEAM" && + key !== "SELF_IMPROVING_FULL_TRUST" && + key !== "SELF_IMPROVING_QUESTION_EVALUATION" && + key !== "SELF_IMPROVING_PROMPT_QUALITY" && + key !== "SELF_IMPROVING_TOOL_PREFERENCE" && + key !== "SELF_IMPROVING_SKILL_MERGE" && + key !== "SELF_IMPROVING_PERSIST_COUNTS" && + key !== "SELF_IMPROVING_CODE_INDEX" && + key !== "ONE_SHOT_ORCHESTRATOR" && + key !== "KAIZEN_ORCHESTRATOR" && + key !== "PREVENTION_ENGINE" && + key !== "CASCADE_TRACKER" && + key !== "RESILIENCE_SERVICE" && + key !== "TOOL_ERROR_HEALER" && + key !== "VERIFICATION_ENGINE" && + key !== "REQUIREMENTS_VERIFICATION", + ) .map((config) => { - // Use the same translation key pattern as ExperimentalFeature const experimentKey = config[0] const label = t(`settings:experimental.${experimentKey}.name`) @@ -99,6 +151,381 @@ export const ExperimentalSettings = ({ ) } + if (config[0] === "SELF_IMPROVING") { + return ( + +
+ + setExperimentEnabled(EXPERIMENT_IDS.SELF_IMPROVING, enabled) + } + checkboxTestId="experimental-self-improving-checkbox" + /> + {autoSkillsVisible && ( +
+ {setSelfImprovingScope && ( +
+ + +
+ )} + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_AUTO_SKILLS, + enabled, + ) + } + checkboxTestId="experimental-self-improving-auto-skills-checkbox" + /> + {autoSkillsEnabled && setSelfImprovingAutoSkillsScope && ( +
+ + +
+ )} + {setMemoryBackend && ( +
+ + +
+ )} + {currentMemoryBackend === "agentmemory" && setAgentMemoryUrl && ( +
+ + setAgentMemoryUrl(event.target.value)} + placeholder="http://localhost:3111" + data-testid="self-improving-agent-memory-url-input" + /> +
+ )} + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_AUTO_MODE, + enabled, + ) + } + checkboxTestId="experimental-self-improving-auto-mode-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_REVIEW_TEAM, + enabled, + ) + } + checkboxTestId="experimental-self-improving-review-team-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_FULL_TRUST, + enabled, + ) + } + checkboxTestId="experimental-self-improving-full-trust-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_QUESTION_EVALUATION, + enabled, + ) + } + checkboxTestId="experimental-self-improving-question-evaluation-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_PROMPT_QUALITY, + enabled, + ) + } + checkboxTestId="experimental-self-improving-prompt-quality-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_TOOL_PREFERENCE, + enabled, + ) + } + checkboxTestId="experimental-self-improving-tool-preference-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_SKILL_MERGE, + enabled, + ) + } + checkboxTestId="experimental-self-improving-skill-merge-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_PERSIST_COUNTS, + enabled, + ) + } + checkboxTestId="experimental-self-improving-persist-counts-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.SELF_IMPROVING_CODE_INDEX, + enabled, + ) + } + checkboxTestId="experimental-self-improving-code-index-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.PREVENTION_ENGINE, + enabled, + ) + } + checkboxTestId="experimental-prevention-engine-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.CASCADE_TRACKER, + enabled, + ) + } + checkboxTestId="experimental-cascade-tracker-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.RESILIENCE_SERVICE, + enabled, + ) + } + checkboxTestId="experimental-resilience-service-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.TOOL_ERROR_HEALER, + 
enabled, + ) + } + checkboxTestId="experimental-tool-error-healer-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.VERIFICATION_ENGINE, + enabled, + ) + } + checkboxTestId="experimental-verification-engine-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.REQUIREMENTS_VERIFICATION, + enabled, + ) + } + checkboxTestId="experimental-requirements-verification-checkbox" + /> + + + setExperimentEnabled( + EXPERIMENT_IDS.ONE_SHOT_ORCHESTRATOR, + enabled, + ) + } + checkboxTestId="experimental-one-shot-orchestrator-checkbox" + /> + + setExperimentEnabled( + EXPERIMENT_IDS.KAIZEN_ORCHESTRATOR, + enabled, + ) + } + checkboxTestId="experimental-kaizen-orchestrator-checkbox" + /> +
+ )} +
+
+ ) + } return ( { + const { t } = useAppTranslation() + const { selfImprovingStatus, experiments } = useExtensionState() + + const isExperimentEnabled = experiments?.selfImproving ?? false + const status = selfImprovingStatus + + if (!isExperimentEnabled) { + return null + } + + if (!status) { + return ( +
+

+ {t("settings:experimental.SELF_IMPROVING.statusLoading", { defaultValue: "Loading status..." })} +

+
+ ) + } + + return ( +
+

+ {t("settings:experimental.SELF_IMPROVING.statusTitle", { defaultValue: "Self-Improving Status" })} +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {status.lastReviewAt && ( + + + + + )} + {status.lastCuratorRunAt && ( + + + + + )} + {/* Auto Mode sub-status */} + {status.autoMode && ( + + + + + )} + {/* Review Team sub-status */} + {status.reviewTeam && ( + + + + + )} + {/* Question Evaluator sub-status */} + {status.questionEvaluator && ( + + + + + )} + {/* Resilience sub-status */} + {status.resilience && ( + + + + + )} + {/* Tool Error Healer sub-status */} + {status.toolErrorHealer && ( + + + + + )} + {/* Prevention Engine sub-status */} + {status.preventionEngine && ( + + + + + )} + {/* Verification Engine sub-status */} + {status.verificationEngine && ( + + + + + )} + {/* Requirements Verification sub-status */} + {status.requirementsVerification && ( + + + + + )} + +
+ {t("settings:experimental.SELF_IMPROVING.statusEnabled", { defaultValue: "Enabled" })} + {status.enabled ? "Yes" : "No"}
+ {t("settings:experimental.SELF_IMPROVING.statusStarted", { defaultValue: "Started" })} + {status.started ? "Yes" : "No"}
+ {t("settings:experimental.SELF_IMPROVING.statusPatterns", { defaultValue: "Patterns" })} + {status.patternCount}
+ {t("settings:experimental.SELF_IMPROVING.statusEvents", { defaultValue: "Events" })} + {status.eventCount}
+ {t("settings:experimental.SELF_IMPROVING.statusActions", { defaultValue: "Pending Actions" })} + {status.actionCount}
+ {t("settings:experimental.SELF_IMPROVING.statusMemory", { defaultValue: "Memory Entries" })} + {status.memoryEntries}
+ {t("settings:experimental.SELF_IMPROVING.statusSkills", { defaultValue: "Skill Records" })} + {status.skillRecords}
+ {t("settings:experimental.SELF_IMPROVING.statusBackend", { defaultValue: "Memory Backend" })} + {status.memoryBackend ?? "builtin"}
+ {t("settings:experimental.SELF_IMPROVING.statusLastReview", { + defaultValue: "Last Review", + })} + + {new Date(status.lastReviewAt).toLocaleTimeString()} +
+ {t("settings:experimental.SELF_IMPROVING.statusLastCurator", { + defaultValue: "Last Curator Run", + })} + + {new Date(status.lastCuratorRunAt).toLocaleTimeString()} +
Auto Mode + {String(status.autoMode.status ?? status.autoMode.mode ?? "—")} +
Review Team + {status.reviewTeam.lastReview + ? new Date(status.reviewTeam.lastReview as number).toLocaleTimeString() + : status.reviewTeam.enabled + ? "Active" + : "Inactive"} +
Question Evaluator + {status.questionEvaluator.lastEvaluation + ? new Date(status.questionEvaluator.lastEvaluation as number).toLocaleTimeString() + : status.questionEvaluator.enabled + ? "Active" + : "Inactive"} +
Resilience + {status.resilience.isInRecoveryMode + ? `Recovery (${status.resilience.consecutiveFailures ?? 0} failures)` + : `OK (${status.resilience.consecutiveFailures ?? 0} failures)`} +
Tool Error Healer + {status.toolErrorHealer.learnedCorrections != null + ? `${status.toolErrorHealer.learnedCorrections} corrections` + : "Active"} +
Prevention Engine + {status.preventionEngine.cascadeCount != null + ? `${status.preventionEngine.cascadeCount} cascades` + : status.preventionEngine.lastPrevention + ? `Last: ${new Date(status.preventionEngine.lastPrevention as number).toLocaleTimeString()}` + : "Active"} +
Verification Engine + {status.verificationEngine.lastVerifyAt + ? new Date(status.verificationEngine.lastVerifyAt as number).toLocaleTimeString() + : status.verificationEngine.enabled + ? "Active" + : "Inactive"} +
Requirements Verification + {status.requirementsVerification.activeCount !== undefined + ? `${status.requirementsVerification.activeCount} active` + : status.requirementsVerification.enabled + ? "Active" + : "Inactive"} +
+
+ ) +} diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 47e087615e..0877a3c786 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -29,6 +29,7 @@ import { ArrowLeft, GitCommitVertical, GraduationCap, + RefreshCw, } from "lucide-react" import { @@ -58,6 +59,8 @@ import { TooltipProvider, TooltipTrigger, StandardTooltip, + Input, + ToggleSwitch, } from "@src/components/ui" import { Tab, TabContent, TabHeader, TabList, TabTrigger } from "../common/Tab" @@ -110,6 +113,7 @@ export const sectionNames = [ "prompts", "ui", "experimental", + "kaizen", "language", "about", ] as const @@ -198,11 +202,21 @@ const SettingsView = forwardRef(({ onDone, t imageGenerationProvider, openRouterImageApiKey, openRouterImageGenerationSelectedModel, + memoryBackend, + agentMemoryUrl, + selfImprovingScope, + selfImprovingAutoSkillsScope, reasoningBlockCollapsed, enterBehavior, includeCurrentTime, includeCurrentCost, maxGitStatusFiles, + kaizenFrequency, + kaizenMiniGoal, + kaizenLimit, + kaizenAutoPush, + kaizenRemoteName, + kaizenCommitTemplate, } = cachedState const apiConfiguration = useMemo(() => cachedState.apiConfiguration ?? {}, [cachedState.apiConfiguration]) @@ -227,6 +241,15 @@ const SettingsView = forwardRef(({ onDone, t } }, [settingsImportedAt, extensionState]) + // Re-sync cachedState when extensionState changes externally (e.g., CLI, another webview, + // extension restart) but only if user hasn't made local edits. This prevents stale UI + // when settings are changed from outside the settings view. 
+ useEffect(() => { + if (!isChangeDetected && extensionState) { + setCachedState(extensionState) + } + }, [extensionState, isChangeDetected]) + const setCachedStateField: SetCachedStateField = useCallback((field, value) => { setCachedState((prevState) => { if (prevState[field] === value) { @@ -342,6 +365,50 @@ const SettingsView = forwardRef(({ onDone, t }) }, []) + const setMemoryBackend = useCallback((backend: "builtin" | "agentmemory") => { + setCachedState((prevState) => { + if (prevState.memoryBackend === backend) { + return prevState + } + + setChangeDetected(true) + return { ...prevState, memoryBackend: backend } + }) + }, []) + + const setAgentMemoryUrl = useCallback((url: string) => { + setCachedState((prevState) => { + if (prevState.agentMemoryUrl === url) { + return prevState + } + + setChangeDetected(true) + return { ...prevState, agentMemoryUrl: url } + }) + }, []) + + const setSelfImprovingScope = useCallback((scope: "workspace" | "global") => { + setCachedState((prevState) => { + if (prevState.selfImprovingScope === scope) { + return prevState + } + + setChangeDetected(true) + return { ...prevState, selfImprovingScope: scope } + }) + }, []) + + const setSelfImprovingAutoSkillsScope = useCallback((scope: "workspace" | "global") => { + setCachedState((prevState) => { + if (prevState.selfImprovingAutoSkillsScope === scope) { + return prevState + } + + setChangeDetected(true) + return { ...prevState, selfImprovingAutoSkillsScope: scope } + }) + }, []) + const setCustomSupportPromptsField = useCallback((prompts: Record) => { setCachedState((prevState) => { const previousStr = JSON.stringify(prevState.customSupportPrompts) @@ -420,8 +487,18 @@ const SettingsView = forwardRef(({ onDone, t imageGenerationProvider, openRouterImageApiKey, openRouterImageGenerationSelectedModel, + memoryBackend, + agentMemoryUrl: agentMemoryUrl || "http://localhost:3111", + selfImprovingScope: selfImprovingScope ?? 
"global", + selfImprovingAutoSkillsScope: selfImprovingAutoSkillsScope ?? "workspace", experiments, customSupportPrompts, + kaizenFrequency: kaizenFrequency ?? 5, + kaizenMiniGoal: kaizenMiniGoal ?? "", + kaizenLimit: kaizenLimit ?? 50, + kaizenAutoPush: kaizenAutoPush ?? true, + kaizenRemoteName: kaizenRemoteName ?? "origin", + kaizenCommitTemplate: kaizenCommitTemplate ?? "kaizen: {description}", }, }) @@ -522,6 +599,7 @@ const SettingsView = forwardRef(({ onDone, t { id: "worktrees", icon: GitBranch }, { id: "ui", icon: Glasses }, { id: "experimental", icon: FlaskConical }, + { id: "kaizen", icon: RefreshCw }, { id: "language", icon: Globe }, { id: "about", icon: Info }, ], @@ -904,16 +982,139 @@ const SettingsView = forwardRef(({ onDone, t apiConfiguration={apiConfiguration} setApiConfigurationField={setApiConfigurationField} imageGenerationProvider={imageGenerationProvider} - openRouterImageApiKey={openRouterImageApiKey as string | undefined} - openRouterImageGenerationSelectedModel={ - openRouterImageGenerationSelectedModel as string | undefined - } + openRouterImageApiKey={openRouterImageApiKey} + openRouterImageGenerationSelectedModel={openRouterImageGenerationSelectedModel} + memoryBackend={memoryBackend} + agentMemoryUrl={agentMemoryUrl} + selfImprovingScope={selfImprovingScope} + selfImprovingAutoSkillsScope={selfImprovingAutoSkillsScope} setImageGenerationProvider={setImageGenerationProvider} setOpenRouterImageApiKey={setOpenRouterImageApiKey} setImageGenerationSelectedModel={setImageGenerationSelectedModel} + setMemoryBackend={setMemoryBackend} + setAgentMemoryUrl={setAgentMemoryUrl} + setSelfImprovingScope={setSelfImprovingScope} + setSelfImprovingAutoSkillsScope={setSelfImprovingAutoSkillsScope} /> )} + {/* KAIZEN Section */} + {renderTab === "kaizen" && ( +
+ {t("settings:sections.kaizen")} +
+
+
+
+
KAIZEN Frequency
+
+ How often (in minutes) KAIZEN runs improvement cycles +
+
+ + setCachedStateField("kaizenFrequency", Number(e.target.value)) + } + className="w-20" + min={1} + max={100} + data-testid="kaizen-frequency-input" + /> +
+
+
+
KAIZEN Mini Goal
+
+ Optional mini-goal description for KAIZEN cycles +
+
+ + setCachedStateField("kaizenMiniGoal", e.target.value) + } + className="w-48" + placeholder="" + data-testid="kaizen-mini-goal-input" + /> +
+
+
+
KAIZEN Limit
+
+ Maximum number of improvement iterations per cycle +
+
+ + setCachedStateField("kaizenLimit", Number(e.target.value)) + } + className="w-20" + min={1} + max={1000} + data-testid="kaizen-limit-input" + /> +
+
+
+
Auto Push
+
+ Automatically push KAIZEN commits to remote +
+
+ + setCachedStateField("kaizenAutoPush", !(kaizenAutoPush ?? true)) + } + /> +
+
+
+
Remote Name
+
+ Git remote to push KAIZEN commits to +
+
+ + setCachedStateField("kaizenRemoteName", e.target.value) + } + className="w-32" + placeholder="origin" + data-testid="kaizen-remote-name-input" + /> +
+
+
+
Commit Template
+
+ Template for KAIZEN commit messages +
+
+ + setCachedStateField("kaizenCommitTemplate", e.target.value) + } + className="w-48" + placeholder="kaizen: {description}" + data-testid="kaizen-commit-template-input" + /> +
+
+
+
+ )} + {/* Language Section */} {renderTab === "language" && ( diff --git a/webview-ui/src/components/settings/SkillsSettings.tsx b/webview-ui/src/components/settings/SkillsSettings.tsx index bf81d0c6c7..cd56c87100 100644 --- a/webview-ui/src/components/settings/SkillsSettings.tsx +++ b/webview-ui/src/components/settings/SkillsSettings.tsx @@ -35,12 +35,13 @@ import { CreateSkillDialog } from "./CreateSkillDialog" export const SkillsSettings: React.FC = () => { const { t } = useAppTranslation() - const { cwd, skills: rawSkills, customModes } = useExtensionState() + const { cwd, skills: rawSkills, customModes, skillsUpdateNotice } = useExtensionState() const skills = useMemo(() => rawSkills ?? [], [rawSkills]) const [deleteDialogOpen, setDeleteDialogOpen] = useState(false) const [skillToDelete, setSkillToDelete] = useState(null) const [createDialogOpen, setCreateDialogOpen] = useState(false) + const [visibleSkillsUpdateNotice, setVisibleSkillsUpdateNotice] = useState() // Mode selection modal state const [modeDialogOpen, setModeDialogOpen] = useState(false) @@ -65,6 +66,20 @@ export const SkillsSettings: React.FC = () => { handleRefresh() }, [handleRefresh]) + useEffect(() => { + if (!skillsUpdateNotice) { + setVisibleSkillsUpdateNotice(undefined) + return + } + + setVisibleSkillsUpdateNotice(skillsUpdateNotice) + const timeoutId = window.setTimeout(() => { + setVisibleSkillsUpdateNotice(undefined) + }, 3500) + + return () => window.clearTimeout(timeoutId) + }, [skillsUpdateNotice]) + const handleDeleteClick = useCallback((skill: SkillMetadata) => { setSkillToDelete(skill) setDeleteDialogOpen(true) @@ -238,6 +253,15 @@ export const SkillsSettings: React.FC = () => { {t("settings:skills.addSkill")} + + {visibleSkillsUpdateNotice && ( +
+ {visibleSkillsUpdateNotice} +
+ )}
diff --git a/webview-ui/src/components/settings/__tests__/SettingsView.spec.tsx b/webview-ui/src/components/settings/__tests__/SettingsView.spec.tsx index 6fa34ebe81..5cb687da13 100644 --- a/webview-ui/src/components/settings/__tests__/SettingsView.spec.tsx +++ b/webview-ui/src/components/settings/__tests__/SettingsView.spec.tsx @@ -1,6 +1,7 @@ // pnpm --filter @roo-code/vscode-webview test src/components/settings/__tests__/SettingsView.spec.tsx -import { render, screen, fireEvent, within } from "@/utils/test-utils" +import { render, screen, fireEvent, within, waitFor } from "@/utils/test-utils" +import userEvent from "@testing-library/user-event" import { QueryClient, QueryClientProvider } from "@tanstack/react-query" import { vscode } from "@/utils/vscode" @@ -39,7 +40,11 @@ vi.mock("@vscode/webview-ui-toolkit/react", () => ({ onChange({ target: { checked: e.target.checked } })} + onClick={(e: any) => { + const newChecked = !checked + e.target.checked = newChecked + onChange({ target: { checked: newChecked } }) + }} aria-label={typeof children === "string" ? children : undefined} data-testid={dataTestId} /> @@ -70,6 +75,12 @@ vi.mock("@vscode/webview-ui-toolkit/react", () => ({ role="textbox" /> ), + VSCodeDropdown: ({ children, value, onChange }: any) => ( + + ), + VSCodeOption: ({ children, value }: any) => , })) vi.mock("../../../components/common/Tab", () => ({ @@ -173,21 +184,17 @@ vi.mock("@/components/ui", () => ({ Input: ({ value, onChange, placeholder, "data-testid": dataTestId }: any) => ( ), - Select: ({ children, value, onValueChange }: any) => ( -
- - {children} -
- ), - SelectContent: ({ children }: any) =>
{children}
, - SelectGroup: ({ children }: any) =>
{children}
, - SelectItem: ({ children, value }: any) => ( -
+ Select: ({ children, value, onValueChange, "data-testid": dataTestId }: any) => ( + ), - SelectTrigger: ({ children }: any) =>
{children}
, - SelectValue: ({ placeholder }: any) =>
{placeholder}
, + SelectContent: ({ children }: any) => <>{children}, + SelectGroup: ({ children }: any) => <>{children}, + SelectItem: ({ children, value }: any) => , + SelectTrigger: () => null, + SelectValue: () => null, SearchableSelect: ({ value, onValueChange, options, placeholder }: any) => (