From c284b2f7e97974a8dadb0b83cdcc3c0aa6b87ce9 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Fri, 5 Jun 2026 12:33:33 -0700 Subject: [PATCH 1/8] fix(skills): discover nested and frontmatter-less skills Skills following the Claude Code layout (<skills-dir>/<name>/SKILL.md) or written as plain .md without YAML frontmatter were silently skipped in the standard skill dirs (.smallcode/skills, ~/.smallcode/skills, ~/.config/smallcode/skills). Both shapes now load; README-style files (README/CHANGELOG/LICENSE/CONTRIBUTING) are filtered by name. Fixes #81 Constraint: no warning channel exists in SkillManager, so silent skips had no user-visible signal Rejected: warn-on-skip only | users following Claude Code conventions expect these layouts to work Confidence: high Scope-risk: narrow Not-tested: fullscreen TUI /skill list rendering (logic shared with classic mode) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/plugins/skills.js | 74 ++++++++++++++++++++++++++----------------- test/skills.test.js | 36 +++++++++++++++++++++ 2 files changed, 81 insertions(+), 29 deletions(-) diff --git a/src/plugins/skills.js b/src/plugins/skills.js index 60c06bb1..d88354c7 100644 --- a/src/plugins/skills.js +++ b/src/plugins/skills.js @@ -16,6 +16,10 @@ // `.agents/skills` or `.claude/skills` typically have no frontmatter — they // are treated as `manual`-trigger skills named after their parent directory. // +// The standard skill dirs also accept the nested `/SKILL.md` layout and +// flat `.md` files without frontmatter (named after the file) — both were +// previously skipped silently (closes #81). README-style files are ignored. +// // Frontmatter accepts both LF and CRLF line endings (closes #52). 
const fs = require('fs'); @@ -24,6 +28,8 @@ const os = require('os'); const FM_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/; const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; +// Docs that live alongside skills but aren't skills themselves +const NON_SKILL_MD = /^(readme|changelog|license|contributing)\.md$/i; class SkillManager { constructor(projectDir) { @@ -71,14 +77,20 @@ class SkillManager { if (!dir || !fs.existsSync(dir)) return; let entries; try { - entries = fs.readdirSync(dir); + entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } for (const entry of entries) { - if (!entry.endsWith('.md')) continue; - const full = path.join(dir, entry); - this._ingestFile(full, entry, dir); + if (entry.isDirectory()) { + // //SKILL.md inside a standard skill dir — users following + // the Claude Code layout expect this to work (closes #81) + this._loadSkillFolder(path.join(dir, entry.name), entry.name); + continue; + } + if (!entry.name.endsWith('.md') || NON_SKILL_MD.test(entry.name)) continue; + const full = path.join(dir, entry.name); + this._ingestFile(full, entry.name, dir, entry.name.replace(/\.md$/i, ''), 'flat'); } } @@ -92,38 +104,41 @@ class SkillManager { } for (const d of dirs) { if (!d.isDirectory()) continue; - const skillDir = path.join(root, d.name); - // Look for SKILL.md, skill.md, or any .md file inside the folder. 
- let skillFile = null; - const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; - for (const c of candidates) { - const p = path.join(skillDir, c); - if (fs.existsSync(p)) { skillFile = p; break; } - } - if (!skillFile) { - // Fall back to first .md in the folder - try { - const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); - if (md) skillFile = path.join(skillDir, md); - } catch {} - } - if (!skillFile) continue; - this._ingestFile(skillFile, path.basename(skillFile), skillDir, d.name); + this._loadSkillFolder(path.join(root, d.name), d.name); + } + } + + _loadSkillFolder(skillDir, name) { + // Look for SKILL.md, skill.md, or any .md file inside the folder. + let skillFile = null; + const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; + for (const c of candidates) { + const p = path.join(skillDir, c); + if (fs.existsSync(p)) { skillFile = p; break; } + } + if (!skillFile) { + // Fall back to first .md in the folder + try { + const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); + if (md) skillFile = path.join(skillDir, md); + } catch {} } + if (!skillFile) return; + this._ingestFile(skillFile, path.basename(skillFile), skillDir, name, 'nested'); } - _ingestFile(filePath, filename, dir, defaultName) { + _ingestFile(filePath, filename, dir, defaultName, origin) { let content; try { content = fs.readFileSync(filePath, 'utf-8'); } catch { return; } - const skill = this._parse(content, filename, dir, defaultName); + const skill = this._parse(content, filename, dir, defaultName, origin); if (skill) this.skills.set(skill.name, skill); } - _parse(content, filename, dir, defaultName) { + _parse(content, filename, dir, defaultName, origin) { // Parse YAML frontmatter (CRLF + LF tolerant — closes #52) const fmMatch = content.match(FM_RE); let frontmatter = ''; @@ -133,9 +148,10 @@ class SkillManager { frontmatter = fmMatch[1]; body = fmMatch[2]; } else if (!defaultName) { - // Flat-layout files without frontmatter aren't skills (could be a - 
// README). Nested-layout (.agents/skills//SKILL.md) files are - // accepted as plain-body skills using the parent directory name. + // Files without frontmatter and no derivable name aren't skills. + // Flat + nested loaders always pass a defaultName, so frontmatter-less + // files load as manual skills (closes #81); README-style files are + // filtered by name in _loadFlat. return null; } @@ -155,11 +171,11 @@ class SkillManager { return { name: meta.name || defaultName || filename.replace(/\.md$/i, ''), - trigger: meta.trigger || (defaultName ? 'manual' : 'manual'), + trigger: meta.trigger || 'manual', keywords: Array.isArray(meta.keywords) ? meta.keywords : [], content: body.trim(), path: path.join(dir, filename), - origin: defaultName ? 'nested' : 'flat', + origin: origin || (defaultName ? 'nested' : 'flat'), }; } diff --git a/test/skills.test.js b/test/skills.test.js index 087d756c..75ecf92f 100644 --- a/test/skills.test.js +++ b/test/skills.test.js @@ -98,6 +98,42 @@ test('list() reports nested skills with origin marker', () => { assert.equal(nested.origin, 'nested'); }); +test('issue #81: nested /SKILL.md inside .smallcode/skills is detected', () => { + const dir = freshProject(); + const skillFile = path.join(dir, '.smallcode', 'skills', 'my-skill', 'SKILL.md'); + write(skillFile, '# my skill\n\nDo nested things.'); + + const sm = new SkillManager(dir); + const got = sm.get('my-skill'); + assert.ok(got, 'nested skill inside .smallcode/skills should load'); + assert.equal(got.origin, 'nested'); + assert.match(got.content, /Do nested things\./); +}); + +test('issue #81: flat .md without frontmatter loads as manual skill', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'plain.md'), + '# Plain Skill\n\nNo frontmatter here.'); + + const sm = new SkillManager(dir); + const got = sm.get('plain'); + assert.ok(got, 'frontmatter-less flat skill should load'); + assert.equal(got.trigger, 'manual'); + assert.equal(got.origin, 
'flat'); + assert.match(got.content, /No frontmatter here\./); +}); + +test('issue #81: README-style files in skill dirs are not skills', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'README.md'), '# About these skills'); + write(path.join(dir, '.smallcode', 'skills', 'real.md'), + '---\nname: real\ntrigger: manual\n---\nreal body'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('README'), null); + assert.ok(sm.get('real')); +}); + test('add() persists a new skill and round-trips through .smallcode/skills', () => { const dir = freshProject(); const sm = new SkillManager(dir); From 086fa4a7456804dc301e279436e65a47518fc69d Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:30:24 -0700 Subject: [PATCH 2/8] feat(evolver): /evolve proposes skills from session friction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create-mode evolver: deterministic friction extraction from saved traces (repeated near-duplicate prompts, consecutive tool-retry loops), LLM judgment routed to the strong tier, and ONE quarantined skill draft per run written to .smallcode/skills/drafts/. Drafts never auto-load; /evolve promote moves them live. Validation gates every write (name format, no frontmatter injection, trigger rules); name collisions across live+draft+global dirs abort; every create appends to .smallcode/evolver-audit.jsonl. The per-run cap is structural — EvolverRun raises on a second create. 
Constraint: small models produce noisy judgments, so all fuzzy output passes validate-or-abort before any write Rejected: plugin delivery | needs TraceRecorder + SkillManager internals unreachable from plugin dirs under binary installs Confidence: high Scope-risk: narrow Directive: keep mechanics LLM-free — judgment stays in the command handler so mechanics remain unit-testable Not-tested: strong-tier routing with a separately configured SMALLCODE_MODEL_STRONG endpoint Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/commands.js | 156 ++++++++++++++++++++ src/plugins/audit_log.js | 33 +++++ src/plugins/evolver.js | 174 ++++++++++++++++++++++ src/plugins/friction_analyzer.js | 123 ++++++++++++++++ src/plugins/skills.js | 30 ++++ src/tui/fullscreen.js | 1 + test/evolver.test.js | 240 +++++++++++++++++++++++++++++++ 7 files changed, 757 insertions(+) create mode 100644 src/plugins/audit_log.js create mode 100644 src/plugins/evolver.js create mode 100644 src/plugins/friction_analyzer.js create mode 100644 test/evolver.test.js diff --git a/bin/commands.js b/bin/commands.js index 9ba5ebc8..e1a7e11c 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -851,6 +851,7 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(` ${chalk.cyan('/budget')} ${chalk.gray('Show context window budget')}`); console.log(` ${chalk.cyan('/mcp')} ${chalk.gray('Show connected MCP servers')}`); console.log(` ${chalk.cyan('/skill')} ${chalk.gray('Manage reusable skills')}`); + console.log(` ${chalk.cyan('/evolve')} ${chalk.gray('Propose a new skill from session friction (list|promote|log)')}`); console.log(` ${chalk.cyan('/plugin')} ${chalk.gray('List installed plugins')}`); console.log(` ${chalk.cyan('/provider')} ${chalk.gray('Configure LLM provider (interactive wizard)')}`); console.log(` ${chalk.cyan('/sessions')} ${chalk.gray('List/resume saved sessions')}`); @@ -863,6 +864,161 @@ module.exports = function createCommandHandler(config, 
conversationHistory, impr rl.prompt(); return; + case '/evolve': { + const { SkillManager } = require('../src/plugins/skills'); + const sm = new SkillManager(process.cwd()); + const sub = (parts[1] || '').trim(); + + if (sub === 'list') { + const drafts = sm.listDrafts(); + if (drafts.length === 0) { + console.log(chalk.gray(' No skill drafts. Run /evolve to analyze recent sessions.')); + } else { + console.log(chalk.bold(` Drafts (${drafts.length}) — promote with /evolve promote <name>:`)); + for (const d of drafts) console.log(` ${chalk.cyan(d)}`); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'promote') { + const name = (parts[2] || '').trim(); + if (!name) { console.log(chalk.gray(' Usage: /evolve promote <name>')); } + else { + const target = sm.promoteDraft(name); + if (target) console.log(` ${chalk.green('✓')} Promoted to ${chalk.cyan(target)} — active next session.`); + else console.log(chalk.red(` Draft "${name}" not found (or a live skill with that name exists).`)); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'log') { + const { readEntries } = require('../src/plugins/audit_log'); + const entries = readEntries(path.join(process.cwd(), '.smallcode', 'evolver-audit.jsonl'), 10); + if (entries.length === 0) console.log(chalk.gray(' No evolution events logged yet.')); + for (const e of entries) { + console.log(` ${chalk.gray(e.ts)} ${chalk.cyan(e.name)} ${chalk.gray(e.rationale.slice(0, 60))}`); + } + console.log(''); + rl.prompt(); + return; + } + + // No sub-command: run an evolution pass + const { TraceRecorder } = require('./trace_recorder'); + const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); + const evolver = require('../src/plugins/evolver'); + + const tr = new TraceRecorder(process.cwd()); + const traceList = tr.list().slice(0, 20); + if (traceList.length < 3) { + console.log(chalk.gray(` Only ${traceList.length} trace(s) recorded — need at least 3 sessions of 
data.`)); + console.log(''); + rl.prompt(); + return; + } + const traces = traceList.map(t => tr.load(t.id)).filter(Boolean); + + const skillKeywords = sm.list().flatMap(s => s.keywords || []); + const report = extractFrictionSignals(traces, { skillKeywords }); + const signalCount = report.repeated_patterns.length + report.tool_retry_loops.length; + if (signalCount === 0) { + console.log(chalk.gray(` No friction patterns in last ${traces.length} traces. Nothing to evolve.`)); + console.log(''); + rl.prompt(); + return; + } + + console.log(chalk.bold(` Friction signals (${signalCount}):`)); + console.log(chalk.gray(formatReportForPrompt(report).split('\n').map(l => ' ' + l).join('\n'))); + + // LLM judgment — route to the strong tier when configured + const { getModelTarget, buildAuthHeaders, withModelTarget } = require('./config'); + const target = getModelTarget(config, 'strong'); + process.stdout.write(chalk.gray(` Asking ${target.model} for a proposal... `)); + + const sysPrompt = 'You design reusable skills for a coding agent. A skill is a short markdown instruction injected when relevant. Given friction signals from recent sessions, propose ONE skill addressing the most impactful pattern. 
Respond with ONLY a JSON object: {"name": "kebab-case-name", "description": "one line", "trigger": "match", "keywords": ["k1","k2"], "body": "markdown instructions for the agent", "rationale": "why this helps"}'; + let proposalRaw = null; + try { + const resp = await fetch(`${target.baseUrl}/chat/completions`, { + method: 'POST', + headers: buildAuthHeaders(withModelTarget(config, target)), + body: JSON.stringify({ + model: target.model, + messages: [ + { role: 'system', content: sysPrompt }, + { role: 'user', content: `Friction signals:\n${formatReportForPrompt(report)}` }, + ], + temperature: 0.2, + max_tokens: 1024, + }), + }); + if (resp.ok) { + const data = await resp.json(); + proposalRaw = data?.choices?.[0]?.message?.content || null; + } else { + console.log(chalk.red(`HTTP ${resp.status}`)); + } + } catch (e) { + console.log(chalk.red(e.message)); + } + if (!proposalRaw) { console.log(''); rl.prompt(); return; } + + // Forgiving parse: strict JSON → fenced JSON → abort with raw output + let parsed = null; + try { parsed = JSON.parse(proposalRaw); } catch { + const m = proposalRaw.match(/\{[\s\S]*\}/); + if (m) { try { parsed = JSON.parse(m[0]); } catch {} } + } + if (!parsed) { + console.log(chalk.yellow('could not parse')); + console.log(chalk.gray(' Raw model output (nothing written):')); + console.log(chalk.gray(' ' + proposalRaw.slice(0, 500).split('\n').join('\n '))); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.green('ok')); + + const proposal = evolver.buildSkillProposal( + String(parsed.name || ''), String(parsed.description || ''), String(parsed.body || ''), + { trigger: parsed.trigger, keywords: parsed.keywords, rationale: String(parsed.rationale || '') } + ); + const errors = evolver.validateProposal(proposal); + if (errors.length) { + console.log(chalk.red(` Proposal rejected: ${errors.join('; ')}`)); + console.log(''); + rl.prompt(); + return; + } + const collision = evolver.checkNameCollision(proposal.name, 
process.cwd()); + if (collision) { + console.log(chalk.red(` Name collision with ${collision} — nothing written.`)); + console.log(''); + rl.prompt(); + return; + } + + const run = new evolver.EvolverRun(); + const draftPath = run.writeDraft(proposal, process.cwd()); + evolver.logCreateEvent( + path.join(process.cwd(), '.smallcode', 'evolver-audit.jsonl'), + proposal, proposal.rationale, + report.repeated_patterns.flatMap(p => p.traceIds).concat(report.tool_retry_loops.flatMap(l => l.traceIds)) + ); + + console.log(''); + console.log(` ${chalk.green('✓')} Draft: ${chalk.cyan(draftPath)}`); + console.log(chalk.gray(` "${proposal.description}"`)); + console.log(chalk.gray(` Review the file, then: /evolve promote ${proposal.name}`)); + console.log(''); + rl.prompt(); + return; + } + case '/provider': { const sub = (parts[1] || '').trim(); if (sub === 'status' || sub === '--status' || sub === '-s') { diff --git a/src/plugins/audit_log.js b/src/plugins/audit_log.js new file mode 100644 index 00000000..12b2591d --- /dev/null +++ b/src/plugins/audit_log.js @@ -0,0 +1,33 @@ +// SmallCode — Evolution Audit Log +// Thin JSONL appender/reader for evolver create events. One JSON object per +// line; append-only. Writes are atomic (tmp + rename) so a crash mid-write +// never corrupts existing history. + +const fs = require('fs'); +const path = require('path'); + +function appendEntry(filePath, entry) { + const dir = path.dirname(filePath); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + const line = JSON.stringify(entry) + '\n'; + // Read-modify-write atomically: copy existing content + new line to a tmp + // file, then rename over the original. 
+ let existing = ''; + try { existing = fs.readFileSync(filePath, 'utf-8'); } catch {} + const tmpPath = filePath + `.tmp.${process.pid}.${Date.now()}`; + fs.writeFileSync(tmpPath, existing + line, 'utf-8'); + fs.renameSync(tmpPath, filePath); +} + +function readEntries(filePath, limit = 100) { + let content = ''; + try { content = fs.readFileSync(filePath, 'utf-8'); } catch { return []; } + const entries = []; + for (const line of content.split('\n')) { + if (!line.trim()) continue; + try { entries.push(JSON.parse(line)); } catch {} + } + return entries.slice(-limit); +} + +module.exports = { appendEntry, readEntries }; diff --git a/src/plugins/evolver.js b/src/plugins/evolver.js new file mode 100644 index 00000000..139a3123 --- /dev/null +++ b/src/plugins/evolver.js @@ -0,0 +1,174 @@ +// SmallCode — Evolver (create-mode mechanics) +// Deterministic mechanics behind the /evolve command: proposal building, +// validation, name-collision checking, quarantined draft writing, audit +// logging, and structural enforcement of the 1-create-per-run cap. +// +// The fuzzy judgment (is this friction worth a skill?) happens in the +// command handler via an LLM call. Everything here is pure mechanics so it +// can be unit-tested without a model. 
+// +// Safety rules (mirrors the create-mode evolver pattern): +// - Drafts only: writes go to .smallcode/skills/drafts/, never live dirs +// - Never deletes, never commits +// - validateProposal must pass before any write +// - EvolverRun raises on the 2nd create in a single run + +const fs = require('fs'); +const path = require('path'); +const { appendEntry } = require('./audit_log'); + +const MAX_CREATES_PER_RUN = 1; +const NAME_RE = /^[A-Za-z0-9_-]+$/; +const VALID_TRIGGERS = new Set(['manual', 'auto', 'match']); + +class ProposalCapExceededError extends Error {} + +// ── Builders ────────────────────────────────────────────────────────────── + +function buildSkillProposal(name, description, body, options = {}) { + return { + kind: 'create', + artefact: 'skill', + name, + description, + body, + trigger: options.trigger || 'manual', + keywords: Array.isArray(options.keywords) ? options.keywords : [], + rationale: options.rationale || '', + }; +} + +// ── Validation ──────────────────────────────────────────────────────────── + +function validateProposal(proposal) { + const errors = []; + if (!proposal || typeof proposal !== 'object') return ['proposal must be an object']; + + if (proposal.artefact !== 'skill') { + errors.push(`artefact must be "skill", got ${JSON.stringify(proposal.artefact)}`); + } + if (typeof proposal.name !== 'string' || !NAME_RE.test(proposal.name)) { + errors.push('name must be a non-empty alphanumeric/-_ string'); + } + if (typeof proposal.description !== 'string' || !proposal.description.trim()) { + errors.push('description must be a non-empty string'); + } else if (/[\r\n]/.test(proposal.description)) { + errors.push('description must not contain newlines (frontmatter-injection risk)'); + } + if (typeof proposal.body !== 'string' || !proposal.body.trim()) { + errors.push('body must be a non-empty string'); + } + if (!VALID_TRIGGERS.has(proposal.trigger)) { + errors.push(`trigger must be one of manual|auto|match, got 
${JSON.stringify(proposal.trigger)}`); + } + if (proposal.trigger === 'match' && (!Array.isArray(proposal.keywords) || proposal.keywords.length === 0)) { + errors.push('trigger "match" requires a non-empty keywords list'); + } + return errors; +} + +// ── Name-collision check ────────────────────────────────────────────────── + +// Look for an existing skill with this name across the standard skill dirs +// (live and drafts). Returns the first matching path or null. +function checkNameCollision(name, projectDir) { + const os = require('os'); + const roots = [ + path.join(projectDir, '.smallcode', 'skills'), + path.join(os.homedir(), '.smallcode', 'skills'), + path.join(os.homedir(), '.config', 'smallcode', 'skills'), + ]; + for (const root of roots) { + for (const candidate of [ + path.join(root, `${name}.md`), + path.join(root, name, 'SKILL.md'), + path.join(root, 'drafts', `${name}.md`), + ]) { + if (fs.existsSync(candidate)) return candidate; + } + } + return null; +} + +// ── Draft writer ────────────────────────────────────────────────────────── + +function _skillMd(proposal) { + const fm = [ + '---', + `name: ${proposal.name}`, + `description: ${proposal.description}`, + `trigger: ${proposal.trigger}`, + proposal.keywords.length ? 
`keywords: [${proposal.keywords.join(', ')}]` : null, + '---', + ].filter(Boolean).join('\n'); + let body = proposal.body.trim() + '\n'; + if (proposal.rationale) { + body += `\n<!-- Rationale: ${proposal.rationale.replace(/-->/g, '')} -->\n`; + } + return `${fm}\n${body}`; +} + +function writeDraft(proposal, projectDir) { + const errors = validateProposal(proposal); + if (errors.length) throw new Error(`invalid proposal: ${errors.join('; ')}`); + + const draftsDir = path.resolve(projectDir, '.smallcode', 'skills', 'drafts'); + const target = path.resolve(draftsDir, `${proposal.name}.md`); + // Path containment — name is already validated, but defend anyway + if (!target.startsWith(draftsDir + path.sep)) { + throw new Error(`draft path escapes drafts dir: ${target}`); + } + if (!fs.existsSync(draftsDir)) fs.mkdirSync(draftsDir, { recursive: true }); + const tmpPath = target + `.tmp.${process.pid}.${Date.now()}`; + fs.writeFileSync(tmpPath, _skillMd(proposal), 'utf-8'); + fs.renameSync(tmpPath, target); + return target; +} + +// ── Audit log ───────────────────────────────────────────────────────────── + +function logCreateEvent(auditPath, proposal, rationale, sourceTraceIds) { + appendEntry(auditPath, { + ts: new Date().toISOString(), + kind: 'create', + artefact: proposal.artefact, + name: proposal.name, + rationale: rationale || proposal.rationale || '', + source_traces: Array.isArray(sourceTraceIds) ? sourceTraceIds : [], + }); +} + +// ── Per-run cap (structural) ────────────────────────────────────────────── + +// Stateful tracker enforcing the create cap by construction. Use this, not +// writeDraft directly, when running an evolution pass. 
+class EvolverRun { + constructor(maxCreates = MAX_CREATES_PER_RUN) { + this.maxCreates = maxCreates; + this.createsSoFar = 0; + this.written = []; + } + + writeDraft(proposal, projectDir) { + if (proposal && proposal.kind === 'create' && this.createsSoFar >= this.maxCreates) { + throw new ProposalCapExceededError( + `already wrote ${this.createsSoFar} create(s); cap is ${this.maxCreates}` + ); + } + const target = writeDraft(proposal, projectDir); + if (proposal.kind === 'create') this.createsSoFar++; + this.written.push(target); + return target; + } +} + +module.exports = { + buildSkillProposal, + validateProposal, + checkNameCollision, + writeDraft, + logCreateEvent, + EvolverRun, + ProposalCapExceededError, + MAX_CREATES_PER_RUN, +}; diff --git a/src/plugins/friction_analyzer.js b/src/plugins/friction_analyzer.js new file mode 100644 index 00000000..2ba25bed --- /dev/null +++ b/src/plugins/friction_analyzer.js @@ -0,0 +1,123 @@ +// SmallCode — Friction Analyzer +// Deterministic friction-signal extraction from saved traces. No LLM calls — +// this produces the evidence the /evolve command hands to the model for +// judgment. 
+// +// Signals: +// - repeated_patterns: near-duplicate prompts appearing 3+ times with no +// matching skill keyword (the user keeps asking for the same thing by hand) +// - tool_retry_loops: 3+ consecutive failed calls of the same tool against +// the same file within a trace (the model keeps fighting the same wall) + +const REPEAT_THRESHOLD = 3; +const RETRY_THRESHOLD = 3; +const SIMILARITY_THRESHOLD = 0.5; + +function _wordSet(text) { + return new Set( + String(text || '').toLowerCase().split(/[^a-z0-9]+/).filter(w => w.length > 2) + ); +} + +function _jaccard(a, b) { + if (a.size === 0 && b.size === 0) return 0; + let inter = 0; + for (const w of a) if (b.has(w)) inter++; + return inter / (a.size + b.size - inter); +} + +function _isError(result) { + const s = String(result || ''); + return s.startsWith('✗') || /"error"\s*:/.test(s) || /^Error[:\s]/.test(s); +} + +// Group traces whose prompts are near-duplicates (Jaccard on word sets). +function _findRepeatedPatterns(traces, skillKeywords) { + const groups = []; // { words, prompts, traceIds } + for (const t of traces) { + const words = _wordSet(t.prompt); + if (words.size === 0) continue; + let placed = false; + for (const g of groups) { + if (_jaccard(words, g.words) >= SIMILARITY_THRESHOLD) { + g.prompts.push(t.prompt); + g.traceIds.push(t.id); + for (const w of words) g.words.add(w); + placed = true; + break; + } + } + if (!placed) groups.push({ words, prompts: [t.prompt], traceIds: [t.id] }); + } + + return groups + .filter(g => g.prompts.length >= REPEAT_THRESHOLD) + // Skip patterns a skill already covers (any keyword hits the group words) + .filter(g => !skillKeywords.some(kw => g.words.has(String(kw).toLowerCase()))) + .map(g => ({ + pattern: g.prompts[0].slice(0, 120), + count: g.prompts.length, + traceIds: g.traceIds, + })); +} + +// Detect consecutive failed calls of the same tool+file within each trace. 
+function _findToolRetryLoops(traces) { + const loops = []; + for (const t of traces) { + let runTool = null, runFile = null, failCount = 0; + const flush = () => { + if (failCount >= RETRY_THRESHOLD) { + loops.push({ tool: runTool, file: runFile, failCount, traceIds: [t.id] }); + } + runTool = null; runFile = null; failCount = 0; + }; + for (const step of t.steps || []) { + if (step.type !== 'tool_call') continue; + let file = ''; + try { + const args = typeof step.args === 'string' ? JSON.parse(step.args) : (step.args || {}); + file = args.path || args.file || ''; + } catch {} + const failed = _isError(step.result); + if (failed && step.name === runTool && file === runFile) { + failCount++; + } else { + flush(); + if (failed) { runTool = step.name; runFile = file; failCount = 1; } + } + } + flush(); + } + return loops; +} + +/** + * @param {object[]} traces - full trace objects (TraceRecorder.load shape) + * @param {object} options - { skillKeywords: string[] } keywords of existing skills + * @returns FrictionReport + */ +function extractFrictionSignals(traces, options = {}) { + const skillKeywords = options.skillKeywords || []; + const safe = (traces || []).filter(t => t && typeof t === 'object'); + return { + repeated_patterns: _findRepeatedPatterns(safe, skillKeywords), + tool_retry_loops: _findToolRetryLoops(safe), + analyzed_traces: safe.length, + }; +} + +// Compact text rendering of a friction report for the LLM prompt — counts +// and short descriptions only, never full trace content (budget guard). 
+function formatReportForPrompt(report) { + const lines = []; + for (const p of report.repeated_patterns) { + lines.push(`- Repeated request (${p.count}x): "${p.pattern}"`); + } + for (const l of report.tool_retry_loops) { + lines.push(`- Tool retry loop: ${l.tool} failed ${l.failCount}x in a row on ${l.file || '(no file)'}`); + } + return lines.join('\n').slice(0, 2000); +} + +module.exports = { extractFrictionSignals, formatReportForPrompt }; diff --git a/src/plugins/skills.js b/src/plugins/skills.js index d88354c7..c4f5206a 100644 --- a/src/plugins/skills.js +++ b/src/plugins/skills.js @@ -83,6 +83,9 @@ class SkillManager { } for (const entry of entries) { if (entry.isDirectory()) { + // drafts/ is quarantined — evolver proposals live there until a + // human promotes them (/evolve promote ). Never auto-load. + if (entry.name === 'drafts') continue; // //SKILL.md inside a standard skill dir — users following // the Claude Code layout expect this to work (closes #81) this._loadSkillFolder(path.join(dir, entry.name), entry.name); @@ -244,6 +247,33 @@ class SkillManager { return skill; } + // Promote a quarantined draft (.smallcode/skills/drafts/.md) into + // the live project skill dir and load it. Returns the new path or null. 
+ promoteDraft(name) { + const safe = String(name || '').replace(/[^a-z0-9-_]/gi, ''); + if (!safe) return null; + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + const source = path.join(draftsDir, `${safe}.md`); + if (!fs.existsSync(source)) return null; + const target = path.join(this.projectDir, '.smallcode', 'skills', `${safe}.md`); + if (fs.existsSync(target)) return null; // never overwrite a live skill + fs.renameSync(source, target); + this._ingestFile(target, `${safe}.md`, path.dirname(target), safe, 'flat'); + return target; + } + + // List quarantined drafts (names only) + listDrafts() { + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + try { + return fs.readdirSync(draftsDir) + .filter(f => f.endsWith('.md')) + .map(f => f.replace(/\.md$/i, '')); + } catch { + return []; + } + } + // Remove a skill remove(name) { const skill = this.skills.get(name); diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 6a4c935f..5dd8c02a 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -192,6 +192,7 @@ class FullScreenTUI { { cmd: '/cognition', alias: null, desc: 'MarrowScript cognition status' }, { cmd: '/mcp', alias: null, desc: 'Connected MCP servers' }, { cmd: '/skill', alias: null, desc: 'Manage reusable skills' }, + { cmd: '/evolve', alias: null, desc: 'Propose skill from session friction' }, { cmd: '/plugin', alias: null, desc: 'Manage plugins' }, { cmd: '/sessions', alias: null, desc: 'List/resume sessions' }, { cmd: '/session', alias: null, desc: 'Parallel sessions' }, diff --git a/test/evolver.test.js b/test/evolver.test.js new file mode 100644 index 00000000..ffb42a7a --- /dev/null +++ b/test/evolver.test.js @@ -0,0 +1,240 @@ +'use strict'; + +// SmallCode — Evolver (create-mode) tests +// Pins the deterministic mechanics behind /evolve: proposal validation, +// quarantined draft writing, the structural 1-create-per-run cap, friction +// extraction from traces, 
and the SkillManager drafts quarantine. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const evolver = require('../src/plugins/evolver'); +const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); +const { appendEntry, readEntries } = require('../src/plugins/audit_log'); +const { SkillManager } = require('../src/plugins/skills'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-evolver-')); +} + +function trace(id, prompt, steps = []) { + return { id, prompt, steps, tokens: { prompt: 0, completion: 0 } }; +} + +function failedStep(tool, file) { + return { type: 'tool_call', name: tool, args: JSON.stringify({ path: file }), result: '✗ failed' }; +} + +// ── Proposal building + validation ─────────────────────────────────────── + +test('buildSkillProposal returns a complete create proposal', () => { + const p = evolver.buildSkillProposal('my-skill', 'does things', 'Body here.', { + trigger: 'match', keywords: ['foo'], rationale: 'seen 3x', + }); + assert.equal(p.kind, 'create'); + assert.equal(p.artefact, 'skill'); + assert.equal(p.trigger, 'match'); + assert.deepEqual(p.keywords, ['foo']); +}); + +test('validateProposal accepts a valid proposal', () => { + const p = evolver.buildSkillProposal('ok-name', 'desc', 'body'); + assert.deepEqual(evolver.validateProposal(p), []); +}); + +test('validateProposal rejects bad names, empty fields, newline descriptions', () => { + const bad = (over) => evolver.validateProposal({ + ...evolver.buildSkillProposal('ok', 'desc', 'body'), ...over, + }); + assert.ok(bad({ name: 'has space' }).length > 0); + assert.ok(bad({ name: '../traverse' }).length > 0); + assert.ok(bad({ name: '' }).length > 0); + assert.ok(bad({ description: '' }).length > 0); + assert.ok(bad({ description: 'line1\nline2' }).length > 0, 'newline = 
frontmatter injection'); + assert.ok(bad({ body: ' ' }).length > 0); + assert.ok(bad({ trigger: 'bogus' }).length > 0); +}); + +test('validateProposal requires keywords for match trigger', () => { + const p = evolver.buildSkillProposal('m', 'd', 'b', { trigger: 'match', keywords: [] }); + assert.ok(evolver.validateProposal(p).length > 0); +}); + +// ── Collision check ─────────────────────────────────────────────────────── + +test('checkNameCollision finds existing flat and draft skills', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(path.join(skillsDir, 'drafts'), { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'live-skill.md'), '---\nname: live-skill\n---\nx'); + fs.writeFileSync(path.join(skillsDir, 'drafts', 'pending.md'), '---\nname: pending\n---\nx'); + + assert.ok(evolver.checkNameCollision('live-skill', dir)); + assert.ok(evolver.checkNameCollision('pending', dir)); + assert.equal(evolver.checkNameCollision('brand-new', dir), null); +}); + +// ── Draft writing + cap ─────────────────────────────────────────────────── + +test('writeDraft writes to drafts/ quarantine with frontmatter', () => { + const dir = freshProject(); + const p = evolver.buildSkillProposal('drafted', 'a draft', 'Draft body.', { rationale: 'why' }); + const target = evolver.writeDraft(p, dir); + assert.match(target, /[\\/]drafts[\\/]drafted\.md$/); + const content = fs.readFileSync(target, 'utf-8'); + assert.match(content, /^---\nname: drafted\n/); + assert.match(content, /Draft body\./); + assert.match(content, /Rationale: why/); +}); + +test('writeDraft refuses invalid proposals', () => { + const dir = freshProject(); + assert.throws(() => evolver.writeDraft({ artefact: 'skill', name: 'x y', body: 'b' }, dir)); +}); + +test('EvolverRun allows one create, raises on the second', () => { + const dir = freshProject(); + const run = new evolver.EvolverRun(); + run.writeDraft(evolver.buildSkillProposal('first', 
'd', 'b'), dir); + assert.throws( + () => run.writeDraft(evolver.buildSkillProposal('second', 'd', 'b'), dir), + evolver.ProposalCapExceededError + ); + assert.equal(run.createsSoFar, 1); +}); + +// ── Friction analysis ───────────────────────────────────────────────────── + +test('extractFrictionSignals returns empty report for no traces', () => { + const r = extractFrictionSignals([]); + assert.deepEqual(r.repeated_patterns, []); + assert.deepEqual(r.tool_retry_loops, []); + assert.equal(r.analyzed_traces, 0); +}); + +test('three near-identical prompts flag a repeated pattern', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + trace('b1', 'write unit tests for the auth module'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); + assert.deepEqual(r.repeated_patterns[0].traceIds.sort(), ['a1', 'a2', 'a3']); +}); + +test('repeated pattern covered by an existing skill keyword is suppressed', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + ]; + const r = extractFrictionSignals(traces, { skillKeywords: ['csv'] }); + assert.equal(r.repeated_patterns.length, 0); +}); + +test('three consecutive same-tool failures flag a retry loop', () => { + const t = trace('t1', 'fix the parser', [ + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 1); + assert.equal(r.tool_retry_loops[0].failCount, 3); + assert.equal(r.tool_retry_loops[0].tool, 'patch'); +}); + +test('interrupted failures do not flag a retry loop', () => { + const t = 
trace('t1', 'fix it', [ + failedStep('patch', 'a.js'), + failedStep('patch', 'a.js'), + { type: 'tool_call', name: 'read_file', args: '{"path":"a.js"}', result: 'content' }, + failedStep('patch', 'a.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 0); +}); + +test('formatReportForPrompt stays compact', () => { + const r = extractFrictionSignals([ + trace('a1', 'x'.repeat(500) + ' aaa bbb ccc'), + ]); + assert.ok(formatReportForPrompt(r).length <= 2000); +}); + +// ── Drafts quarantine in SkillManager ───────────────────────────────────── + +test('SkillManager never auto-loads skills from drafts/', () => { + const dir = freshProject(); + const draftsDir = path.join(dir, '.smallcode', 'skills', 'drafts'); + fs.mkdirSync(draftsDir, { recursive: true }); + fs.writeFileSync(path.join(draftsDir, 'lurker.md'), '---\nname: lurker\ntrigger: auto\n---\nshould not load'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('lurker'), null, 'draft must stay quarantined'); +}); + +test('promoteDraft moves draft live and a fresh SkillManager loads it', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('riser', 'promoted skill', 'Now live.'), dir); + + const sm = new SkillManager(dir); + assert.equal(sm.get('riser'), null); + const target = sm.promoteDraft('riser'); + assert.ok(target); + assert.ok(sm.get('riser'), 'promoted skill loads in the same manager'); + + const sm2 = new SkillManager(dir); + assert.ok(sm2.get('riser'), 'promoted skill loads in a fresh manager'); + assert.equal(sm2.listDrafts().length, 0); +}); + +test('promoteDraft never overwrites an existing live skill', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(skillsDir, { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'taken.md'), '---\nname: taken\n---\noriginal'); + evolver.writeDraft(evolver.buildSkillProposal('taken', 'd', 'impostor'), dir); 
+ + const sm = new SkillManager(dir); + assert.equal(sm.promoteDraft('taken'), null); + assert.match(fs.readFileSync(path.join(skillsDir, 'taken.md'), 'utf-8'), /original/); +}); + +test('listDrafts reports quarantined names', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('one', 'd', 'b'), dir); + const sm = new SkillManager(dir); + assert.deepEqual(sm.listDrafts(), ['one']); +}); + +// ── Audit log ───────────────────────────────────────────────────────────── + +test('audit log appends and reads back entries', () => { + const dir = freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + appendEntry(file, { ts: 't1', kind: 'create', name: 'a' }); + appendEntry(file, { ts: 't2', kind: 'create', name: 'b' }); + const entries = readEntries(file); + assert.equal(entries.length, 2); + assert.equal(entries[1].name, 'b'); +}); + +test('logCreateEvent writes a well-formed audit row', () => { + const dir = freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + const p = evolver.buildSkillProposal('logged', 'd', 'b', { rationale: 'because' }); + evolver.logCreateEvent(file, p, 'because', ['t1', 't2']); + const [e] = readEntries(file); + assert.equal(e.kind, 'create'); + assert.equal(e.artefact, 'skill'); + assert.equal(e.name, 'logged'); + assert.deepEqual(e.source_traces, ['t1', 't2']); + assert.ok(e.ts); +}); From 7095ce325ba0206224bdf5bb727173b17c461117 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 09:52:26 -0700 Subject: [PATCH 3/8] fix(evolver): stopword filtering in prompt clustering Field regression: rephrased prompts with filler drift (another/please/new) failed to cluster because stopwords diluted Jaccard below threshold. Real prompts from a live session pinned as a test. 
// Filler words carry no task identity but dilute Jaccard similarity —
// "another seating chart please" must cluster with "a seating chart for..."
const STOPWORDS = new Set([
  'the', 'and', 'for', 'with', 'that', 'this', 'these', 'those', 'from',
  'into', 'onto', 'please', 'can', 'you', 'could', 'would', 'will',
  'another', 'again', 'new', 'now', 'just', 'some', 'all', 'any',
  'make', 'give', 'get', 'want', 'need', 'like',
]);

// Anything that is not a letter, combining mark or digit separates words.
// Unicode-aware on purpose: an ASCII-only class (/[^a-z0-9]+/) cuts accented
// words into fragments ("café" -> "caf") and reduces non-Latin prompts to
// noise. For pure-ASCII input the resulting tokens are unchanged.
const WORD_SEPARATOR_RE = /[^\p{L}\p{M}\p{N}]+/u;

/**
 * Tokenise free text into the set of "identity-carrying" words used for
 * similarity comparison.
 *
 * The text is lower-cased and split on WORD_SEPARATOR_RE; tokens of one or
 * two characters and tokens listed in STOPWORDS are dropped.
 *
 * @param {*} text - Prompt text; any falsy value is treated as the empty string,
 *   other values are coerced with String().
 * @returns {Set<string>} Unique remaining tokens (possibly empty).
 */
function _wordSet(text) {
  const tokens = String(text || '').toLowerCase().split(WORD_SEPARATOR_RE);
  return new Set(tokens.filter(w => w.length > 2 && !STOPWORDS.has(w)));
}
+ const traces = [ + trace('s1', 'generate a random seating chart for my classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s2', 'generate a new random seating chart for the classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s3', 'generate another random seating chart for my classroom students please'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); +}); + test('repeated pattern covered by an existing skill keyword is suppressed', () => { const traces = [ trace('a1', 'convert this csv file to json format'), From 4896c27ec01e6af18ec0428304ef3e400f896ce5 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:48:17 -0700 Subject: [PATCH 4/8] feat(skills): lazy index-first loading + use_skill tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SkillManager now reads only frontmatter on startup (_index Map) and loads bodies on demand via _loadBody(), cached in skills Map. This cuts per-turn skill injection from ~60k chars (all bodies) to ~240 chars (compact index) for a typical 30-skill install. New surface: getIndex() flat list, formatSkillIndex/formatSkillResult in skill_index_formatter.js, use_skill tool (executor + tools.js). getSkillContext() injects the index always; auto-matched bodies append after, subject to the existing 4000-char cap. Public API (get/list/getAutoSkills/formatForPrompt/add/remove/ promoteDraft/listDrafts) is unchanged — all 335 tests pass. 
Rejected: inject all bodies always | O(skills) context cost per turn Constraint: existing tests must pass unmodified Confidence: high Scope-risk: moderate Not-tested: live use_skill call by real model (requires interactive session) --- bin/executor.js | 18 ++ bin/smallcode.js | 25 ++- bin/tools.js | 1 + src/plugins/skill_index_formatter.js | 39 +++++ src/plugins/skills.js | 235 ++++++++++++++++++++------- test/skill_lazy.test.js | 190 ++++++++++++++++++++++ 6 files changed, 443 insertions(+), 65 deletions(-) create mode 100644 src/plugins/skill_index_formatter.js create mode 100644 test/skill_lazy.test.js diff --git a/bin/executor.js b/bin/executor.js index 87314a30..091aa28a 100644 --- a/bin/executor.js +++ b/bin/executor.js @@ -840,6 +840,24 @@ async function executeTool(name, args, ctx) { return { result: '' }; } + case 'use_skill': { + const skillManager = ctx.skillManager || null; + if (!skillManager) return { error: 'use_skill: skill system not available' }; + const skillName = String(args.name || '').trim(); + if (!skillName) return { error: 'use_skill: name is required' }; + const skill = skillManager.get(skillName); + if (!skill) { + const validNames = skillManager.getIndex().map(e => e.name).slice(0, 10); + return { error: `use_skill: skill "${skillName}" not found. 
// Auto-load relevant skills based on the user's message.
// Fix #18: Cap skill injection to ~1000 tokens (4000 chars). Multiple matching
// skills can each be a full .md file, quickly blowing up the system prompt.
//
// Lazy-skills: always inject the compact index (one line per skill) so the
// model can call use_skill to pull any body on demand. Auto-matched skill
// bodies are appended after the index, subject to the 4000-char aggregate cap.
//
// Returns '' when there is no skill manager, nothing to inject, or anything
// below throws (deliberate best-effort: skill context must never break a turn).
function getSkillContext(messages) {
  if (!skillManager) return '';
  try {
    const { formatSkillIndex } = require('../src/plugins/skill_index_formatter');
    const indexStr = formatSkillIndex(skillManager.getIndex());

    const lastUser = [...messages].reverse().find(m => m.role === 'user');
    // getAutoSkills lower-cases its argument. Hand it a string even when the
    // message content is not one, so a type error there cannot land in the
    // catch below and throw away the index as well.
    // NOTE(review): assumes non-string content (e.g. structured parts) can
    // reach this point — confirm against the message producers.
    const userText = lastUser && typeof lastUser.content === 'string' ? lastUser.content : '';
    const autoSkills = lastUser ? skillManager.getAutoSkills(userText) : [];

    // Only format when something matched. The pre-lazy version returned early
    // on an empty match list, so formatForPrompt([]) was never relied upon.
    const autoFormatted = autoSkills.length > 0
      ? skillManager.formatForPrompt(autoSkills)
      : '';

    const combined = indexStr + (autoFormatted ? '\n' + autoFormatted : '');
    // Hard cap: truncate if too long
    return combined.length > 4000
      ? combined.slice(0, 4000) + '\n... (skills truncated to fit context)'
      : combined;
  } catch {
    return '';
  }
}
a01)' }, evidence: { type: 'string', description: 'Short summary of why the check failed' }, command: { type: 'string', description: 'The command run (optional)' }, exit_code: { type: 'integer', description: 'Exit code of the command (optional)' } }, required: ['assertion_id', 'evidence'] } } }, { type: 'function', function: { name: 'contract_assert_skip', description: 'Mark an assertion as skipped (not applicable in current scope). Skipped assertions count as resolved for the done-guard.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id' }, reason: { type: 'string', description: 'Why this assertion is being skipped' } }, required: ['assertion_id', 'reason'] } } }, + { type: 'function', function: { name: 'use_skill', description: 'Load the full body of a skill by name. Use this when the skill index lists a skill relevant to your task. Returns the full skill content plus any related skill descriptions.', parameters: { type: 'object', properties: { name: { type: 'string', description: 'Skill name from the index' } }, required: ['name'] } } }, ]; // ─── Provider Tools ───────────────────────────────────────────────────────── diff --git a/src/plugins/skill_index_formatter.js b/src/plugins/skill_index_formatter.js new file mode 100644 index 00000000..defc4258 --- /dev/null +++ b/src/plugins/skill_index_formatter.js @@ -0,0 +1,39 @@ +'use strict'; + +// SmallCode — Skill index formatter +// Produces a compact index string (one line per skill, ~8 tokens each) suitable +// for always-injecting into the system prompt, plus a full-body formatter for +// use_skill results that includes related skill names/descriptions (not bodies). + +/** + * Format a flat index of skills — one line per skill. 
/**
 * Collapse any line breaks (and the whitespace around them) into one space so
 * a value can be embedded in a single-line listing.
 * @param {*} value
 * @returns {string}
 */
function toSingleLine(value) {
  return String(value).replace(/\s*[\r\n]+\s*/g, ' ').trim();
}

/**
 * Format a flat index of skills — one line per skill.
 *
 * Each line is ` <name>[ — <description>][ [kw1,kw2]]`. Entries that are
 * null/undefined or have no name are skipped; a `keywords` value that is not
 * an array is ignored; descriptions are flattened to one line so the
 * one-line-per-skill layout always holds.
 *
 * @param {Array<{name:string, description:string, trigger:string, keywords:string[]}>} entries
 * @returns {string} '' when there is nothing to list, otherwise a block that
 *   starts with a blank line and the "Available skills" header.
 */
function formatSkillIndex(entries) {
  if (!Array.isArray(entries) || entries.length === 0) return '';
  const lines = [];
  for (const e of entries) {
    if (!e || !e.name) continue;
    const keywords = Array.isArray(e.keywords) ? e.keywords : [];
    const kw = keywords.length ? ` [${keywords.join(',')}]` : '';
    const desc = e.description ? ` — ${toSingleLine(e.description)}` : '';
    lines.push(` ${e.name}${desc}${kw}`);
  }
  if (lines.length === 0) return '';
  return '\n\nAvailable skills (call use_skill to load):\n' + lines.join('\n');
}

/**
 * Format a loaded skill body for the use_skill response.
 * Appends brief related-skill entries (name + description only, not body).
 *
 * @param {object} skill — {name, description, content, keywords, trigger};
 *   a missing `content` is rendered as an empty body rather than "undefined".
 * @param {Array<{name:string, description:string}>} [relatedEntries] — index
 *   entries for related skills; falsy items are ignored.
 * @returns {string}
 */
function formatSkillResult(skill, relatedEntries) {
  const content = skill.content == null ? '' : skill.content;
  let out = `[skill:${skill.name}]\n${content}`;
  const related = Array.isArray(relatedEntries) ? relatedEntries.filter(Boolean) : [];
  if (related.length > 0) {
    const rel = related
      .map(e => ` ${e.name}${e.description ? ' — ' + toSingleLine(e.description) : ''}`)
      .join('\n');
    out += `\n\nRelated skills:\n${rel}`;
  }
  return out;
}
const fs = require('fs'); const path = require('path'); @@ -31,10 +35,16 @@ const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; // Docs that live alongside skills but aren't skills themselves const NON_SKILL_MD = /^(readme|changelog|license|contributing)\.md$/i; +// Max bytes to scan for frontmatter before falling back to full read. +const FRONTMATTER_SCAN_BYTES = 2048; +// Max lines to scan for frontmatter end marker. +const FRONTMATTER_SCAN_LINES = 50; + class SkillManager { constructor(projectDir) { this.projectDir = projectDir || process.cwd(); - this.skills = new Map(); // name → skill object + this.skills = new Map(); // name → fully-loaded skill object (cached) + this._index = new Map(); // name → IndexEntry (frontmatter + path, no body) this._load(); } @@ -130,87 +140,191 @@ class SkillManager { this._ingestFile(skillFile, path.basename(skillFile), skillDir, name, 'nested'); } + // Read only enough of the file to extract frontmatter (index-only load). + // Returns { frontmatter: string|null, bodyStart: number } — bodyStart is + // the byte offset where the body begins (after the closing ---). + // Falls back to a full read when the file is small enough or frontmatter + // spans more than FRONTMATTER_SCAN_BYTES. + _readFrontmatterOnly(filePath) { + try { + // Read a limited slice first. + const fd = fs.openSync(filePath, 'r'); + const buf = Buffer.alloc(FRONTMATTER_SCAN_BYTES); + const bytesRead = fs.readSync(fd, buf, 0, FRONTMATTER_SCAN_BYTES, 0); + fs.closeSync(fd); + const chunk = buf.slice(0, bytesRead).toString('utf-8'); + + if (!chunk.startsWith('---')) { + // No frontmatter — full content is body; return null so caller full-reads. 
+ return { frontmatter: null, hasMore: bytesRead === FRONTMATTER_SCAN_BYTES }; + } + + // Find closing --- within FRONTMATTER_SCAN_LINES lines + const lines = chunk.split(/\r?\n/); + let closeIdx = -1; + for (let i = 1; i < Math.min(lines.length, FRONTMATTER_SCAN_LINES); i++) { + if (lines[i].trimEnd() === '---') { closeIdx = i; break; } + } + if (closeIdx === -1) { + // Frontmatter not closed within scan window — fall back to full read. + return { frontmatter: null, hasMore: true }; + } + + const frontmatter = lines.slice(1, closeIdx).join('\n'); + return { frontmatter, hasMore: bytesRead === FRONTMATTER_SCAN_BYTES }; + } catch { + return { frontmatter: null, hasMore: false }; + } + } + _ingestFile(filePath, filename, dir, defaultName, origin) { + // Index-only path: read frontmatter cheaply, store as index entry. + // Body is loaded lazily on first get(). + const { frontmatter, hasMore } = this._readFrontmatterOnly(filePath); + + let meta = {}; + if (frontmatter !== null) { + meta = this._parseMeta(frontmatter); + } + + const name = meta.name || defaultName || filename.replace(/\.md$/i, ''); + + const entry = { + name, + trigger: meta.trigger || 'manual', + keywords: Array.isArray(meta.keywords) ? meta.keywords : [], + description: meta.description || '', + tags: Array.isArray(meta.tags) ? meta.tags : [], + related: Array.isArray(meta.related) ? meta.related : [], + path: filePath, + origin: origin || (defaultName ? 'nested' : 'flat'), + // hasFrontmatter: whether the file had a --- block + _hasFrontmatter: frontmatter !== null, + // If the file fits in our scan and has frontmatter, we know + // the body wasn't loaded yet. Track that. 
+ _bodyLoaded: false, + }; + + this._index.set(name, entry); + // Remove any stale cached body for same name (precedence override) + this.skills.delete(name); + } + + _parseMeta(frontmatter) { + const meta = {}; + for (const rawLine of frontmatter.split(/\r?\n/)) { + const m = rawLine.match(KV_RE); + if (!m) continue; + let value = m[2].trim(); + if (value.startsWith('[') && value.endsWith(']')) { + value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); + } + meta[m[1]] = value; + } + return meta; + } + + // Load the full body for a named skill, populate this.skills cache. + _loadBody(name) { + const entry = this._index.get(name); + if (!entry) return null; + if (entry._bodyLoaded && this.skills.has(name)) return this.skills.get(name); + let content; try { - content = fs.readFileSync(filePath, 'utf-8'); + content = fs.readFileSync(entry.path, 'utf-8'); } catch { - return; + return null; } - const skill = this._parse(content, filename, dir, defaultName, origin); - if (skill) this.skills.set(skill.name, skill); - } - _parse(content, filename, dir, defaultName, origin) { - // Parse YAML frontmatter (CRLF + LF tolerant — closes #52) const fmMatch = content.match(FM_RE); - let frontmatter = ''; let body = content; + let meta = {}; if (fmMatch) { - frontmatter = fmMatch[1]; + meta = this._parseMeta(fmMatch[1]); body = fmMatch[2]; - } else if (!defaultName) { - // Files without frontmatter and no derivable name aren't skills. - // Flat + nested loaders always pass a defaultName, so frontmatter-less - // files load as manual skills (closes #81); README-style files are - // filtered by name in _loadFlat. 
- return null; - } - - // Tiny YAML parser — no dep needed - const meta = {}; - if (frontmatter) { - for (const rawLine of frontmatter.split(/\r?\n/)) { - const m = rawLine.match(KV_RE); - if (!m) continue; - let value = m[2].trim(); - if (value.startsWith('[') && value.endsWith(']')) { - value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); - } - meta[m[1]] = value; - } + } else if (!entry._hasFrontmatter) { + // No frontmatter — full file is body (manual trigger, named by filename/dir) + body = content; } - return { - name: meta.name || defaultName || filename.replace(/\.md$/i, ''), - trigger: meta.trigger || 'manual', - keywords: Array.isArray(meta.keywords) ? meta.keywords : [], + const skill = { + name: meta.name || entry.name, + trigger: meta.trigger || entry.trigger, + keywords: Array.isArray(meta.keywords) ? meta.keywords : entry.keywords, + description: meta.description || entry.description || '', + tags: Array.isArray(meta.tags) ? meta.tags : entry.tags, + related: Array.isArray(meta.related) ? meta.related : entry.related, content: body.trim(), - path: path.join(dir, filename), - origin: origin || (defaultName ? 'nested' : 'flat'), + path: entry.path, + origin: entry.origin, }; + + entry._bodyLoaded = true; + this.skills.set(name, skill); + return skill; } - // Get all skills + // Get all skills — returns index entries with lazy-loaded bodies for callers + // that need content. list() does NOT load bodies (index only). list() { - return [...this.skills.values()].map(s => ({ - name: s.name, - trigger: s.trigger, - keywords: s.keywords, - preview: s.content.slice(0, 80) + (s.content.length > 80 ? '...' 
: ''), - origin: s.origin || 'flat', + return [...this._index.values()].map(e => ({ + name: e.name, + trigger: e.trigger, + keywords: e.keywords, + preview: this._getPreview(e), + origin: e.origin || 'flat', })); } - // Get a skill by name + _getPreview(entry) { + // Return preview from cached body if available; otherwise a short placeholder. + if (entry._bodyLoaded && this.skills.has(entry.name)) { + const body = this.skills.get(entry.name).content; + return body.slice(0, 80) + (body.length > 80 ? '...' : ''); + } + // Avoid loading body just for list() — return description or empty + return entry.description || ''; + } + + // Get a skill by name — lazily loads body on first call. get(name) { - return this.skills.get(name) || null; + if (this.skills.has(name)) return this.skills.get(name); + if (!this._index.has(name)) return null; + return this._loadBody(name); } - // Get skills that should auto-inject for a given message + // Get skills that should auto-inject for a given message. + // Only checks index entries (trigger/keywords) — avoids loading bodies + // until caller needs content. getAutoSkills(message) { const msg = (message || '').toLowerCase(); const results = []; - for (const skill of this.skills.values()) { - if (skill.trigger === 'auto') { - results.push(skill); - } else if (skill.trigger === 'match' && skill.keywords.length > 0) { - const match = skill.keywords.some(kw => msg.includes(String(kw).toLowerCase())); - if (match) results.push(skill); + for (const entry of this._index.values()) { + if (entry.trigger === 'auto') { + results.push(this._loadBody(entry.name)); + } else if (entry.trigger === 'match' && entry.keywords.length > 0) { + const match = entry.keywords.some(kw => msg.includes(String(kw).toLowerCase())); + if (match) results.push(this._loadBody(entry.name)); } } - return results; + return results.filter(Boolean); + } + + // Return flat IndexEntry list for prompt injection (no bodies loaded). 
+ // { name, description, trigger, keywords, tags, related, path, origin } + getIndex() { + return [...this._index.values()].map(e => ({ + name: e.name, + description: e.description, + trigger: e.trigger, + keywords: e.keywords, + tags: e.tags, + related: e.related, + path: e.path, + origin: e.origin, + })); } // Create a new skill in the project's .smallcode/skills directory @@ -239,10 +353,16 @@ class SkillManager { name, trigger, keywords, + description: options.description || '', + tags: options.tags || [], + related: options.related || [], content, path: filePath, origin: 'flat', + _hasFrontmatter: true, + _bodyLoaded: true, }; + this._index.set(name, skill); this.skills.set(name, skill); return skill; } @@ -276,11 +396,12 @@ class SkillManager { // Remove a skill remove(name) { - const skill = this.skills.get(name); - if (!skill) return false; - if (fs.existsSync(skill.path)) { - try { fs.unlinkSync(skill.path); } catch {} + const entry = this._index.get(name) || this.skills.get(name); + if (!entry) return false; + if (fs.existsSync(entry.path)) { + try { fs.unlinkSync(entry.path); } catch {} } + this._index.delete(name); this.skills.delete(name); return true; } diff --git a/test/skill_lazy.test.js b/test/skill_lazy.test.js new file mode 100644 index 00000000..59da7938 --- /dev/null +++ b/test/skill_lazy.test.js @@ -0,0 +1,190 @@ +'use strict'; + +// SmallCode — Lazy skill loading tests +// Verifies index-first SkillManager, lazy body loading, getIndex() fields, +// formatter output, and backward compatibility with existing callers. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { SkillManager } = require('../src/plugins/skills'); +const { formatSkillIndex, formatSkillResult } = require('../src/plugins/skill_index_formatter'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-lazy-')); +} + +function write(file, content) { + fs.mkdirSync(path.dirname(file), { recursive: true }); + fs.writeFileSync(file, content); +} + +// ── Index-only startup ──────────────────────────────────────────────────────── + +test('index is populated on construction without loading bodies', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'alpha.md'), + '---\nname: alpha\ntrigger: manual\ndescription: does alpha things\n---\nbody text here'); + + const sm = new SkillManager(dir); + // _index must have the entry + assert.ok(sm._index.has('alpha'), '_index should have alpha'); + // skills (body cache) should NOT have it yet + assert.ok(!sm.skills.has('alpha'), 'body cache should be empty before get()'); +}); + +test('getIndex() returns expected fields without loading bodies', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'beta.md'), + '---\nname: beta\ntrigger: match\nkeywords: [foo, bar]\ndescription: beta desc\ntags: [t1]\nrelated: [alpha]\n---\nbeta body'); + + const sm = new SkillManager(dir); + const idx = sm.getIndex(); + const entry = idx.find(e => e.name === 'beta'); + assert.ok(entry, 'getIndex should return beta'); + assert.equal(entry.name, 'beta'); + assert.equal(entry.description, 'beta desc'); + assert.equal(entry.trigger, 'match'); + assert.deepEqual(entry.keywords, ['foo', 'bar']); + assert.deepEqual(entry.tags, ['t1']); + assert.deepEqual(entry.related, ['alpha']); + assert.ok(entry.path); + assert.equal(entry.origin, 'flat'); + // Body should still not 
be loaded + assert.ok(!sm.skills.has('beta')); +}); + +// ── Lazy get() ──────────────────────────────────────────────────────────────── + +test('get() lazily loads body on first call', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'lazy.md'), + '---\nname: lazy\ntrigger: manual\n---\nthe lazy body content'); + + const sm = new SkillManager(dir); + assert.ok(!sm.skills.has('lazy'), 'body not loaded yet'); + const skill = sm.get('lazy'); + assert.ok(skill, 'get() returns the skill'); + assert.match(skill.content, /the lazy body content/); + assert.ok(sm.skills.has('lazy'), 'body is cached after get()'); +}); + +test('get() caches: second call returns same object', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'cached.md'), + '---\nname: cached\ntrigger: manual\n---\ncached body'); + + const sm = new SkillManager(dir); + const first = sm.get('cached'); + const second = sm.get('cached'); + assert.strictEqual(first, second, 'should return same cached object'); +}); + +test('get() returns null for unknown skill', () => { + const dir = freshProject(); + const sm = new SkillManager(dir); + assert.equal(sm.get('nonexistent'), null); +}); + +// ── Backward compat: public API unchanged ───────────────────────────────────── + +test('list() returns entries with name/trigger/keywords/origin', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'listme.md'), + '---\nname: listme\ntrigger: auto\nkeywords: [x]\n---\nlist body'); + + const sm = new SkillManager(dir); + const items = sm.list(); + const item = items.find(i => i.name === 'listme'); + assert.ok(item); + assert.equal(item.trigger, 'auto'); + assert.deepEqual(item.keywords, ['x']); + assert.equal(item.origin, 'flat'); + // list() should NOT load bodies + assert.ok(!sm.skills.has('listme')); +}); + +test('getAutoSkills() loads bodies only for matched skills', () => { + const dir = freshProject(); + 
write(path.join(dir, '.smallcode', 'skills', 'always.md'), + '---\nname: always\ntrigger: auto\n---\nauto body'); + write(path.join(dir, '.smallcode', 'skills', 'keyword.md'), + '---\nname: keyword\ntrigger: match\nkeywords: [deploy]\n---\ndeploy body'); + write(path.join(dir, '.smallcode', 'skills', 'nomatch.md'), + '---\nname: nomatch\ntrigger: match\nkeywords: [unrelated]\n---\nnomatch body'); + + const sm = new SkillManager(dir); + const result = sm.getAutoSkills('please deploy the app'); + const names = result.map(s => s.name).sort(); + assert.deepEqual(names, ['always', 'keyword']); + // nomatch should not be loaded + assert.ok(!sm.skills.has('nomatch')); +}); + +// ── Formatter ──────────────────────────────────────────────────────────────── + +test('formatSkillIndex produces one line per skill', () => { + const entries = [ + { name: 'foo', description: 'does foo', trigger: 'manual', keywords: [] }, + { name: 'bar', description: 'does bar', trigger: 'match', keywords: ['baz'] }, + ]; + const out = formatSkillIndex(entries); + assert.ok(out.includes('foo')); + assert.ok(out.includes('bar')); + // Each skill on its own line + const lines = out.split('\n').filter(l => l.includes('foo') || l.includes('bar')); + assert.equal(lines.length, 2); +}); + +test('formatSkillIndex returns empty string for no entries', () => { + assert.equal(formatSkillIndex([]), ''); + assert.equal(formatSkillIndex(null), ''); +}); + +test('formatSkillResult includes body and related names', () => { + const skill = { name: 'main', description: '', content: 'main body content', keywords: [], trigger: 'manual' }; + const related = [ + { name: 'other', description: 'the other skill' }, + ]; + const out = formatSkillResult(skill, related); + assert.ok(out.includes('main body content')); + assert.ok(out.includes('other')); + assert.ok(out.includes('the other skill')); +}); + +test('formatSkillResult with no related entries', () => { + const skill = { name: 's', content: 'solo body', keywords: 
[], trigger: 'manual', description: '' }; + const out = formatSkillResult(skill, []); + assert.ok(out.includes('solo body')); + assert.ok(!out.includes('Related skills')); +}); + +// ── New frontmatter fields backward compat ──────────────────────────────────── + +test('skills without description/tags/related still load correctly', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'plain.md'), + '---\nname: plain\ntrigger: manual\n---\njust a plain body'); + + const sm = new SkillManager(dir); + const skill = sm.get('plain'); + assert.ok(skill); + assert.equal(skill.description, ''); + assert.deepEqual(skill.tags, []); + assert.deepEqual(skill.related, []); + assert.match(skill.content, /just a plain body/); +}); + +test('add() works and skill is in index immediately', () => { + const dir = freshProject(); + const sm = new SkillManager(dir); + sm.add('added', 'added content', { trigger: 'auto', description: 'an added skill' }); + + assert.ok(sm._index.has('added')); + const skill = sm.get('added'); + assert.ok(skill); + assert.match(skill.content, /added content/); +}); From 406c4fbae1e6d9c11aafc66f71dc6b9fadb9a45c Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 09:09:24 -0700 Subject: [PATCH 5/8] fix(skills): route use_skill through tool category filters use_skill was defined in TOOLS but absent from both routers' category whitelists, so the model never saw it in routed mode. The skill index is injected every turn, so the tool rides along in every tool-bearing category (~80 tokens). 
function getToolsForCategory(category) {
  // Pure-response turns get no tools at all, not even the skill loader.
  if (category === 'respond') return [];

  // The skill index is injected on every turn, so the skill loader is added
  // to every tool-bearing category (including the fallback), always last.
  const skillLoader = 'use_skill';

  // Category-specific tools, listed without the skill loader.
  const toolsByCategory = new Map([
    ['code_intel', ['graph_search', 'explain_symbol', 'read_file', 'find_files', 'search', 'hybrid_search']],
    ['read', ['read_file', 'list_projects', 'graph_search', 'find_files', 'find_and_read']],
    ['write', ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run']],
    ['search', ['search', 'find_files', 'graph_search', 'read_file', 'explain_symbol', 'search_and_read', 'hybrid_search']],
    ['run', ['bash', 'run', 'read_file']],
    ['plan', [
      'read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search',
      'memory_load', 'memory_remember', 'bone_compile', 'bone_check',
      'read_and_patch', 'create_and_run', 'find_and_read', 'search_and_read',
    ]],
    ['web', ['web_search', 'web_fetch', 'read_file']],
  ]);

  // Anything unrecognised falls back to the basic read/write/search set.
  const fallbackTools = ['read_file', 'write_file', 'patch', 'bash', 'search'];
  const categoryTools = toolsByCategory.get(category) || fallbackTools;

  // A fresh array per call, so callers may mutate the result freely.
  return [...categoryTools, skillLoader];
}
function getToolsForCategory(category, allTools) {
  // Stage 2 filter: narrow the full tool list to the tools of one category
  // plus the cross-cutting ALWAYS_TOOLS.
  //
  // Only own keys of TOOL_CATEGORIES count as categories. A plain property
  // read would also resolve inherited names such as "constructor" or
  // "toString" to a truthy non-category value, and the filter below would
  // then throw on the missing `tools` array instead of falling back.
  if (!Object.hasOwn(TOOL_CATEGORIES, category)) return allTools; // Unknown category, fall back to all
  const cat = TOOL_CATEGORIES[category];
  if (!cat) return allTools;

  // One membership set instead of two array scans per tool.
  const allowed = new Set([...cat.tools, ...ALWAYS_TOOLS]);
  return allTools.filter(t => allowed.has(t.function.name));
}
Rejected: markdown-tier replacement | loses FTS5/BM25 Rejected: hybrid two-source write | inconsistency risk Constraint: do not modify node_modules/budget-aware-mcp Confidence: high Scope-risk: narrow Not-tested: budget-aware-mcp setMeta path (no setMeta exists — update() used instead) --- bin/commands.js | 37 +++++- bin/memory.js | 5 +- bin/smallcode.js | 3 + src/memory/hygiene.js | 242 ++++++++++++++++++++++++++++++++++++ test/memory_hygiene.test.js | 182 +++++++++++++++++++++++++++ 5 files changed, 466 insertions(+), 3 deletions(-) create mode 100644 src/memory/hygiene.js create mode 100644 test/memory_hygiene.test.js diff --git a/bin/commands.js b/bin/commands.js index e1a7e11c..184fb7b4 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -307,9 +307,42 @@ module.exports = function createCommandHandler(config, conversationHistory, impr } catch (e) { console.log(chalk.gray(` Error: ${e.message}`)); } + } else if (sub === 'hygiene') { + try { + const { runHygiene } = require('../src/memory/hygiene'); + const result = runHygiene(memoryStore); + console.log(chalk.green(` ✓ Hygiene complete: ${result.archived} archived, ${result.deleted} deleted`)); + // Also write MEMORY.md index + const { renderMemoryIndex } = require('../src/memory/hygiene'); + const md = renderMemoryIndex(memoryStore); + const fs = require('fs'); + const path = require('path'); + const outDir = path.join(process.cwd(), '.smallcode'); + if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true }); + fs.writeFileSync(path.join(outDir, 'MEMORY.md'), md); + console.log(chalk.gray(` Wrote .smallcode/MEMORY.md (${memoryStore.all().length} entries)`)); + } catch (e) { + console.log(chalk.gray(` Hygiene error: ${e.message}`)); + } + } else if (sub === 'index') { + try { + const { renderMemoryIndex } = require('../src/memory/hygiene'); + const md = renderMemoryIndex(memoryStore); + const fs = require('fs'); + const path = require('path'); + const outDir = path.join(process.cwd(), 
'.smallcode'); + if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true }); + fs.writeFileSync(path.join(outDir, 'MEMORY.md'), md); + console.log(chalk.green(` ✓ Wrote .smallcode/MEMORY.md`)); + console.log(md.split('\n').slice(0, 10).map(l => ' ' + l).join('\n')); + } catch (e) { + console.log(chalk.gray(` Index error: ${e.message}`)); + } } else { - console.log(chalk.gray(' /memory List stored memory')); - console.log(chalk.gray(' /memory clear Clear all memory')); + console.log(chalk.gray(' /memory List stored memory')); + console.log(chalk.gray(' /memory clear Clear all memory')); + console.log(chalk.gray(' /memory hygiene Sweep tiers, prune stale entries, write MEMORY.md')); + console.log(chalk.gray(' /memory index Write .smallcode/MEMORY.md without sweeping')); } console.log(''); rl.prompt(); diff --git a/bin/memory.js b/bin/memory.js index fbe842d1..9183be2f 100644 --- a/bin/memory.js +++ b/bin/memory.js @@ -20,7 +20,7 @@ const INDEX_FILE = '.smallcode/memory/index.json'; // ─── Memory Object ─────────────────────────────────────────────────────────── class MemoryObject { - constructor({ id, type, title, content, tags, relations, createdAt, updatedAt, source }) { + constructor({ id, type, title, content, tags, relations, createdAt, updatedAt, source, tier, last_used_at }) { this.id = id || crypto.randomUUID().slice(0, 8); this.type = type; // decision | workflow | gotcha | convention | context | source this.title = title; @@ -30,6 +30,8 @@ class MemoryObject { this.createdAt = createdAt || new Date().toISOString(); this.updatedAt = updatedAt || new Date().toISOString(); this.source = source || null; // { file, line, commit } + this.tier = tier || 'hot'; // hot | archive + this.last_used_at = last_used_at || this.createdAt; } toJSON() { @@ -37,6 +39,7 @@ class MemoryObject { id: this.id, type: this.type, title: this.title, content: this.content, tags: this.tags, relations: this.relations, createdAt: this.createdAt, updatedAt: this.updatedAt, 
source: this.source, + tier: this.tier, last_used_at: this.last_used_at, }; } } diff --git a/bin/smallcode.js b/bin/smallcode.js index c5be9c50..3df128ff 100755 --- a/bin/smallcode.js +++ b/bin/smallcode.js @@ -287,6 +287,7 @@ async function runTUI(config) { onCommand: async (cmd) => { if (cmd === '/quit' || cmd === '/q' || cmd === '/exit') { if (sessionStore) sessionStore.save(conversationHistory, { tokens: tokenTracker ? tokenTracker.stats() : undefined }); + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} screen.leave(); killMCP() process.exit(0); @@ -318,6 +319,7 @@ async function runTUI(config) { if (sessionStore) { sessionStore.save(conversationHistory, { tokens: tokenTracker ? tokenTracker.stats() : undefined }); } + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} killMCP() process.exit(0); }, @@ -2555,6 +2557,7 @@ async function chatCompletion(config, messages) { }); sessionStore.autoTitle(conversationHistory); } + try { if (memoryStore) { const { runHygiene } = require('../src/memory/hygiene'); runHygiene(memoryStore); } } catch {} return data; } catch (err) { diff --git a/src/memory/hygiene.js b/src/memory/hygiene.js new file mode 100644 index 00000000..c84c206e --- /dev/null +++ b/src/memory/hygiene.js @@ -0,0 +1,242 @@ +'use strict'; + +// SmallCode — Memory Hygiene +// Promotes memory objects to hot/archive tiers and prunes stale entries. +// Runs silently at session-save points; never throws — all errors are swallowed. 
// Default thresholds. Each one can be overridden per call through the
// `opts` argument of runHygiene().
const HOT_CAP = 20;      // max hot-tier entries
const BATCH = 5;         // how many to archive per cap sweep
const ARCHIVE_AGE = 60;  // days since last use before hot → archive
const DELETE_AGE = 90;   // days since last use before an archive entry is forgotten
const MS_PER_DAY = 86400000;

// Tag prefixes used to carry tier metadata on stores that only expose update().
const TIER_TAG = 'tier:';
const LAST_USED_TAG = 'last_used:';

/**
 * Wrap a store in a common interface. A store that exposes update() is
 * treated as the SQLite budget-aware-mcp store; anything else is treated as
 * the fallback MemoryStore from bin/memory.js.
 *
 * Every mutating method swallows store errors and reports the outcome as a
 * boolean, so callers can count only the operations that actually happened.
 *
 * @param {object} store
 * @returns {{
 *   all: () => object[],
 *   getMeta: (obj: object) => { tier: string, last_used_at: string },
 *   setMeta: (obj: object, meta: { tier: string, last_used_at: string }) => boolean,
 *   forget: (id: string) => boolean,
 * }}
 */
function makeAdapter(store) {
  const isSqlite = typeof store.update === 'function';

  // Always hand out a snapshot: the sweeps call forget()/setMeta() while
  // iterating, and a live backing array would skip entries when it shrinks.
  function all() {
    return Array.from(store.all() ?? []);
  }

  // Same resolution rules as extractMeta(), so both views always agree.
  function getMeta(obj) {
    return extractMeta(obj);
  }

  function setMeta(obj, meta) {
    if (isSqlite) {
      // Use update() to avoid the forget+remember dedup trap (re-inserting
      // identical content is blocked by the content_hash check). Tier and
      // last_used_at are encoded into the tags array so no schema change is
      // needed on budget-aware-mcp.
      try {
        const otherTags = (Array.isArray(obj.tags) ? obj.tags : []).filter(
          t => !(typeof t === 'string' && (t.startsWith(TIER_TAG) || t.startsWith(LAST_USED_TAG)))
        );
        store.update(obj.id, {
          tags: [...otherTags, `${TIER_TAG}${meta.tier}`, `${LAST_USED_TAG}${meta.last_used_at}`],
        });
        return true;
      } catch {
        return false;
      }
    }

    // Fallback MemoryStore: mutate in place, then persist.
    obj.tier = meta.tier;
    obj.last_used_at = meta.last_used_at;
    if (typeof store.save === 'function') {
      try {
        store.save();
      } catch {
        // Persisting is best-effort; the in-memory object is already updated.
      }
    }
    return true;
  }

  function forget(id) {
    try {
      store.forget(id);
      return true;
    } catch {
      return false;
    }
  }

  return { all, getMeta, setMeta, forget };
}

/**
 * Extract tier/last_used_at from a memory object regardless of store type.
 *
 * Each field is resolved independently, first match wins:
 *   1. the direct property (`obj.tier`, `obj.last_used_at`)
 *   2. the tag encoding (`tier:X`, `last_used:ISO`)
 *   3. a default: `'hot'` for the tier; `createdAt`, `created_at` or the Unix
 *      epoch for the timestamp
 *
 * Resolving per field means an object carrying only one direct property keeps
 * it, instead of having it discarded in favour of tags or defaults.
 *
 * @param {object} obj memory object
 * @returns {{ tier: string, last_used_at: string }}
 */
function extractMeta(obj) {
  const source = obj ?? {};
  let taggedTier;
  let taggedLastUsed;
  for (const tag of Array.isArray(source.tags) ? source.tags : []) {
    // Tags are free-form; anything that is not a string cannot carry metadata.
    if (typeof tag !== 'string') continue;
    if (tag.startsWith(TIER_TAG)) taggedTier = tag.slice(TIER_TAG.length);
    else if (tag.startsWith(LAST_USED_TAG)) taggedLastUsed = tag.slice(LAST_USED_TAG.length);
  }
  return {
    tier: source.tier || taggedTier || 'hot',
    last_used_at:
      source.last_used_at ||
      taggedLastUsed ||
      source.createdAt ||
      source.created_at ||
      new Date(0).toISOString(),
  };
}

/**
 * Run hygiene on the store. Silent: never throws, whatever `store` or `opts`
 * turn out to be. The returned counts include only operations the store
 * accepted.
 *
 * Sweeps, in order:
 *   1. backfill — entries without any tier information become hot
 *   2. age      — hot entries unused for more than `archiveAge` days are
 *                 archived; archive entries unused for more than `deleteAge`
 *                 days are forgotten
 *   3. cap      — if more than `hotCap` entries are still hot, the `batch`
 *                 least recently used ones are archived
 *
 * @param {object} store — MemoryStore or budget-aware-mcp store
 * @param {object} [opts]
 * @param {number} [opts.hotCap=20]
 * @param {number} [opts.batch=5]
 * @param {number} [opts.archiveAge=60] days
 * @param {number} [opts.deleteAge=90] days
 * @returns {{ archived: number, deleted: number, total: number }}
 */
function runHygiene(store, opts = {}) {
  let archived = 0;
  let deleted = 0;

  try {
    // Read options inside the try so a null `opts` cannot break the
    // never-throws contract.
    const settings = opts ?? {};
    const hotCap = settings.hotCap ?? HOT_CAP;
    const batch = settings.batch ?? BATCH;
    const archiveAge = settings.archiveAge ?? ARCHIVE_AGE;
    const deleteAge = settings.deleteAge ?? DELETE_AGE;

    const adapter = makeAdapter(store);
    const now = Date.now();

    // ── Backfill ─────────────────────────────────────────────────────────────
    // Assign hot tier + last_used_at to any entry that lacks tier information.
    for (const obj of adapter.all()) {
      const hasTierTag =
        Array.isArray(obj.tags) &&
        obj.tags.some(t => typeof t === 'string' && t.startsWith(TIER_TAG));
      if (!obj.tier && !hasTierTag) {
        adapter.setMeta(obj, { tier: 'hot', last_used_at: extractMeta(obj).last_used_at });
      }
    }

    // ── Age sweep ────────────────────────────────────────────────────────────
    // Re-read after backfill so we have fresh state.
    for (const obj of adapter.all()) {
      const { tier, last_used_at } = extractMeta(obj);
      const ageDays = (now - new Date(last_used_at).getTime()) / MS_PER_DAY;
      // Unparsable timestamp: the age is unknown, so leave the entry alone.
      if (Number.isNaN(ageDays)) continue;

      if (tier === 'hot' && ageDays > archiveAge) {
        if (adapter.setMeta(obj, { tier: 'archive', last_used_at })) archived++;
      } else if (tier === 'archive' && ageDays > deleteAge) {
        if (adapter.forget(obj.id)) deleted++;
      }
    }

    // ── Cap sweep ────────────────────────────────────────────────────────────
    // Re-read to get an up-to-date list (the age sweep may have archived some).
    const hotEntries = adapter
      .all()
      .map(obj => ({ obj, meta: extractMeta(obj) }))
      .filter(entry => entry.meta.tier === 'hot');

    if (hotEntries.length > hotCap) {
      // Oldest first. Unparsable timestamps sort as the epoch so the
      // comparator never returns NaN and the order stays deterministic.
      const usedAt = entry => {
        const ms = new Date(entry.meta.last_used_at).getTime();
        return Number.isNaN(ms) ? 0 : ms;
      };
      hotEntries.sort((a, b) => usedAt(a) - usedAt(b));

      for (const { obj, meta } of hotEntries.slice(0, batch)) {
        if (adapter.setMeta(obj, { tier: 'archive', last_used_at: meta.last_used_at })) archived++;
      }
    }
  } catch {
    // Hygiene must never crash the session.
  }

  return { archived, deleted, total: archived + deleted };
}
/**
 * Render a human-readable memory index to a markdown string.
 * Hot entries come before archive. Grouped by type within each tier.
 * This file is GENERATED — never authoritative.
 *
 * Never throws: any failure (including an unusable store) yields a short
 * "(error rendering)" document instead.
 *
 * @param {object} store
 * @returns {string}
 */
function renderMemoryIndex(store) {
  try {
    const objects = store.all();
    if (objects.length === 0) return '# Memory Index\n\n(empty)\n';

    const hot = [];
    const archive = [];
    for (const obj of objects) {
      const { tier } = extractMeta(obj);
      if (tier === 'archive') archive.push(obj);
      else hot.push(obj);
    }

    // Group with a Map, not a plain object: a type named like an
    // Object.prototype member ("constructor", "toString", …) would otherwise
    // hit the inherited value instead of starting a new group, and the whole
    // render would fail.
    function groupByType(objs) {
      const groups = new Map();
      for (const o of objs) {
        if (!groups.has(o.type)) groups.set(o.type, []);
        groups.get(o.type).push(o);
      }
      return groups;
    }

    // One "### type (count)" heading per group, one bullet per entry.
    function renderGroup(groups) {
      let out = '';
      for (const [type, objs] of groups) {
        out += `\n### ${type} (${objs.length})\n`;
        for (const o of objs) {
          out += `- [${o.id}] **${o.title}**\n`;
        }
      }
      return out;
    }

    let md = `# Memory Index\n\nGenerated: ${new Date().toISOString()}\n`;
    md += `Total: ${objects.length} (hot: ${hot.length}, archive: ${archive.length})\n`;

    if (hot.length > 0) {
      md += '\n## Hot\n';
      md += renderGroup(groupByType(hot));
    }
    if (archive.length > 0) {
      md += '\n## Archive\n';
      md += renderGroup(groupByType(archive));
    }

    return md;
  } catch {
    return '# Memory Index\n\n(error rendering)\n';
  }
}
const test = require('node:test');
const assert = require('node:assert/strict');
const fs = require('node:fs');
const os = require('node:os');
const path = require('node:path');

const { runHygiene, renderMemoryIndex, extractMeta } = require('../src/memory/hygiene');
const { MemoryStore } = require('../bin/memory');

// Each test gets its own store rooted in a throwaway temp directory.
function freshStore() {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'sc-hygiene-'));
  return new MemoryStore(dir);
}

// ISO timestamp for "n days before now".
function daysAgo(n) {
  return new Date(Date.now() - n * 86400000).toISOString();
}

// ── No-op on empty store ──────────────────────────────────────────────────────

test('runHygiene on empty store returns zeros', () => {
  const store = freshStore();
  const result = runHygiene(store);
  assert.equal(result.archived, 0);
  assert.equal(result.deleted, 0);
  assert.equal(result.total, 0);
});

// ── Backfill ─────────────────────────────────────────────────────────────────

test('runHygiene backfills tier=hot and last_used_at on old entries', () => {
  const store = freshStore();
  // Remember without tier/last_used_at (old-format entry)
  const obj = store.remember('decision', 'old entry', 'content', {});
  // Strip tier/last_used_at to simulate pre-hygiene entry
  obj.tier = undefined;
  obj.last_used_at = undefined;
  store.save(); // persist mutated object

  runHygiene(store, { archiveAge: 9999, deleteAge: 9999 });

  // Check the stored fields themselves: extractMeta() defaults a missing tier
  // to 'hot', so asserting through it would pass even without any backfill.
  const [entry] = store.all();
  assert.equal(entry.tier, 'hot');
  assert.equal(entry.last_used_at, entry.createdAt);
});

// ── Age sweep: hot → archive ──────────────────────────────────────────────────

test('hot entry unused > archiveAge is moved to archive', () => {
  const store = freshStore();
  const obj = store.remember('gotcha', 'stale hot', 'content', {});
  // Force last_used_at to 70 days ago
  obj.last_used_at = daysAgo(70);
  obj.tier = 'hot';
  store.save();

  const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 });
  assert.equal(result.archived, 1);

  const { tier } = extractMeta(store.all()[0]);
  assert.equal(tier, 'archive');
});

test('hot entry within archiveAge is NOT archived', () => {
  const store = freshStore();
  const obj = store.remember('context', 'fresh entry', 'content', {});
  obj.last_used_at = daysAgo(5);
  obj.tier = 'hot';
  store.save();

  const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 });
  assert.equal(result.archived, 0);
  assert.equal(result.deleted, 0);
});

// ── Age sweep: archive → delete ───────────────────────────────────────────────

test('archive entry older than deleteAge is deleted', () => {
  const store = freshStore();
  const obj = store.remember('workflow', 'ancient archive', 'content', {});
  obj.last_used_at = daysAgo(100);
  obj.tier = 'archive';
  store.save();

  const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 });
  assert.equal(result.deleted, 1);
  assert.equal(store.all().length, 0);
});

test('archive entry within deleteAge is NOT deleted', () => {
  const store = freshStore();
  const obj = store.remember('workflow', 'recent archive', 'content', {});
  obj.last_used_at = daysAgo(65);
  obj.tier = 'archive';
  store.save();

  const result = runHygiene(store, { archiveAge: 60, deleteAge: 90 });
  assert.equal(result.deleted, 0);
  // Already in the archive tier, so the hot → archive rule does not fire either.
  assert.equal(result.archived, 0);
  assert.equal(store.all().length, 1);
});

// ── Cap sweep ────────────────────────────────────────────────────────────────

test('cap sweep archives oldest entries when hot > hotCap', () => {
  const store = freshStore();
  // Create 6 hot entries with varying last_used_at, cap=4, batch=2
  for (let i = 0; i < 6; i++) {
    const obj = store.remember('convention', `entry-${i}`, `content ${i}`, {});
    obj.last_used_at = daysAgo(i * 2); // older entries have higher i
    obj.tier = 'hot';
    store.save();
  }

  const result = runHygiene(store, { hotCap: 4, batch: 2, archiveAge: 9999, deleteAge: 9999 });
  assert.equal(result.archived, 2);

  const all = store.all();
  const archived = all.filter(o => extractMeta(o).tier === 'archive');
  assert.equal(archived.length, 2);
  // Exactly the 2 oldest entries must be the archived ones.
  const archivedNames = archived.map(o => o.title).sort();
  assert.deepEqual(archivedNames, ['entry-4', 'entry-5']);
});

// ── No-op when under cap ─────────────────────────────────────────────────────

test('cap sweep is no-op when hot count <= hotCap', () => {
  const store = freshStore();
  const obj = store.remember('decision', 'single entry', 'content', {});
  obj.tier = 'hot';
  obj.last_used_at = daysAgo(1);
  store.save();

  const result = runHygiene(store, { hotCap: 10, batch: 5, archiveAge: 9999, deleteAge: 9999 });
  assert.equal(result.archived, 0);
});

// ── renderMemoryIndex ─────────────────────────────────────────────────────────

test('renderMemoryIndex returns empty marker for empty store', () => {
  const store = freshStore();
  const md = renderMemoryIndex(store);
  assert.ok(md.includes('empty'));
});

test('renderMemoryIndex groups by tier then type', () => {
  const store = freshStore();
  const h = store.remember('decision', 'hot entry', 'content', {});
  h.tier = 'hot';
  store.save();
  const a = store.remember('workflow', 'archive entry', 'other', {});
  a.tier = 'archive';
  store.save();

  const md = renderMemoryIndex(store);
  assert.ok(md.includes('## Hot'));
  assert.ok(md.includes('## Archive'));
  // Hot section comes before archive
  assert.ok(md.indexOf('## Hot') < md.indexOf('## Archive'));
});

// ── Round-trip: tier survives save/reload ────────────────────────────────────

test('tier and last_used_at survive store save and reload', () => {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'sc-hygiene-rt-'));
  const store1 = new MemoryStore(dir);
  const obj = store1.remember('context', 'persist me', 'content', {});
  const stamp = daysAgo(70);
  obj.tier = 'archive';
  obj.last_used_at = stamp;
  store1.save();

  const store2 = new MemoryStore(dir);
  const loaded = store2.all()[0];
  assert.equal(loaded.tier, 'archive');
  // Compare the exact value: a plain truthiness check is always satisfied by
  // the constructor's createdAt default.
  assert.equal(loaded.last_used_at, stamp);
});
+ for (const o of objects) { + try { + const now = new Date().toISOString(); + if (typeof memoryStore.update === 'function') { + memoryStore.update(o.id, { last_used_at: now }); + } else { + o.last_used_at = now; + if (typeof memoryStore.save === 'function') memoryStore.save(); + } + } catch {} + } const formatted = objects.map(o => `[${o.type}] ${o.title}: ${o.content}`).join('\n\n'); return { result: `Loaded ${objects.length} memories (${tokens_used} tokens):\n\n${formatted}` }; } From 97d12fb3607e23debe5d151bfd0ea93fe92f5f04 Mon Sep 17 00:00:00 2001 From: shuff57 <62350898+shuff57@users.noreply.github.com> Date: Sun, 7 Jun 2026 10:33:12 -0700 Subject: [PATCH 8/8] =?UTF-8?q?feat(agents):=20Phase=202=20=E2=80=94=20sub?= =?UTF-8?q?agent=20+=20team=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AgentRunner runs isolated sub-conversations (task-only history, narrowed tools, MAX_STEPS=15, token budget min(8000,ctx*0.3), non-streaming). TeamLoader/team_runner add sequential pipelines (output → next agent input). spawn_agent tool wired in both compiled and two-stage routers. /agents, /agent, /teams, /team commands + fullscreen palette entries. 33 new tests (14 loader, 19 runner/team), 380 total, 0 failures. 
Constraint: No yaml dep — team loader hand-parses inline-array yaml only Constraint: No nested repair in sub-agents — bad JSON args → {} + tool error Directive: Loaders skip drafts/ — Phase 3 writes agent/team drafts there Rejected: Parallel team execution | local inference perf trap on small hw Confidence: high Scope-risk: moderate Co-Authored-By: Claude Sonnet 4.6 --- bin/commands.js | 112 ++++++++++ bin/executor.js | 30 +++ bin/tools.js | 1 + src/compiled/tool_router.js | 6 +- src/plugins/agent_loader.js | 116 ++++++++++ src/plugins/agent_runner.js | 223 ++++++++++++++++++ src/plugins/team_loader.js | 94 ++++++++ src/plugins/team_runner.js | 50 +++++ src/tools/two_stage_router.js | 4 +- src/tui/fullscreen.js | 4 + test/agent_loader.test.js | 186 ++++++++++++++++ test/agent_runner.test.js | 409 ++++++++++++++++++++++++++++++++++ 12 files changed, 1230 insertions(+), 5 deletions(-) create mode 100644 src/plugins/agent_loader.js create mode 100644 src/plugins/agent_runner.js create mode 100644 src/plugins/team_loader.js create mode 100644 src/plugins/team_runner.js create mode 100644 test/agent_loader.test.js create mode 100644 test/agent_runner.test.js diff --git a/bin/commands.js b/bin/commands.js index 184fb7b4..5e219f76 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -862,6 +862,114 @@ module.exports = function createCommandHandler(config, conversationHistory, impr return; } + case '/agents': { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const loader = new AgentLoader(process.cwd()); + const agents = loader.list(); + if (agents.length === 0) { + console.log(chalk.gray(' No agents defined.')); + console.log(chalk.gray(' Create one: .smallcode/agents/.md')); + } else { + console.log(chalk.bold(` Agents (${agents.length}):`)); + for (const a of agents) { + const toolList = a.tools.length ? chalk.gray(` [${a.tools.join(', ')}]`) : ''; + const modelTag = a.model ? 
chalk.gray(` model:${a.model}`) : ''; + console.log(` ${chalk.cyan(a.name)}${toolList}${modelTag} ${chalk.gray(a.description)}`); + } + } + console.log(''); + rl.prompt(); + return; + } + + case '/agent': { + const agentName = parts[1]; + const agentTask = parts.slice(2).join(' '); + if (!agentName || !agentTask) { + console.log(chalk.gray(' Usage: /agent ')); + console.log(''); + rl.prompt(); + return; + } + const { AgentLoader: AgentLoaderA } = require('../src/plugins/agent_loader'); + const { AgentRunner } = require('../src/plugins/agent_runner'); + const loaderA = new AgentLoaderA(process.cwd()); + const agentDef = loaderA.get(agentName); + if (!agentDef) { + const valid = loaderA.list().map(a => a.name); + console.log(chalk.red(` Agent "${agentName}" not found. Valid: ${valid.join(', ') || '(none)'}`)); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.gray(` Running agent ${chalk.cyan(agentName)}...`)); + const agentCtxA = { config, flags: {}, tui: require('./tui'), skillManager: null }; + const runnerA = new AgentRunner(agentDef, agentCtxA); + const resultA = await runnerA.run(agentTask); + console.log(''); + console.log(resultA.output || chalk.gray('(no output)')); + console.log(''); + console.log(chalk.gray(` steps=${resultA.steps} tokens=${resultA.tokens}${resultA.error ? 
' error=' + resultA.error : ''}`)); + console.log(''); + rl.prompt(); + return; + } + + case '/teams': { + const { TeamLoader } = require('../src/plugins/team_loader'); + const tloader = new TeamLoader(process.cwd()); + const teams = tloader.list(); + if (teams.length === 0) { + console.log(chalk.gray(' No teams defined.')); + console.log(chalk.gray(' Create one: .smallcode/teams/.yaml')); + } else { + console.log(chalk.bold(` Teams (${teams.length}):`)); + for (const t of teams) { + console.log(` ${chalk.cyan(t.name)} ${chalk.gray(`[${t.agents.join(' → ')}]`)} ${chalk.gray(t.description)}`); + } + } + console.log(''); + rl.prompt(); + return; + } + + case '/team': { + const teamName = parts[1]; + const teamTask = parts.slice(2).join(' '); + if (!teamName || !teamTask) { + console.log(chalk.gray(' Usage: /team ')); + console.log(''); + rl.prompt(); + return; + } + const { TeamLoader: TeamLoaderT } = require('../src/plugins/team_loader'); + const { AgentLoader: AgentLoaderT } = require('../src/plugins/agent_loader'); + const { runTeam } = require('../src/plugins/team_runner'); + const tloaderT = new TeamLoaderT(process.cwd()); + const teamDef = tloaderT.get(teamName); + if (!teamDef) { + const valid = tloaderT.list().map(t => t.name); + console.log(chalk.red(` Team "${teamName}" not found. Valid: ${valid.join(', ') || '(none)'}`)); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.gray(` Running team ${chalk.cyan(teamName)} (${teamDef.agents.join(' → ')})...`)); + const agentLoaderT = new AgentLoaderT(process.cwd()); + const teamCtx = { config, flags: {}, tui: require('./tui'), skillManager: null }; + const teamResult = await runTeam(teamDef, teamTask, teamCtx, agentLoaderT); + console.log(''); + console.log(teamResult.output || chalk.gray('(no output)')); + console.log(''); + for (const pa of teamResult.perAgent) { + const err = pa.error ? 
chalk.red(` error=${pa.error}`) : ''; + console.log(chalk.gray(` ${pa.name}: steps=${pa.steps} tokens=${pa.tokens}${err}`)); + } + console.log(''); + rl.prompt(); + return; + } + case '/help': console.log(''); console.log(chalk.bold(' Commands')); @@ -884,6 +992,10 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(` ${chalk.cyan('/budget')} ${chalk.gray('Show context window budget')}`); console.log(` ${chalk.cyan('/mcp')} ${chalk.gray('Show connected MCP servers')}`); console.log(` ${chalk.cyan('/skill')} ${chalk.gray('Manage reusable skills')}`); + console.log(` ${chalk.cyan('/agents')} ${chalk.gray('List defined sub-agents')}`); + console.log(` ${chalk.cyan('/agent')} ${chalk.gray('Run a sub-agent manually')}`); + console.log(` ${chalk.cyan('/teams')} ${chalk.gray('List defined agent teams')}`); + console.log(` ${chalk.cyan('/team')} ${chalk.gray('Run a team pipeline')}`); console.log(` ${chalk.cyan('/evolve')} ${chalk.gray('Propose a new skill from session friction (list|promote|log)')}`); console.log(` ${chalk.cyan('/plugin')} ${chalk.gray('List installed plugins')}`); console.log(` ${chalk.cyan('/provider')} ${chalk.gray('Configure LLM provider (interactive wizard)')}`); diff --git a/bin/executor.js b/bin/executor.js index 8ae2c683..9fba20a9 100644 --- a/bin/executor.js +++ b/bin/executor.js @@ -871,6 +871,36 @@ async function executeTool(name, args, ctx) { return { result: formatSkillResult(skill, relatedEntries) }; } + case 'spawn_agent': { + const agentName = String(args.agent || '').trim(); + const agentTask = String(args.task || '').trim(); + if (!agentName) return { error: 'spawn_agent: agent name is required' }; + if (!agentTask) return { error: 'spawn_agent: task is required' }; + + try { + const { AgentLoader } = require('../src/plugins/agent_loader'); + const { AgentRunner } = require('../src/plugins/agent_runner'); + const loader = new AgentLoader(cwd); + const agentDef = loader.get(agentName); + if 
(!agentDef) { + const valid = loader.list().map(a => a.name); + return { error: `spawn_agent: agent "${agentName}" not found. Valid agents: ${valid.join(', ') || '(none defined)'}` }; + } + const agentCtx = { + config, + flags: flags || {}, + tui: tui || { renderDiff: () => null }, + skillManager: ctx.skillManager || null, + }; + const runner = new AgentRunner(agentDef, agentCtx); + const result = await runner.run(agentTask); + const summary = `[${agentName}] steps=${result.steps} tokens=${result.tokens}${result.error ? ' error=' + result.error : ''}`; + return { result: result.output ? `${summary}\n\n${result.output}` : summary }; + } catch (e) { + return { error: `spawn_agent: ${e.message}` }; + } + } + case 'bone_compile': { const safe = safeResolvePath(args.path, cwd); if (!safe.ok) return { error: `bone_compile rejected: ${safe.reason}` }; diff --git a/bin/tools.js b/bin/tools.js index 9682fd49..3d6951d4 100644 --- a/bin/tools.js +++ b/bin/tools.js @@ -33,6 +33,7 @@ const TOOLS = [ { type: 'function', function: { name: 'contract_assert_fail', description: 'Mark a contract assertion as failed, with evidence. Used when a check ran and the result was wrong — not for skipping checks.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id (e.g. a01)' }, evidence: { type: 'string', description: 'Short summary of why the check failed' }, command: { type: 'string', description: 'The command run (optional)' }, exit_code: { type: 'integer', description: 'Exit code of the command (optional)' } }, required: ['assertion_id', 'evidence'] } } }, { type: 'function', function: { name: 'contract_assert_skip', description: 'Mark an assertion as skipped (not applicable in current scope). 
Skipped assertions count as resolved for the done-guard.', parameters: { type: 'object', properties: { assertion_id: { type: 'string', description: 'Assertion id' }, reason: { type: 'string', description: 'Why this assertion is being skipped' } }, required: ['assertion_id', 'reason'] } } }, { type: 'function', function: { name: 'use_skill', description: 'Load the full body of a skill by name. Use this when the skill index lists a skill relevant to your task. Returns the full skill content plus any related skill descriptions.', parameters: { type: 'object', properties: { name: { type: 'string', description: 'Skill name from the index' } }, required: ['name'] } } }, + { type: 'function', function: { name: 'spawn_agent', description: 'Spawn a named sub-agent to perform a focused task. The agent runs in isolation with a narrowed tool set and returns its output. Use when you need a specialist agent (e.g. a code reviewer) to handle a subtask independently.', parameters: { type: 'object', properties: { agent: { type: 'string', description: 'Agent name (from /agents list)' }, task: { type: 'string', description: 'Task description for the agent' } }, required: ['agent', 'task'] } } }, ]; // ─── Provider Tools ───────────────────────────────────────────────────────── diff --git a/src/compiled/tool_router.js b/src/compiled/tool_router.js index 3b39d6d1..d5cedd10 100644 --- a/src/compiled/tool_router.js +++ b/src/compiled/tool_router.js @@ -237,19 +237,19 @@ function getToolsForCategory(category) { case 'read': return ['read_file', 'list_projects', 'graph_search', 'find_files', 'find_and_read', 'use_skill']; case 'write': - return ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run', 'use_skill']; + return ['read_file', 'write_file', 'patch', 'bash', 'read_and_patch', 'create_and_run', 'use_skill', 'spawn_agent']; case 'search': return ['search', 'find_files', 'graph_search', 'read_file', 'explain_symbol', 'search_and_read', 'hybrid_search', 
'use_skill']; case 'run': return ['bash', 'run', 'read_file', 'use_skill']; case 'plan': - return ['read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search', 'memory_load', 'memory_remember', 'bone_compile', 'bone_check', 'read_and_patch', 'create_and_run', 'find_and_read', 'search_and_read', 'use_skill']; + return ['read_file', 'write_file', 'patch', 'bash', 'search', 'find_files', 'graph_search', 'memory_load', 'memory_remember', 'bone_compile', 'bone_check', 'read_and_patch', 'create_and_run', 'find_and_read', 'search_and_read', 'use_skill', 'spawn_agent']; case 'web': return ['web_search', 'web_fetch', 'read_file', 'use_skill']; case 'respond': return []; // No tools needed for pure responses default: - return ['read_file', 'write_file', 'patch', 'bash', 'search', 'use_skill']; + return ['read_file', 'write_file', 'patch', 'bash', 'search', 'use_skill', 'spawn_agent']; } } diff --git a/src/plugins/agent_loader.js b/src/plugins/agent_loader.js new file mode 100644 index 00000000..60859575 --- /dev/null +++ b/src/plugins/agent_loader.js @@ -0,0 +1,116 @@ +// SmallCode — Agent Loader +// Loads agent definitions from .smallcode/agents/.md +// +// Frontmatter fields: +// name: agent name (defaults to filename stem) +// description: short description shown in /agents list +// tools: [tool1, tool2] — subset of canonical TOOLS the agent may use +// model: tier name (fast/default/medium/strong) or exact model name +// +// Body = system prompt (capped at 1600 chars in AgentRunner). +// +// Drafts quarantine: agents/drafts/ is never auto-loaded (Phase 3 will +// write agent drafts there; promotion via a future /evolve promote-agent). 
+ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +// Reuse the same regex pair as skills.js for consistency +const FM_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/; +const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; + +class AgentLoader { + constructor(projectDir) { + this.projectDir = projectDir || process.cwd(); + this._agents = new Map(); // name → AgentDef + this._load(); + } + + _agentDir() { + return path.join(this.projectDir, '.smallcode', 'agents'); + } + + _load() { + const dir = this._agentDir(); + if (!fs.existsSync(dir)) return; + let entries; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + // Skip drafts/ directory — quarantined until Phase 3 promote + if (entry.isDirectory() && entry.name === 'drafts') continue; + if (entry.isDirectory()) continue; + if (!entry.name.endsWith('.md')) continue; + this._ingest(path.join(dir, entry.name), entry.name.replace(/\.md$/i, '')); + } + } + + _parseMeta(frontmatter) { + const meta = {}; + for (const rawLine of frontmatter.split(/\r?\n/)) { + const m = rawLine.match(KV_RE); + if (!m) continue; + let value = m[2].trim(); + // Inline array: tools: [read_file, bash] + if (value.startsWith('[') && value.endsWith(']')) { + value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); + } + meta[m[1]] = value; + } + return meta; + } + + _ingest(filePath, defaultName) { + let content; + try { + content = fs.readFileSync(filePath, 'utf-8'); + } catch { + return; + } + + const fmMatch = content.match(FM_RE); + let meta = {}; + let body = content; + + if (fmMatch) { + meta = this._parseMeta(fmMatch[1]); + body = fmMatch[2]; + } + + const name = meta.name || defaultName; + const tools = Array.isArray(meta.tools) ? 
meta.tools : []; + const description = meta.description || ''; + const model = meta.model || null; + + this._agents.set(name, { + name, + description, + tools, + model, + body: body.trim(), + path: filePath, + }); + } + + // Returns all agent definitions + list() { + return [...this._agents.values()].map(a => ({ + name: a.name, + description: a.description, + tools: a.tools, + model: a.model, + })); + } + + // Returns a single agent definition or null + get(name) { + return this._agents.get(name) || null; + } +} + +module.exports = { AgentLoader }; diff --git a/src/plugins/agent_runner.js b/src/plugins/agent_runner.js new file mode 100644 index 00000000..bddc76c7 --- /dev/null +++ b/src/plugins/agent_runner.js @@ -0,0 +1,223 @@ +// SmallCode — AgentRunner +// Runs a sub-agent as a bounded sub-conversation. +// +// Isolation guarantees: +// - Initial history = [{role:'user', content: task}] ONLY (never parent history) +// - Narrowed tools = agentDef.tools ∩ canonical TOOLS; always includes read_file +// - System prompt = agent body (capped 1600 chars) + tool list line (≤600 tokens total) +// - Non-streaming, direct fetch to model endpoint +// - Hard caps: MAX_STEPS=15, token budget min(8000, ctx*0.3) +// - No MCP, no plugins, no nested repair calls +// - run() NEVER throws — always returns AgentResult {output, steps, tokens, error?} + +'use strict'; + +const { TOOLS } = require('../../bin/tools'); +const { getModelTarget, getModelTargetForModel, withModelTarget, buildAuthHeaders } = require('../../bin/config'); +const { executeTool } = require('../../bin/executor'); + +const MAX_STEPS = 15; +const BODY_CAP = 1600; +const BODY_CAP_MARKER = '[truncated]'; +const CHARS_PER_TOKEN = 4; + +// All tool names in the canonical TOOLS array +const CANONICAL_TOOL_NAMES = new Set(TOOLS.map(t => t.function.name)); + +/** + * Build the narrowed tool list for a sub-agent. + * Intersection of agentDef.tools with canonical TOOLS; read_file always present. 
+ * @param {string[]} agentTools - tools listed in agent frontmatter + * @returns {object[]} tool definitions + */ +function buildNarrowedTools(agentTools) { + const requested = new Set(agentTools || []); + // Always include read_file + requested.add('read_file'); + + return TOOLS.filter(t => { + const name = t.function.name; + return CANONICAL_TOOL_NAMES.has(name) && requested.has(name); + }); +} + +/** + * Build the sub-agent system prompt. + * Agent body capped at BODY_CAP chars; tool list appended on a final line. + * Total target: ≤600 tokens. + * @param {object} agentDef + * @param {object[]} tools + * @returns {string} + */ +function buildSubAgentPrompt(agentDef, tools) { + let body = agentDef.body || ''; + if (body.length > BODY_CAP) { + body = body.slice(0, BODY_CAP) + ' ' + BODY_CAP_MARKER; + } + const toolNames = tools.map(t => t.function.name).join(', '); + return `${body}\n\nAvailable tools: ${toolNames}`; +} + +/** + * Resolve the model target for a sub-agent. + * If agentDef.model names a tier (fast/default/medium/strong), use getModelTarget. + * Otherwise treat as a literal model name via getModelTargetForModel. + * Falls back to default tier if unset. 
+ * @param {object} config + * @param {object} agentDef + * @returns {object} model target + */ +function resolveAgentTarget(config, agentDef) { + const TIERS = new Set(['fast', 'default', 'medium', 'strong']); + const modelField = agentDef.model; + if (!modelField) return getModelTarget(config, 'default'); + if (TIERS.has(modelField)) return getModelTarget(config, modelField); + return getModelTargetForModel(config, modelField); +} + +class AgentRunner { + constructor(agentDef, ctx) { + this.agentDef = agentDef; + this.ctx = ctx; // { config, flags, tui, skillManager } + } + + async run(task) { + const { agentDef, ctx } = this; + const { config } = ctx; + + const tools = buildNarrowedTools(agentDef.tools); + const systemPrompt = buildSubAgentPrompt(agentDef, tools); + const target = resolveAgentTarget(config, agentDef); + const requestConfig = withModelTarget(config, target); + const baseUrl = target.baseUrl; + + // Token budget + const detectedWindow = config?.context?.detected_window || 32768; + const tokenBudget = Math.min(8000, Math.floor(detectedWindow * 0.3)); + + // Isolated history — only the user task, never parent history + const history = [{ role: 'user', content: task }]; + + let steps = 0; + let totalTokens = 0; + let output = ''; + + try { + while (steps < MAX_STEPS) { + const estimatedTokens = history.reduce((sum, m) => { + const c = typeof m.content === 'string' ? 
m.content : JSON.stringify(m.content || ''); + return sum + Math.ceil(c.length / CHARS_PER_TOKEN); + }, 0); + + if (estimatedTokens > tokenBudget) { + output = history.filter(m => m.role === 'assistant').map(m => m.content || '').join('\n').trim(); + return { output: output || '(token budget exhausted)', steps, tokens: totalTokens }; + } + + const body = { + model: target.model, + messages: [{ role: 'system', content: systemPrompt }, ...history], + temperature: 0.1, + max_tokens: 1024, + }; + if (tools.length > 0) { + body.tools = tools; + } + + const headers = buildAuthHeaders(requestConfig); + + let data; + try { + const response = await fetch(`${baseUrl}/chat/completions`, { + method: 'POST', + headers, + body: JSON.stringify(body), + }); + if (!response.ok) { + const errText = await response.text().catch(() => ''); + return { output: '', steps, tokens: totalTokens, error: `HTTP ${response.status}: ${errText.slice(0, 200)}` }; + } + data = await response.json(); + } catch (fetchErr) { + return { output: '', steps, tokens: totalTokens, error: fetchErr.message }; + } + + if (data?.usage) { + totalTokens += (data.usage.prompt_tokens || 0) + (data.usage.completion_tokens || 0); + } + + const choice = data?.choices?.[0]; + if (!choice) { + return { output: '', steps, tokens: totalTokens, error: 'Empty response from model' }; + } + + const message = choice.message || {}; + history.push({ role: 'assistant', content: message.content || null, tool_calls: message.tool_calls }); + steps++; + + // If no tool calls, we have a final text response + if (!message.tool_calls || message.tool_calls.length === 0) { + output = (message.content || '').trim(); + return { output, steps, tokens: totalTokens }; + } + + // Check finish reason — stop if done + if (choice.finish_reason === 'stop' || choice.finish_reason === 'end_turn') { + output = (message.content || '').trim(); + return { output, steps, tokens: totalTokens }; + } + + // Execute tool calls — stripped ctx: no MCP, no 
plugins + const toolCtx = { + config: ctx.config, + flags: ctx.flags || {}, + tui: ctx.tui || { renderDiff: () => null }, + memoryStore: null, + mcpCall: async () => null, + pluginLoader: null, + mcpClient: null, + skillManager: ctx.skillManager || null, + _fullscreenRef: null, + }; + + for (const toolCall of message.tool_calls) { + const toolName = toolCall.function?.name; + let toolArgs; + // No repairToolCall — on JSON.parse failure use {} and let tool error + try { + toolArgs = JSON.parse(toolCall.function?.arguments || '{}'); + } catch { + toolArgs = {}; + } + + let toolResult; + try { + toolResult = await executeTool(toolName, toolArgs, toolCtx); + } catch (e) { + toolResult = { error: e.message }; + } + + const resultContent = toolResult.error + ? `Error: ${toolResult.error}` + : (toolResult.result || JSON.stringify(toolResult)); + + history.push({ + role: 'tool', + tool_call_id: toolCall.id, + content: resultContent, + }); + } + } + + // Stepped out — return whatever we have + const lastAssistant = [...history].reverse().find(m => m.role === 'assistant'); + output = (lastAssistant?.content || '').trim(); + return { output: output || '(max steps reached)', steps, tokens: totalTokens }; + + } catch (err) { + return { output: '', steps, tokens: totalTokens, error: err.message }; + } + } +} + +module.exports = { AgentRunner, buildNarrowedTools, buildSubAgentPrompt, resolveAgentTarget }; diff --git a/src/plugins/team_loader.js b/src/plugins/team_loader.js new file mode 100644 index 00000000..0611adf1 --- /dev/null +++ b/src/plugins/team_loader.js @@ -0,0 +1,94 @@ +// SmallCode — Team Loader +// Loads team definitions from .smallcode/teams/.yaml +// +// YAML format (tiny parser — NO yaml dep): +// name: my-team +// description: short description +// agents: [agent-a, agent-b] +// +// Only parses top-level scalar keys and inline array lists. +// Drafts quarantine: teams/drafts/ is never auto-loaded (Phase 3 parity). 
+ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +// Reuse KV_RE style from skills.js / agent_loader.js +const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; + +class TeamLoader { + constructor(projectDir) { + this.projectDir = projectDir || process.cwd(); + this._teams = new Map(); + this._load(); + } + + _teamDir() { + return path.join(this.projectDir, '.smallcode', 'teams'); + } + + _parseLine(line) { + const m = line.trim().match(KV_RE); + if (!m) return null; + let value = m[2].trim(); + if (value.startsWith('[') && value.endsWith(']')) { + value = value.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean); + } + return { key: m[1], value }; + } + + _parse(content, defaultName) { + const result = { name: defaultName, description: '', agents: [] }; + for (const rawLine of content.split(/\r?\n/)) { + const parsed = this._parseLine(rawLine); + if (!parsed) continue; + if (parsed.key === 'name') result.name = String(parsed.value); + else if (parsed.key === 'description') result.description = String(parsed.value); + else if (parsed.key === 'agents') result.agents = Array.isArray(parsed.value) ? 
parsed.value : [String(parsed.value)]; + } + return result; + } + + _load() { + const dir = this._teamDir(); + if (!fs.existsSync(dir)) return; + let entries; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + // Skip drafts/ directory — quarantine parity with skills/agents + if (entry.isDirectory() && entry.name === 'drafts') continue; + if (entry.isDirectory()) continue; + if (!entry.name.endsWith('.yaml') && !entry.name.endsWith('.yml')) continue; + const filePath = path.join(dir, entry.name); + const defaultName = entry.name.replace(/\.(yaml|yml)$/i, ''); + let content; + try { + content = fs.readFileSync(filePath, 'utf-8'); + } catch { + continue; + } + const team = this._parse(content, defaultName); + team.path = filePath; + this._teams.set(team.name, team); + } + } + + list() { + return [...this._teams.values()].map(t => ({ + name: t.name, + description: t.description, + agents: t.agents, + })); + } + + get(name) { + return this._teams.get(name) || null; + } +} + +module.exports = { TeamLoader }; diff --git a/src/plugins/team_runner.js b/src/plugins/team_runner.js new file mode 100644 index 00000000..43404b22 --- /dev/null +++ b/src/plugins/team_runner.js @@ -0,0 +1,50 @@ +// SmallCode — Team Runner +// Sequential pipeline: each agent's output becomes the next agent's task input. +// No parallelism — local inference performance trap. +// +// Returns: { output, steps, tokens, perAgent: [{name, steps, tokens, error?}] } + +'use strict'; + +const { AgentLoader } = require('./agent_loader'); +const { AgentRunner } = require('./agent_runner'); + +/** + * Run a team pipeline. 
+ * @param {object} teamDef - { name, agents: string[] } + * @param {string} task - Initial task for the first agent + * @param {object} ctx - { config, flags, tui, skillManager } + * @param {AgentLoader} agentLoader - Loader to resolve agent definitions + * @returns {Promise<{output: string, steps: number, tokens: number, perAgent: object[]}>} + */ +async function runTeam(teamDef, task, ctx, agentLoader) { + const perAgent = []; + let currentTask = task; + let totalSteps = 0; + let totalTokens = 0; + + for (const agentName of (teamDef.agents || [])) { + const agentDef = agentLoader.get(agentName); + if (!agentDef) { + const result = { name: agentName, steps: 0, tokens: 0, error: `Agent "${agentName}" not found` }; + perAgent.push(result); + // Propagate as task for next agent so the pipeline can continue + currentTask = `[error from ${agentName}: ${result.error}] ${currentTask}`; + continue; + } + + const runner = new AgentRunner(agentDef, ctx); + const result = await runner.run(currentTask); + + perAgent.push({ name: agentName, steps: result.steps, tokens: result.tokens, error: result.error }); + totalSteps += result.steps; + totalTokens += result.tokens; + + // Next agent's input = this agent's output (pipeline semantics) + currentTask = result.output || `(${agentName} produced no output)`; + } + + return { output: currentTask, steps: totalSteps, tokens: totalTokens, perAgent }; +} + +module.exports = { runTeam }; diff --git a/src/tools/two_stage_router.js b/src/tools/two_stage_router.js index 1caf24ec..36a47968 100644 --- a/src/tools/two_stage_router.js +++ b/src/tools/two_stage_router.js @@ -28,8 +28,8 @@ const TOOL_CATEGORIES = { tools: ['bash', 'run'], }, plan: { - description: 'Load/save project memory, load skills, BoneScript compile/check', - tools: ['memory_load', 'memory_remember', 'use_skill', 'bone_compile', 'bone_check'], + description: 'Load/save project memory, load skills, spawn agents, BoneScript compile/check', + tools: ['memory_load', 
'memory_remember', 'use_skill', 'bone_compile', 'bone_check', 'spawn_agent'], }, }; diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 5dd8c02a..b2673a28 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -192,6 +192,10 @@ class FullScreenTUI { { cmd: '/cognition', alias: null, desc: 'MarrowScript cognition status' }, { cmd: '/mcp', alias: null, desc: 'Connected MCP servers' }, { cmd: '/skill', alias: null, desc: 'Manage reusable skills' }, + { cmd: '/agents', alias: null, desc: 'List defined sub-agents' }, + { cmd: '/agent', alias: null, desc: 'Run a sub-agent manually' }, + { cmd: '/teams', alias: null, desc: 'List defined agent teams' }, + { cmd: '/team', alias: null, desc: 'Run a team pipeline' }, { cmd: '/evolve', alias: null, desc: 'Propose skill from session friction' }, { cmd: '/plugin', alias: null, desc: 'Manage plugins' }, { cmd: '/sessions', alias: null, desc: 'List/resume sessions' }, diff --git a/test/agent_loader.test.js b/test/agent_loader.test.js new file mode 100644 index 00000000..b4150e9c --- /dev/null +++ b/test/agent_loader.test.js @@ -0,0 +1,186 @@ +'use strict'; + +// SmallCode — AgentLoader + TeamLoader tests +// Pins: frontmatter CRLF, tools array parsing, missing dir tolerance, +// drafts quarantine, team yaml parsing. 
+ +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const { AgentLoader } = require('../src/plugins/agent_loader'); +const { TeamLoader } = require('../src/plugins/team_loader'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-agents-')); +} + +function write(file, content) { + fs.mkdirSync(path.dirname(file), { recursive: true }); + fs.writeFileSync(file, content); +} + +// ── AgentLoader ─────────────────────────────────────────────────────────────── + +test('AgentLoader: missing agents dir returns empty list', () => { + const dir = freshProject(); + const loader = new AgentLoader(dir); + assert.deepEqual(loader.list(), []); + assert.equal(loader.get('anything'), null); +}); + +test('AgentLoader: LF frontmatter parses name/description/tools/model', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'reviewer.md'), + '---\nname: reviewer\ndescription: reviews code\ntools: [read_file, search]\nmodel: fast\n---\nYou are a reviewer.\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('reviewer'); + assert.ok(agent, 'agent should load'); + assert.equal(agent.name, 'reviewer'); + assert.equal(agent.description, 'reviews code'); + assert.deepEqual(agent.tools, ['read_file', 'search']); + assert.equal(agent.model, 'fast'); + assert.match(agent.body, /You are a reviewer/); +}); + +test('AgentLoader: CRLF frontmatter parses correctly (issue #52 parity)', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'crlf-agent.md'), + '---\r\nname: crlf-agent\r\ndescription: crlf test\r\ntools: [read_file]\r\nmodel: default\r\n---\r\nCRLF body.\r\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('crlf-agent'); + assert.ok(agent, 'should load despite CRLF'); + assert.equal(agent.model, 'default'); + 
assert.deepEqual(agent.tools, ['read_file']); + assert.match(agent.body, /CRLF body/); +}); + +test('AgentLoader: falls back to filename stem when no name in frontmatter', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'my-agent.md'), + '---\ndescription: unnamed\ntools: []\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + assert.ok(loader.get('my-agent'), 'should resolve by filename stem'); +}); + +test('AgentLoader: no-frontmatter file loads body using filename stem', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'plain.md'), + 'Just a plain body with no frontmatter.\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('plain'); + assert.ok(agent); + assert.match(agent.body, /plain body/); + assert.deepEqual(agent.tools, []); +}); + +test('AgentLoader: tools array with inline array syntax', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'multi.md'), + '---\nname: multi\ntools: [read_file, write_file, bash]\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + const agent = loader.get('multi'); + assert.deepEqual(agent.tools, ['read_file', 'write_file', 'bash']); +}); + +test('AgentLoader: drafts/ subdirectory is quarantined (never loaded)', () => { + const dir = freshProject(); + write( + path.join(dir, '.smallcode', 'agents', 'drafts', 'draft-agent.md'), + '---\nname: draft-agent\n---\nbody\n', + ); + const loader = new AgentLoader(dir); + assert.equal(loader.get('draft-agent'), null, 'draft agent must not auto-load'); + assert.equal(loader.list().length, 0); +}); + +test('AgentLoader: multiple agents coexist', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'agents', 'a.md'), '---\nname: alpha\ntools: [read_file]\n---\nbody a\n'); + write(path.join(dir, '.smallcode', 'agents', 'b.md'), '---\nname: beta\ntools: [bash]\n---\nbody b\n'); + const loader = new AgentLoader(dir); 
+  assert.equal(loader.list().length, 2);
+  assert.ok(loader.get('alpha'));
+  assert.ok(loader.get('beta'));
+});
+
+// ── TeamLoader ────────────────────────────────────────────────────────────────
+
+test('TeamLoader: missing teams dir returns empty list', () => {
+  const dir = freshProject();
+  const loader = new TeamLoader(dir);
+  assert.deepEqual(loader.list(), []);
+  assert.equal(loader.get('anything'), null);
+});
+
+test('TeamLoader: parses name/description/agents inline list', () => {
+  const dir = freshProject();
+  write(
+    path.join(dir, '.smallcode', 'teams', 'review-pipeline.yaml'),
+    'name: review-pipeline\ndescription: full review flow\nagents: [planner, reviewer, critic]\n',
+  );
+  const loader = new TeamLoader(dir);
+  const team = loader.get('review-pipeline');
+  assert.ok(team);
+  assert.equal(team.name, 'review-pipeline');
+  assert.equal(team.description, 'full review flow');
+  assert.deepEqual(team.agents, ['planner', 'reviewer', 'critic']);
+});
+
+test('TeamLoader: CRLF yaml parses correctly', () => {
+  const dir = freshProject();
+  write(
+    path.join(dir, '.smallcode', 'teams', 'crlf-team.yaml'),
+    'name: crlf-team\r\ndescription: crlf test\r\nagents: [a, b]\r\n',
+  );
+  const loader = new TeamLoader(dir);
+  const team = loader.get('crlf-team');
+  assert.ok(team);
+  assert.deepEqual(team.agents, ['a', 'b']);
+});
+
+test('TeamLoader: falls back to filename stem when no name field', () => {
+  const dir = freshProject();
+  write(
+    path.join(dir, '.smallcode', 'teams', 'my-team.yaml'),
+    'description: no name field\nagents: [x]\n',
+  );
+  const loader = new TeamLoader(dir);
+  const team = loader.get('my-team');
+  assert.ok(team, 'should resolve by filename stem');
+  assert.deepEqual(team.agents, ['x']);
+});
+
+test('TeamLoader: drafts/ subdirectory is quarantined', () => {
+  const dir = freshProject();
+  write(
+    path.join(dir, '.smallcode', 'teams', 'drafts', 'draft-team.yaml'),
+    'name: draft-team\nagents: [a]\n',
+  );
+  const loader = new TeamLoader(dir);
+  assert.equal(loader.get('draft-team'), null, 'draft team must not auto-load');
+});
+
+test('TeamLoader: accepts .yml extension as well as .yaml', () => {
+  const dir = freshProject();
+  write(
+    path.join(dir, '.smallcode', 'teams', 'alt.yml'),
+    'name: alt-team\nagents: [p, q]\n',
+  );
+  const loader = new TeamLoader(dir);
+  const team = loader.get('alt-team');
+  assert.ok(team);
+  assert.deepEqual(team.agents, ['p', 'q']);
+});
diff --git a/test/agent_runner.test.js b/test/agent_runner.test.js
new file mode 100644
index 00000000..ea58ec6b
--- /dev/null
+++ b/test/agent_runner.test.js
@@ -0,0 +1,379 @@
+'use strict';
+
+// SmallCode — AgentRunner + runTeam tests
+// Pins: isolation guarantee (initial history is task-only), tool narrowing,
+// read_file fallback, step cap, token cap, fetch failure shape, team pipeline.
+
+const test = require('node:test');
+const assert = require('node:assert/strict');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+
+const { AgentRunner, buildNarrowedTools, buildSubAgentPrompt, resolveAgentTarget } = require('../src/plugins/agent_runner');
+const { runTeam } = require('../src/plugins/team_runner');
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
+function freshProject() {
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-runner-'));
+}
+
+function fakeAgent(overrides = {}) {
+  return {
+    name: 'test-agent',
+    description: 'a test agent',
+    tools: overrides.tools !== undefined ? overrides.tools : ['read_file'],
+    model: overrides.model || null,
+    body: overrides.body || 'You are a test agent.',
+  };
+}
+
+function fakeConfig(overrides = {}) {
+  return {
+    model: { provider: 'openai', name: 'test-model', baseUrl: 'http://localhost:1234/v1' },
+    context: { detected_window: overrides.detected_window || 32768 },
+    models: overrides.models || {},
+    ...overrides,
+  };
+}
+
+// Save and restore global.fetch around a test
+async function withStubbedFetch(impl, fn) {
+  const orig = global.fetch;
+  global.fetch = impl;
+  try { return await fn(); } finally { global.fetch = orig; }
+}
+
+// Build a minimal fetch response that produces a text-only completion
+function makeTextResponse(content, usage = { prompt_tokens: 10, completion_tokens: 5 }) {
+  return async () => ({
+    ok: true,
+    json: async () => ({
+      choices: [{ message: { content, tool_calls: null }, finish_reason: 'stop' }],
+      usage,
+    }),
+  });
+}
+
+// Build a fetch that returns a single tool call then a text response.
+// `toolArgs` is JSON-encoded unless it is already a string, in which case it is
+// sent verbatim — this lets tests feed the runner a malformed argument payload.
+function makeToolThenTextFetch(toolName, toolArgs, textContent) {
+  const rawArgs = typeof toolArgs === 'string' ? toolArgs : JSON.stringify(toolArgs);
+  let call = 0;
+  return async () => {
+    call++;
+    if (call === 1) {
+      return {
+        ok: true,
+        json: async () => ({
+          choices: [{
+            message: {
+              content: null,
+              tool_calls: [{
+                id: 'tc1',
+                function: { name: toolName, arguments: rawArgs },
+              }],
+            },
+            finish_reason: 'tool_calls',
+          }],
+          usage: { prompt_tokens: 20, completion_tokens: 10 },
+        }),
+      };
+    }
+    return {
+      ok: true,
+      json: async () => ({
+        choices: [{ message: { content: textContent, tool_calls: null }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 30, completion_tokens: 15 },
+      }),
+    };
+  };
+}
+
+// ── buildNarrowedTools ────────────────────────────────────────────────────────
+
+test('buildNarrowedTools: always includes read_file even if not requested', () => {
+  const tools = buildNarrowedTools([]);
+  const names = tools.map(t => t.function.name);
+  assert.ok(names.includes('read_file'), 'read_file must be in narrowed set');
+});
+
+test('buildNarrowedTools: only canonical tools pass through', () => {
+  const tools = buildNarrowedTools(['read_file', 'bash', 'not_a_real_tool_xyz']);
+  const names = tools.map(t => t.function.name);
+  assert.ok(names.includes('read_file'));
+  assert.ok(names.includes('bash'));
+  assert.ok(!names.includes('not_a_real_tool_xyz'), 'non-canonical tool must be filtered out');
+});
+
+test('buildNarrowedTools: spawn_agent is canonical and can be included', () => {
+  const tools = buildNarrowedTools(['spawn_agent', 'read_file']);
+  const names = tools.map(t => t.function.name);
+  assert.ok(names.includes('spawn_agent'));
+});
+
+// ── buildSubAgentPrompt ───────────────────────────────────────────────────────
+
+test('buildSubAgentPrompt: body capped at 1600 chars with [truncated] marker', () => {
+  const longBody = 'x'.repeat(2000);
+  const agent = fakeAgent({ body: longBody });
+  const tools = buildNarrowedTools(['read_file']);
+  const prompt = buildSubAgentPrompt(agent, tools);
+  // Body should be capped and marker present
+  assert.ok(prompt.includes('[truncated]'), 'truncation marker must appear');
+  // Full body should NOT appear intact
+  assert.ok(!prompt.includes(longBody), 'full 2000-char body must not appear');
+});
+
+test('buildSubAgentPrompt: short body passes through unchanged', () => {
+  const agent = fakeAgent({ body: 'Short body.' });
+  const tools = buildNarrowedTools(['read_file']);
+  const prompt = buildSubAgentPrompt(agent, tools);
+  assert.ok(prompt.includes('Short body.'));
+  assert.ok(!prompt.includes('[truncated]'));
+});
+
+test('buildSubAgentPrompt: tool list line appended', () => {
+  const agent = fakeAgent({ tools: ['read_file', 'bash'] });
+  const tools = buildNarrowedTools(['read_file', 'bash']);
+  const prompt = buildSubAgentPrompt(agent, tools);
+  assert.ok(prompt.includes('Available tools:'));
+  assert.ok(prompt.includes('read_file'));
+  assert.ok(prompt.includes('bash'));
+});
+
+// ── resolveAgentTarget ────────────────────────────────────────────────────────
+
+test('resolveAgentTarget: null model → default tier', () => {
+  const config = fakeConfig({ models: { default: { name: 'default-model', baseUrl: 'http://x/v1' } } });
+  const target = resolveAgentTarget(config, fakeAgent({ model: null }));
+  assert.equal(target.tier, 'default');
+});
+
+test('resolveAgentTarget: tier name resolves to tier', () => {
+  const config = fakeConfig({ models: { fast: { name: 'fast-model', baseUrl: 'http://x/v1' } } });
+  const target = resolveAgentTarget(config, fakeAgent({ model: 'fast' }));
+  assert.equal(target.tier, 'fast');
+});
+
+test('resolveAgentTarget: literal model name resolves via getModelTargetForModel', () => {
+  const config = fakeConfig({});
+  const target = resolveAgentTarget(config, fakeAgent({ model: 'my-specific-model' }));
+  // Should not throw. TODO: pin the field that carries the model name once its shape is confirmed.
+  assert.ok(target, 'should return a target object');
+  assert.equal(typeof target, 'object', 'target must be an object');
+});
+
+// ── AgentRunner isolation + basic run ─────────────────────────────────────────
+
+test('AgentRunner: isolation pin — fetch receives only task in history (no parent history)', async () => {
+  const config = fakeConfig();
+  let capturedBody;
+  await withStubbedFetch(async (url, opts) => {
+    capturedBody = JSON.parse(opts.body);
+    return {
+      ok: true,
+      json: async () => ({
+        choices: [{ message: { content: 'done', tool_calls: null }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 5, completion_tokens: 3 },
+      }),
+    };
+  }, async () => {
+    const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } });
+    await runner.run('Review this code.');
+  });
+
+  // The messages sent to the model must be [system, user-task] only
+  assert.ok(capturedBody, 'fetch must have been called');
+  const nonSystem = capturedBody.messages.filter(m => m.role !== 'system');
+  assert.equal(nonSystem.length, 1, 'only one non-system message (the task)');
+  assert.equal(nonSystem[0].role, 'user');
+  assert.equal(nonSystem[0].content, 'Review this code.');
+});
+
+test('AgentRunner: text-only response returns output and steps', async () => {
+  const config = fakeConfig();
+  const result = await withStubbedFetch(makeTextResponse('All good.'), async () => {
+    const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } });
+    return runner.run('Check the file.');
+  });
+  assert.equal(result.output, 'All good.');
+  assert.equal(result.steps, 1);
+  assert.equal(result.tokens, 15); // 10+5
+  assert.equal(result.error, undefined);
+});
+
+test('AgentRunner: run() never throws — returns error shape on HTTP failure', async () => {
+  const config = fakeConfig();
+  const result = await withStubbedFetch(async () => ({
+    ok: false,
+    status: 500,
+    text: async () => 'internal error',
+  }), async () => {
+    const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } });
+    return runner.run('task');
+  });
+  assert.ok(result, 'must return a result object');
+  assert.ok(typeof result.error === 'string', 'error must be a string');
+  assert.equal(result.output, '');
+});
+
+test('AgentRunner: run() never throws — returns error shape on fetch network error', async () => {
+  const config = fakeConfig();
+  const result = await withStubbedFetch(async () => { throw new Error('ECONNREFUSED'); }, async () => {
+    const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } });
+    return runner.run('task');
+  });
+  assert.ok(result);
+  assert.ok(typeof result.error === 'string');
+  assert.ok(result.error.includes('ECONNREFUSED'));
+});
+
+test('AgentRunner: step cap — stops after MAX_STEPS and returns gracefully', async () => {
+  const config = fakeConfig();
+  let calls = 0;
+  // Always return a tool_call so the agent loops forever (until step cap)
+  const result = await withStubbedFetch(async () => {
+    calls++;
+    return {
+      ok: true,
+      json: async () => ({
+        choices: [{
+          message: {
+            content: null,
+            // read_file is a real tool; x.txt presumably does not exist, so each call errors but the runner continues
+            tool_calls: [{ id: `tc${calls}`, function: { name: 'read_file', arguments: JSON.stringify({ path: 'x.txt' }) } }],
+          },
+          finish_reason: 'tool_calls',
+        }],
+        usage: { prompt_tokens: 5, completion_tokens: 2 },
+      }),
+    };
+  }, async () => {
+    const runner = new AgentRunner(fakeAgent({ tools: ['read_file'] }), { config, flags: {}, tui: { renderDiff: () => null } });
+    return runner.run('task');
+  });
+  // Should have stopped; steps should be at MAX_STEPS
+  assert.equal(result.steps, 15, 'should reach MAX_STEPS=15');
+  assert.ok(!result.error, 'should not error on step cap');
+});
+
+test('AgentRunner: token budget cap — stops when estimated tokens exceed budget', async () => {
+  // Use a very small context window so budget is tiny
+  const config = fakeConfig({ detected_window: 100 }); // budget = min(8000, 30) = 30
+  const result = await withStubbedFetch(async () => {
+    const bigContent = 'x'.repeat(500); // large response inflates history
+    return {
+      ok: true,
+      json: async () => ({
+        choices: [{
+          message: { content: bigContent, tool_calls: null },
+          finish_reason: 'stop',
+        }],
+        usage: { prompt_tokens: 200, completion_tokens: 100 },
+      }),
+    };
+  }, async () => {
+    const runner = new AgentRunner(fakeAgent(), { config, flags: {}, tui: { renderDiff: () => null } });
+    return runner.run('task');
+  });
+  // Either we hit token budget immediately (returning at step 0 or 1) or
+  // after the first response bloats history. In any case run() must not throw.
+  assert.ok(result, 'must return a result');
+  assert.ok(typeof result.output === 'string');
+});
+
+test('AgentRunner: invalid tool args JSON uses {} and lets tool error gracefully', async () => {
+  const config = fakeConfig();
+  // First response carries a tool call whose arguments are not valid JSON;
+  // the second is a plain text completion so the run can finish.
+  const result = await withStubbedFetch(
+    makeToolThenTextFetch('read_file', 'NOT_VALID_JSON', 'recovered'),
+    async () => {
+      const runner = new AgentRunner(fakeAgent({ tools: ['read_file'] }), { config, flags: {}, tui: { renderDiff: () => null } });
+      return runner.run('task with bad args');
+    },
+  );
+  // Should not throw; agent should continue after the bad tool call
+  assert.ok(result);
+  assert.ok(typeof result.output === 'string');
+});
+
+// ── runTeam pipeline ──────────────────────────────────────────────────────────
+
+test('runTeam: sequential pipeline pipes output → next agent input', async () => {
+  const config = fakeConfig();
+  let callCount = 0;
+  const received = [];
+
+  await withStubbedFetch(async (url, opts) => {
+    callCount++;
+    const body = JSON.parse(opts.body);
+    const userMsg = body.messages.find(m => m.role === 'user');
+    received.push(userMsg?.content);
+    const out = callCount === 1 ? 'output from alpha' : 'output from beta';
+    return {
+      ok: true,
+      json: async () => ({
+        choices: [{ message: { content: out, tool_calls: null }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 5, completion_tokens: 3 },
+      }),
+    };
+  }, async () => {
+    // Build a stub AgentLoader
+    const { AgentLoader } = require('../src/plugins/agent_loader');
+    const dir = freshProject();
+    fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true });
+    fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'alpha.md'), '---\nname: alpha\ntools: [read_file]\n---\nbody\n');
+    fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'beta.md'), '---\nname: beta\ntools: [read_file]\n---\nbody\n');
+    const agentLoader = new AgentLoader(dir);
+    const teamDef = { name: 'test-team', agents: ['alpha', 'beta'] };
+    const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null };
+    return runTeam(teamDef, 'initial task', ctx, agentLoader);
+  });
+
+  assert.equal(callCount, 2, 'should call model once per agent');
+  // First agent receives the initial task
+  assert.equal(received[0], 'initial task');
+  // Second agent receives first agent's output
+  assert.equal(received[1], 'output from alpha');
+});
+
+test('runTeam: unknown agent produces error entry and continues pipeline', async () => {
+  const config = fakeConfig();
+  await withStubbedFetch(makeTextResponse('beta output'), async () => {
+    const { AgentLoader } = require('../src/plugins/agent_loader');
+    const dir = freshProject();
+    fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true });
+    fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'beta.md'), '---\nname: beta\ntools: [read_file]\n---\nbody\n');
+    const agentLoader = new AgentLoader(dir);
+    const teamDef = { name: 'test-team', agents: ['nonexistent', 'beta'] };
+    const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null };
+    const result = await runTeam(teamDef, 'task', ctx, agentLoader);
+    assert.ok(result.perAgent[0].error, 'first agent should have error');
+    assert.equal(result.perAgent[0].name, 'nonexistent');
+    assert.equal(result.perAgent[1].name, 'beta');
+    assert.ok(!result.perAgent[1].error, 'beta should succeed');
+    assert.equal(result.output, 'beta output');
+  });
+});
+
+test('runTeam: accumulates tokens across agents', async () => {
+  const config = fakeConfig();
+  const result = await withStubbedFetch(makeTextResponse('out', { prompt_tokens: 10, completion_tokens: 5 }), async () => {
+    const { AgentLoader } = require('../src/plugins/agent_loader');
+    const dir = freshProject();
+    fs.mkdirSync(path.join(dir, '.smallcode', 'agents'), { recursive: true });
+    fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'a1.md'), '---\nname: a1\ntools: [read_file]\n---\nbody\n');
+    fs.writeFileSync(path.join(dir, '.smallcode', 'agents', 'a2.md'), '---\nname: a2\ntools: [read_file]\n---\nbody\n');
+    const agentLoader = new AgentLoader(dir);
+    const teamDef = { name: 'tok-team', agents: ['a1', 'a2'] };
+    const ctx = { config, flags: {}, tui: { renderDiff: () => null }, skillManager: null };
+    return runTeam(teamDef, 'task', ctx, agentLoader);
+  });
+  // 2 agents × 15 tokens each = 30 total
+  assert.equal(result.tokens, 30);
+  assert.equal(result.perAgent.length, 2);
+});