diff --git a/bin/commands.js b/bin/commands.js index 9ba5ebc8..e1a7e11c 100644 --- a/bin/commands.js +++ b/bin/commands.js @@ -851,6 +851,7 @@ module.exports = function createCommandHandler(config, conversationHistory, impr console.log(` ${chalk.cyan('/budget')} ${chalk.gray('Show context window budget')}`); console.log(` ${chalk.cyan('/mcp')} ${chalk.gray('Show connected MCP servers')}`); console.log(` ${chalk.cyan('/skill')} ${chalk.gray('Manage reusable skills')}`); + console.log(` ${chalk.cyan('/evolve')} ${chalk.gray('Propose a new skill from session friction (list|promote|log)')}`); console.log(` ${chalk.cyan('/plugin')} ${chalk.gray('List installed plugins')}`); console.log(` ${chalk.cyan('/provider')} ${chalk.gray('Configure LLM provider (interactive wizard)')}`); console.log(` ${chalk.cyan('/sessions')} ${chalk.gray('List/resume saved sessions')}`); @@ -863,6 +864,161 @@ module.exports = function createCommandHandler(config, conversationHistory, impr rl.prompt(); return; + case '/evolve': { + const { SkillManager } = require('../src/plugins/skills'); + const sm = new SkillManager(process.cwd()); + const sub = (parts[1] || '').trim(); + + if (sub === 'list') { + const drafts = sm.listDrafts(); + if (drafts.length === 0) { + console.log(chalk.gray(' No skill drafts. 
Run /evolve to analyze recent sessions.')); + } else { + console.log(chalk.bold(` Drafts (${drafts.length}) — promote with /evolve promote :`)); + for (const d of drafts) console.log(` ${chalk.cyan(d)}`); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'promote') { + const name = (parts[2] || '').trim(); + if (!name) { console.log(chalk.gray(' Usage: /evolve promote ')); } + else { + const target = sm.promoteDraft(name); + if (target) console.log(` ${chalk.green('✓')} Promoted to ${chalk.cyan(target)} — active next session.`); + else console.log(chalk.red(` Draft "${name}" not found (or a live skill with that name exists).`)); + } + console.log(''); + rl.prompt(); + return; + } + + if (sub === 'log') { + const { readEntries } = require('../src/plugins/audit_log'); + const entries = readEntries(path.join(process.cwd(), '.smallcode', 'evolver-audit.jsonl'), 10); + if (entries.length === 0) console.log(chalk.gray(' No evolution events logged yet.')); + for (const e of entries) { + console.log(` ${chalk.gray(e.ts)} ${chalk.cyan(e.name)} ${chalk.gray(e.rationale.slice(0, 60))}`); + } + console.log(''); + rl.prompt(); + return; + } + + // No sub-command: run an evolution pass + const { TraceRecorder } = require('./trace_recorder'); + const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); + const evolver = require('../src/plugins/evolver'); + + const tr = new TraceRecorder(process.cwd()); + const traceList = tr.list().slice(0, 20); + if (traceList.length < 3) { + console.log(chalk.gray(` Only ${traceList.length} trace(s) recorded — need at least 3 sessions of data.`)); + console.log(''); + rl.prompt(); + return; + } + const traces = traceList.map(t => tr.load(t.id)).filter(Boolean); + + const skillKeywords = sm.list().flatMap(s => s.keywords || []); + const report = extractFrictionSignals(traces, { skillKeywords }); + const signalCount = report.repeated_patterns.length + report.tool_retry_loops.length; + 
if (signalCount === 0) { + console.log(chalk.gray(` No friction patterns in last ${traces.length} traces. Nothing to evolve.`)); + console.log(''); + rl.prompt(); + return; + } + + console.log(chalk.bold(` Friction signals (${signalCount}):`)); + console.log(chalk.gray(formatReportForPrompt(report).split('\n').map(l => ' ' + l).join('\n'))); + + // LLM judgment — route to the strong tier when configured + const { getModelTarget, buildAuthHeaders, withModelTarget } = require('./config'); + const target = getModelTarget(config, 'strong'); + process.stdout.write(chalk.gray(` Asking ${target.model} for a proposal... `)); + + const sysPrompt = 'You design reusable skills for a coding agent. A skill is a short markdown instruction injected when relevant. Given friction signals from recent sessions, propose ONE skill addressing the most impactful pattern. Respond with ONLY a JSON object: {"name": "kebab-case-name", "description": "one line", "trigger": "match", "keywords": ["k1","k2"], "body": "markdown instructions for the agent", "rationale": "why this helps"}'; + let proposalRaw = null; + try { + const resp = await fetch(`${target.baseUrl}/chat/completions`, { + method: 'POST', + headers: buildAuthHeaders(withModelTarget(config, target)), + body: JSON.stringify({ + model: target.model, + messages: [ + { role: 'system', content: sysPrompt }, + { role: 'user', content: `Friction signals:\n${formatReportForPrompt(report)}` }, + ], + temperature: 0.2, + max_tokens: 1024, + }), + }); + if (resp.ok) { + const data = await resp.json(); + proposalRaw = data?.choices?.[0]?.message?.content || null; + } else { + console.log(chalk.red(`HTTP ${resp.status}`)); + } + } catch (e) { + console.log(chalk.red(e.message)); + } + if (!proposalRaw) { console.log(''); rl.prompt(); return; } + + // Forgiving parse: strict JSON → fenced JSON → abort with raw output + let parsed = null; + try { parsed = JSON.parse(proposalRaw); } catch { + const m = proposalRaw.match(/\{[\s\S]*\}/); + if (m) { 
try { parsed = JSON.parse(m[0]); } catch {} } + } + if (!parsed) { + console.log(chalk.yellow('could not parse')); + console.log(chalk.gray(' Raw model output (nothing written):')); + console.log(chalk.gray(' ' + proposalRaw.slice(0, 500).split('\n').join('\n '))); + console.log(''); + rl.prompt(); + return; + } + console.log(chalk.green('ok')); + + const proposal = evolver.buildSkillProposal( + String(parsed.name || ''), String(parsed.description || ''), String(parsed.body || ''), + { trigger: parsed.trigger, keywords: parsed.keywords, rationale: String(parsed.rationale || '') } + ); + const errors = evolver.validateProposal(proposal); + if (errors.length) { + console.log(chalk.red(` Proposal rejected: ${errors.join('; ')}`)); + console.log(''); + rl.prompt(); + return; + } + const collision = evolver.checkNameCollision(proposal.name, process.cwd()); + if (collision) { + console.log(chalk.red(` Name collision with ${collision} — nothing written.`)); + console.log(''); + rl.prompt(); + return; + } + + const run = new evolver.EvolverRun(); + const draftPath = run.writeDraft(proposal, process.cwd()); + evolver.logCreateEvent( + path.join(process.cwd(), '.smallcode', 'evolver-audit.jsonl'), + proposal, proposal.rationale, + report.repeated_patterns.flatMap(p => p.traceIds).concat(report.tool_retry_loops.flatMap(l => l.traceIds)) + ); + + console.log(''); + console.log(` ${chalk.green('✓')} Draft: ${chalk.cyan(draftPath)}`); + console.log(chalk.gray(` "${proposal.description}"`)); + console.log(chalk.gray(` Review the file, then: /evolve promote ${proposal.name}`)); + console.log(''); + rl.prompt(); + return; + } + case '/provider': { const sub = (parts[1] || '').trim(); if (sub === 'status' || sub === '--status' || sub === '-s') { diff --git a/src/plugins/audit_log.js b/src/plugins/audit_log.js new file mode 100644 index 00000000..12b2591d --- /dev/null +++ b/src/plugins/audit_log.js @@ -0,0 +1,33 @@ +// SmallCode — Evolution Audit Log +// Thin JSONL appender/reader 
// for evolver create events. One JSON object per
// line; append-only. Writes are atomic (tmp + rename) so a crash mid-write
// never corrupts existing history.

const fs = require('fs');
const path = require('path');

/**
 * Append one JSON entry to a JSONL audit log, creating the file and its
 * parent directory when needed.
 *
 * The whole log is rewritten to a tmp file and renamed over the original.
 *
 * @param {string} filePath - path of the .jsonl log
 * @param {*} entry - any JSON-serialisable value (callers pass plain objects)
 * @throws {TypeError} when `entry` cannot be serialised to JSON text
 * @throws {Error} any fs error other than "log does not exist yet"
 */
function appendEntry(filePath, entry) {
  const json = JSON.stringify(entry);
  // JSON.stringify(undefined | function) yields undefined; writing the text
  // "undefined" would plant an unparseable line in the log.
  if (typeof json !== 'string') {
    throw new TypeError('audit entry is not JSON-serialisable');
  }

  fs.mkdirSync(path.dirname(filePath), { recursive: true });

  let existing = '';
  try {
    existing = fs.readFileSync(filePath, 'utf-8');
  } catch (err) {
    // Only a missing log may start from empty. Any other read failure
    // (EACCES, EISDIR, EIO, ...) must abort: continuing would rename a
    // one-line file over the history we failed to read.
    if (err.code !== 'ENOENT') throw err;
  }
  // A torn or hand-edited last line must not swallow the new entry.
  if (existing && !existing.endsWith('\n')) existing += '\n';

  const tmpPath = `${filePath}.tmp.${process.pid}.${Date.now()}`;
  try {
    fs.writeFileSync(tmpPath, `${existing}${json}\n`, 'utf-8');
    fs.renameSync(tmpPath, filePath);
  } catch (err) {
    fs.rmSync(tmpPath, { force: true }); // don't leave tmp litter behind
    throw err;
  }
}

/**
 * Read the most recent entries from a JSONL audit log.
 *
 * Best-effort by design: an unreadable or missing file yields [], and lines
 * that are blank, unparseable, or not JSON objects are skipped.
 *
 * @param {string} filePath - path of the .jsonl log
 * @param {number} [limit=100] - maximum number of trailing entries to return
 * @returns {object[]} up to `limit` entries, oldest first
 */
function readEntries(filePath, limit = 100) {
  // slice(-0) is slice(0): without this guard a limit of 0 returned everything.
  if (typeof limit === 'number' && limit <= 0) return [];

  let content;
  try {
    content = fs.readFileSync(filePath, 'utf-8');
  } catch {
    return [];
  }

  const entries = [];
  for (const line of content.split('\n')) {
    if (!line.trim()) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // torn or corrupt line — skip it, keep the rest
    }
    // Consumers dereference fields (e.ts, e.name); `null` or scalars would crash them.
    if (parsed !== null && typeof parsed === 'object') entries.push(parsed);
  }
  return entries.slice(-limit);
}

module.exports = { appendEntry, readEntries };

/* ── patch boundary (kept verbatim) ──
diff --git a/src/plugins/evolver.js b/src/plugins/evolver.js
new file mode 100644
index 00000000..139a3123
--- /dev/null
+++ b/src/plugins/evolver.js
@@ -0,0 +1,174 @@
*/
// SmallCode — Evolver (create-mode mechanics)
// Deterministic mechanics behind the /evolve command: proposal building,
// validation, name-collision checking, quarantined draft writing, audit
// logging, and structural enforcement of the 1-create-per-run cap.
//
// The fuzzy judgment (is this friction worth a skill?) happens in the
// command handler via an LLM call. Everything here is pure mechanics so it
// can be unit-tested without a model.
//
// Safety rules (mirrors the create-mode evolver pattern):
//   - Drafts only: writes go to .smallcode/skills/drafts/, never live dirs
//   - Never deletes, never commits
//   - validateProposal must pass before any write
//   - EvolverRun raises on the 2nd create in a single run

const fs = require('fs');
const path = require('path');
const { appendEntry } = require('./audit_log');

const MAX_CREATES_PER_RUN = 1;
const NAME_RE = /^[A-Za-z0-9_-]+$/;
const VALID_TRIGGERS = new Set(['manual', 'auto', 'match']);
// Characters that would break out of, or be re-split inside, the one-line
// `keywords: [a, b]` frontmatter entry written by _skillMd.
const KEYWORD_UNSAFE_RE = /[\r\n,\[\]]/;

/** Thrown by EvolverRun when a run tries to exceed its create cap. */
class ProposalCapExceededError extends Error {
  constructor(message) {
    super(message);
    this.name = 'ProposalCapExceededError';
  }
}

// ── Builders ──────────────────────────────────────────────────────────────

/**
 * Assemble a create-skill proposal object. Performs no validation — run the
 * result through validateProposal before writing it anywhere.
 *
 * @param {string} name - skill name
 * @param {string} description - one-line description
 * @param {string} body - markdown instructions
 * @param {{trigger?: string, keywords?: string[], rationale?: string}} [options]
 * @returns {object} proposal with kind 'create' and artefact 'skill'
 */
function buildSkillProposal(name, description, body, options = {}) {
  return {
    kind: 'create',
    artefact: 'skill',
    name,
    description,
    body,
    trigger: options.trigger || 'manual',
    keywords: Array.isArray(options.keywords) ? options.keywords : [],
    rationale: options.rationale || '',
  };
}

// ── Validation ────────────────────────────────────────────────────────────

/**
 * Check a proposal against the rules a draft must satisfy before it is
 * written.
 *
 * @param {object} proposal
 * @returns {string[]} human-readable errors; empty when the proposal is valid
 */
function validateProposal(proposal) {
  const errors = [];
  if (!proposal || typeof proposal !== 'object') return ['proposal must be an object'];

  if (proposal.artefact !== 'skill') {
    errors.push(`artefact must be "skill", got ${JSON.stringify(proposal.artefact)}`);
  }
  if (typeof proposal.name !== 'string' || !NAME_RE.test(proposal.name)) {
    errors.push('name must be a non-empty alphanumeric/-_ string');
  }
  if (typeof proposal.description !== 'string' || !proposal.description.trim()) {
    errors.push('description must be a non-empty string');
  } else if (/[\r\n]/.test(proposal.description)) {
    errors.push('description must not contain newlines (frontmatter-injection risk)');
  }
  if (typeof proposal.body !== 'string' || !proposal.body.trim()) {
    errors.push('body must be a non-empty string');
  }
  if (!VALID_TRIGGERS.has(proposal.trigger)) {
    errors.push(`trigger must be one of manual|auto|match, got ${JSON.stringify(proposal.trigger)}`);
  }
  if (proposal.trigger === 'match' && (!Array.isArray(proposal.keywords) || proposal.keywords.length === 0)) {
    errors.push('trigger "match" requires a non-empty keywords list');
  }
  // Keywords land in the frontmatter just like the description does, so they
  // get the same injection guard: a keyword such as "x]\ntrigger: auto"
  // would otherwise add its own frontmatter line.
  if (proposal.keywords != null) {
    if (!Array.isArray(proposal.keywords)) {
      errors.push('keywords must be an array of strings');
    } else if (proposal.keywords.some(
      (kw) => typeof kw !== 'string' || !kw.trim() || KEYWORD_UNSAFE_RE.test(kw)
    )) {
      errors.push('keywords must be non-empty strings without newlines, commas or brackets (frontmatter-injection risk)');
    }
  }
  return errors;
}

// ── Name-collision check ──────────────────────────────────────────────────

// Look for an existing skill with this name across the standard skill dirs
// (live and drafts). Returns the first matching path or null.
function checkNameCollision(name, projectDir) {
  const os = require('os');
  const roots = [
    path.join(projectDir, '.smallcode', 'skills'),
    path.join(os.homedir(), '.smallcode', 'skills'),
    path.join(os.homedir(), '.config', 'smallcode', 'skills'),
  ];
  for (const root of roots) {
    const candidates = [
      path.join(root, `${name}.md`),           // flat live skill
      path.join(root, name, 'SKILL.md'),       // nested live skill
      path.join(root, 'drafts', `${name}.md`), // quarantined draft
    ];
    const hit = candidates.find((candidate) => fs.existsSync(candidate));
    if (hit) return hit;
  }
  return null;
}

// ── Draft writer ──────────────────────────────────────────────────────────

// Render a proposal as a skill markdown file: YAML-style frontmatter, the
// body, and (when present) the rationale as a trailing HTML comment.
function _skillMd(proposal) {
  // Hand-built proposals may validly omit keywords; don't crash on `.length`.
  const keywords = Array.isArray(proposal.keywords) ? proposal.keywords : [];
  const fm = [
    '---',
    `name: ${proposal.name}`,
    `description: ${proposal.description}`,
    `trigger: ${proposal.trigger}`,
    keywords.length ? `keywords: [${keywords.join(', ')}]` : null,
    '---',
  ].filter(Boolean).join('\n');

  let body = `${proposal.body.trim()}\n`;
  if (proposal.rationale) {
    // Strip comment terminators so the rationale cannot close the comment
    // early. Matching the whole dash run ("--->") prevents a new "-->" from
    // re-forming out of the leftovers.
    const rationale = String(proposal.rationale).replace(/-{2,}>/g, '');
    body += `\n<!-- Rationale: ${rationale} -->\n`;
  }
  return `${fm}\n${body}`;
}

/**
 * Validate a proposal and write it to the drafts quarantine
 * (<projectDir>/.smallcode/skills/drafts/<name>.md) via tmp file + rename.
 *
 * @param {object} proposal
 * @param {string} projectDir
 * @returns {string} absolute path of the written draft
 * @throws {Error} when the proposal is invalid, the target would fall
 *   outside the drafts dir, or the filesystem write fails
 */
function writeDraft(proposal, projectDir) {
  const errors = validateProposal(proposal);
  if (errors.length) throw new Error(`invalid proposal: ${errors.join('; ')}`);

  const draftsDir = path.resolve(projectDir, '.smallcode', 'skills', 'drafts');
  const target = path.resolve(draftsDir, `${proposal.name}.md`);
  // Path containment — name is already validated, but defend anyway
  if (!target.startsWith(draftsDir + path.sep)) {
    throw new Error(`draft path escapes drafts dir: ${target}`);
  }

  fs.mkdirSync(draftsDir, { recursive: true });
  const tmpPath = `${target}.tmp.${process.pid}.${Date.now()}`;
  try {
    fs.writeFileSync(tmpPath, _skillMd(proposal), 'utf-8');
    fs.renameSync(tmpPath, target);
  } catch (err) {
    fs.rmSync(tmpPath, { force: true }); // no half-written litter in drafts/
    throw err;
  }
  return target;
}

// ── Audit log ─────────────────────────────────────────────────────────────

/**
 * Record a create event in the evolver audit log.
 *
 * @param {string} auditPath - path of the JSONL audit log
 * @param {object} proposal - the proposal that was written
 * @param {string} [rationale] - falls back to proposal.rationale
 * @param {string[]} [sourceTraceIds] - trace ids that motivated the proposal
 */
function logCreateEvent(auditPath, proposal, rationale, sourceTraceIds) {
  appendEntry(auditPath, {
    ts: new Date().toISOString(),
    kind: 'create',
    artefact: proposal.artefact,
    name: proposal.name,
    rationale: rationale || proposal.rationale || '',
    source_traces: Array.isArray(sourceTraceIds) ? sourceTraceIds : [],
  });
}

// ── Per-run cap (structural) ──────────────────────────────────────────────

// Stateful tracker enforcing the create cap by construction. Use this, not
// writeDraft directly, when running an evolution pass.
+class EvolverRun { + constructor(maxCreates = MAX_CREATES_PER_RUN) { + this.maxCreates = maxCreates; + this.createsSoFar = 0; + this.written = []; + } + + writeDraft(proposal, projectDir) { + if (proposal && proposal.kind === 'create' && this.createsSoFar >= this.maxCreates) { + throw new ProposalCapExceededError( + `already wrote ${this.createsSoFar} create(s); cap is ${this.maxCreates}` + ); + } + const target = writeDraft(proposal, projectDir); + if (proposal.kind === 'create') this.createsSoFar++; + this.written.push(target); + return target; + } +} + +module.exports = { + buildSkillProposal, + validateProposal, + checkNameCollision, + writeDraft, + logCreateEvent, + EvolverRun, + ProposalCapExceededError, + MAX_CREATES_PER_RUN, +}; diff --git a/src/plugins/friction_analyzer.js b/src/plugins/friction_analyzer.js new file mode 100644 index 00000000..0165db58 --- /dev/null +++ b/src/plugins/friction_analyzer.js @@ -0,0 +1,133 @@ +// SmallCode — Friction Analyzer +// Deterministic friction-signal extraction from saved traces. No LLM calls — +// this produces the evidence the /evolve command hands to the model for +// judgment. +// +// Signals: +// - repeated_patterns: near-duplicate prompts appearing 3+ times with no +// matching skill keyword (the user keeps asking for the same thing by hand) +// - tool_retry_loops: 3+ consecutive failed calls of the same tool against +// the same file within a trace (the model keeps fighting the same wall) + +const REPEAT_THRESHOLD = 3; +const RETRY_THRESHOLD = 3; +const SIMILARITY_THRESHOLD = 0.5; + +// Filler words carry no task identity but dilute Jaccard similarity — +// "another seating chart please" must cluster with "a seating chart for..." 
const STOPWORDS = new Set([
  'the', 'and', 'for', 'with', 'that', 'this', 'these', 'those', 'from',
  'into', 'onto', 'please', 'can', 'you', 'could', 'would', 'will',
  'another', 'again', 'new', 'now', 'just', 'some', 'all', 'any',
  'make', 'give', 'get', 'want', 'need', 'like',
]);

// Lower-case the text, split on anything that is not a-z/0-9, and keep the
// tokens longer than two characters that are not stopwords.
function _wordSet(text) {
  return new Set(
    String(text || '').toLowerCase().split(/[^a-z0-9]+/)
      .filter((w) => w.length > 2 && !STOPWORDS.has(w))
  );
}

// Jaccard similarity |a ∩ b| / |a ∪ b| of two Sets. Two empty sets score 0
// (not 1), so prompts with no usable words never cluster together.
function _jaccard(a, b) {
  if (a.size === 0 && b.size === 0) return 0;
  let inter = 0;
  for (const w of a) if (b.has(w)) inter++;
  return inter / (a.size + b.size - inter);
}

// Heuristic: a tool result counts as failed when it starts with "✗", starts
// with "Error" followed by ":" or whitespace, or contains an "error": key.
function _isError(result) {
  const s = String(result || '');
  return s.startsWith('✗') || /"error"\s*:/.test(s) || /^Error[:\s]/.test(s);
}

// True when an existing skill keyword already covers the group. Keywords are
// tokenised the same way as prompts, so "csv file" or "CSV " can match; a
// keyword that reduces to no tokens (too short, all stopwords) covers nothing.
function _keywordCovers(keyword, groupWords) {
  const raw = String(keyword).toLowerCase();
  if (groupWords.has(raw)) return true;
  const tokens = _wordSet(raw);
  if (tokens.size === 0) return false;
  for (const token of tokens) if (!groupWords.has(token)) return false;
  return true;
}

/**
 * Group traces whose prompts are near-duplicates (Jaccard on word sets) and
 * report the groups that repeat often enough and that no skill covers.
 *
 * Clustering is greedy: each trace joins the first group it is similar
 * enough to, and that group's word set grows by the trace's words.
 *
 * @param {object[]} traces - objects with `id` and `prompt`
 * @param {string[]} [skillKeywords] - keywords of existing skills
 * @param {{repeatThreshold?: number, similarityThreshold?: number}} [options]
 *   overrides for the module thresholds
 * @returns {{pattern: string, count: number, traceIds: Array}[]}
 */
function _findRepeatedPatterns(traces, skillKeywords = [], options = {}) {
  const {
    repeatThreshold = REPEAT_THRESHOLD,
    similarityThreshold = SIMILARITY_THRESHOLD,
  } = options;
  const keywords = Array.isArray(skillKeywords) ? skillKeywords : [];

  const groups = []; // { words, prompts, traceIds }
  for (const t of traces) {
    const words = _wordSet(t.prompt);
    if (words.size === 0) continue;
    // Non-string prompts (e.g. a bare number) still tokenise via String();
    // store the string form so the later .slice() cannot throw.
    const prompt = String(t.prompt);

    const home = groups.find((g) => _jaccard(words, g.words) >= similarityThreshold);
    if (home) {
      home.prompts.push(prompt);
      home.traceIds.push(t.id);
      for (const w of words) home.words.add(w);
    } else {
      groups.push({ words, prompts: [prompt], traceIds: [t.id] });
    }
  }

  return groups
    .filter((g) => g.prompts.length >= repeatThreshold)
    // Skip patterns a skill already covers
    .filter((g) => !keywords.some((kw) => _keywordCovers(kw, g.words)))
    .map((g) => ({
      pattern: g.prompts[0].slice(0, 120),
      count: g.prompts.length,
      traceIds: g.traceIds,
    }));
}

// Detect consecutive failed calls of the same tool+file within each trace.
function _findToolRetryLoops(traces) {
  const found = [];

  // File a tool call targets: args.path, else args.file, else ''. Args may
  // arrive as a JSON string; unparseable or unusable args count as "no file".
  const targetOf = (step) => {
    try {
      const parsed = typeof step.args === 'string' ? JSON.parse(step.args) : (step.args || {});
      return parsed.path || parsed.file || '';
    } catch {
      return '';
    }
  };

  for (const trace of traces) {
    // Current streak of failures against one tool+file pair
    const streak = { tool: null, file: null, fails: 0 };

    // Record the streak if it is long enough, then start over
    const closeStreak = () => {
      if (streak.fails >= RETRY_THRESHOLD) {
        found.push({
          tool: streak.tool,
          file: streak.file,
          failCount: streak.fails,
          traceIds: [trace.id],
        });
      }
      streak.tool = null;
      streak.file = null;
      streak.fails = 0;
    };

    for (const step of trace.steps || []) {
      if (step.type !== 'tool_call') continue;

      const file = targetOf(step);
      const failed = _isError(step.result);
      const continuesStreak = failed && step.name === streak.tool && file === streak.file;

      if (continuesStreak) {
        streak.fails += 1;
        continue;
      }
      // A success, or a failure against a different tool/file, ends the streak
      closeStreak();
      if (failed) {
        streak.tool = step.name;
        streak.file = file;
        streak.fails = 1;
      }
    }
    closeStreak();
  }

  return found;
}

/**
 * Build the friction report for a set of traces.
 *
 * @param {object[]} traces - full trace objects (TraceRecorder.load shape)
 * @param {object} options - { skillKeywords: string[] } keywords of existing skills
 * @returns FrictionReport - { repeated_patterns, tool_retry_loops, analyzed_traces }
 */
function extractFrictionSignals(traces, options = {}) {
  const keywords = options.skillKeywords || [];
  // Drop null / non-object entries (e.g. traces that failed to load)
  const usable = (traces || []).filter((t) => t && typeof t === 'object');

  const repeated = _findRepeatedPatterns(usable, keywords);
  const retryLoops = _findToolRetryLoops(usable);
  return {
    repeated_patterns: repeated,
    tool_retry_loops: retryLoops,
    analyzed_traces: usable.length,
  };
}

// Compact text rendering of a friction report for the LLM prompt — counts
// and short descriptions only, never full trace content (budget guard).
+function formatReportForPrompt(report) { + const lines = []; + for (const p of report.repeated_patterns) { + lines.push(`- Repeated request (${p.count}x): "${p.pattern}"`); + } + for (const l of report.tool_retry_loops) { + lines.push(`- Tool retry loop: ${l.tool} failed ${l.failCount}x in a row on ${l.file || '(no file)'}`); + } + return lines.join('\n').slice(0, 2000); +} + +module.exports = { extractFrictionSignals, formatReportForPrompt }; diff --git a/src/plugins/skills.js b/src/plugins/skills.js index 60c06bb1..c4f5206a 100644 --- a/src/plugins/skills.js +++ b/src/plugins/skills.js @@ -16,6 +16,10 @@ // `.agents/skills` or `.claude/skills` typically have no frontmatter — they // are treated as `manual`-trigger skills named after their parent directory. // +// The standard skill dirs also accept the nested `/SKILL.md` layout and +// flat `.md` files without frontmatter (named after the file) — both were +// previously skipped silently (closes #81). README-style files are ignored. +// // Frontmatter accepts both LF and CRLF line endings (closes #52). const fs = require('fs'); @@ -24,6 +28,8 @@ const os = require('os'); const FM_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/; const KV_RE = /^(\w+)\s*:\s*(.+?)\s*$/; +// Docs that live alongside skills but aren't skills themselves +const NON_SKILL_MD = /^(readme|changelog|license|contributing)\.md$/i; class SkillManager { constructor(projectDir) { @@ -71,14 +77,23 @@ class SkillManager { if (!dir || !fs.existsSync(dir)) return; let entries; try { - entries = fs.readdirSync(dir); + entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; } for (const entry of entries) { - if (!entry.endsWith('.md')) continue; - const full = path.join(dir, entry); - this._ingestFile(full, entry, dir); + if (entry.isDirectory()) { + // drafts/ is quarantined — evolver proposals live there until a + // human promotes them (/evolve promote ). Never auto-load. 
+ if (entry.name === 'drafts') continue; + // //SKILL.md inside a standard skill dir — users following + // the Claude Code layout expect this to work (closes #81) + this._loadSkillFolder(path.join(dir, entry.name), entry.name); + continue; + } + if (!entry.name.endsWith('.md') || NON_SKILL_MD.test(entry.name)) continue; + const full = path.join(dir, entry.name); + this._ingestFile(full, entry.name, dir, entry.name.replace(/\.md$/i, ''), 'flat'); } } @@ -92,38 +107,41 @@ class SkillManager { } for (const d of dirs) { if (!d.isDirectory()) continue; - const skillDir = path.join(root, d.name); - // Look for SKILL.md, skill.md, or any .md file inside the folder. - let skillFile = null; - const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; - for (const c of candidates) { - const p = path.join(skillDir, c); - if (fs.existsSync(p)) { skillFile = p; break; } - } - if (!skillFile) { - // Fall back to first .md in the folder - try { - const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); - if (md) skillFile = path.join(skillDir, md); - } catch {} - } - if (!skillFile) continue; - this._ingestFile(skillFile, path.basename(skillFile), skillDir, d.name); + this._loadSkillFolder(path.join(root, d.name), d.name); + } + } + + _loadSkillFolder(skillDir, name) { + // Look for SKILL.md, skill.md, or any .md file inside the folder. 
+ let skillFile = null; + const candidates = ['SKILL.md', 'skill.md', 'Skill.md']; + for (const c of candidates) { + const p = path.join(skillDir, c); + if (fs.existsSync(p)) { skillFile = p; break; } + } + if (!skillFile) { + // Fall back to first .md in the folder + try { + const md = fs.readdirSync(skillDir).find(f => f.endsWith('.md')); + if (md) skillFile = path.join(skillDir, md); + } catch {} } + if (!skillFile) return; + this._ingestFile(skillFile, path.basename(skillFile), skillDir, name, 'nested'); } - _ingestFile(filePath, filename, dir, defaultName) { + _ingestFile(filePath, filename, dir, defaultName, origin) { let content; try { content = fs.readFileSync(filePath, 'utf-8'); } catch { return; } - const skill = this._parse(content, filename, dir, defaultName); + const skill = this._parse(content, filename, dir, defaultName, origin); if (skill) this.skills.set(skill.name, skill); } - _parse(content, filename, dir, defaultName) { + _parse(content, filename, dir, defaultName, origin) { // Parse YAML frontmatter (CRLF + LF tolerant — closes #52) const fmMatch = content.match(FM_RE); let frontmatter = ''; @@ -133,9 +151,10 @@ class SkillManager { frontmatter = fmMatch[1]; body = fmMatch[2]; } else if (!defaultName) { - // Flat-layout files without frontmatter aren't skills (could be a - // README). Nested-layout (.agents/skills//SKILL.md) files are - // accepted as plain-body skills using the parent directory name. + // Files without frontmatter and no derivable name aren't skills. + // Flat + nested loaders always pass a defaultName, so frontmatter-less + // files load as manual skills (closes #81); README-style files are + // filtered by name in _loadFlat. return null; } @@ -155,11 +174,11 @@ class SkillManager { return { name: meta.name || defaultName || filename.replace(/\.md$/i, ''), - trigger: meta.trigger || (defaultName ? 'manual' : 'manual'), + trigger: meta.trigger || 'manual', keywords: Array.isArray(meta.keywords) ? 
meta.keywords : [], content: body.trim(), path: path.join(dir, filename), - origin: defaultName ? 'nested' : 'flat', + origin: origin || (defaultName ? 'nested' : 'flat'), }; } @@ -228,6 +247,33 @@ class SkillManager { return skill; } + // Promote a quarantined draft (.smallcode/skills/drafts/.md) into + // the live project skill dir and load it. Returns the new path or null. + promoteDraft(name) { + const safe = String(name || '').replace(/[^a-z0-9-_]/gi, ''); + if (!safe) return null; + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + const source = path.join(draftsDir, `${safe}.md`); + if (!fs.existsSync(source)) return null; + const target = path.join(this.projectDir, '.smallcode', 'skills', `${safe}.md`); + if (fs.existsSync(target)) return null; // never overwrite a live skill + fs.renameSync(source, target); + this._ingestFile(target, `${safe}.md`, path.dirname(target), safe, 'flat'); + return target; + } + + // List quarantined drafts (names only) + listDrafts() { + const draftsDir = path.join(this.projectDir, '.smallcode', 'skills', 'drafts'); + try { + return fs.readdirSync(draftsDir) + .filter(f => f.endsWith('.md')) + .map(f => f.replace(/\.md$/i, '')); + } catch { + return []; + } + } + // Remove a skill remove(name) { const skill = this.skills.get(name); diff --git a/src/tui/fullscreen.js b/src/tui/fullscreen.js index 6a4c935f..5dd8c02a 100644 --- a/src/tui/fullscreen.js +++ b/src/tui/fullscreen.js @@ -192,6 +192,7 @@ class FullScreenTUI { { cmd: '/cognition', alias: null, desc: 'MarrowScript cognition status' }, { cmd: '/mcp', alias: null, desc: 'Connected MCP servers' }, { cmd: '/skill', alias: null, desc: 'Manage reusable skills' }, + { cmd: '/evolve', alias: null, desc: 'Propose skill from session friction' }, { cmd: '/plugin', alias: null, desc: 'Manage plugins' }, { cmd: '/sessions', alias: null, desc: 'List/resume sessions' }, { cmd: '/session', alias: null, desc: 'Parallel sessions' }, diff --git 
a/test/evolver.test.js b/test/evolver.test.js new file mode 100644 index 00000000..4fcf3d0e --- /dev/null +++ b/test/evolver.test.js @@ -0,0 +1,253 @@ +'use strict'; + +// SmallCode — Evolver (create-mode) tests +// Pins the deterministic mechanics behind /evolve: proposal validation, +// quarantined draft writing, the structural 1-create-per-run cap, friction +// extraction from traces, and the SkillManager drafts quarantine. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('node:fs'); +const os = require('node:os'); +const path = require('node:path'); + +const evolver = require('../src/plugins/evolver'); +const { extractFrictionSignals, formatReportForPrompt } = require('../src/plugins/friction_analyzer'); +const { appendEntry, readEntries } = require('../src/plugins/audit_log'); +const { SkillManager } = require('../src/plugins/skills'); + +function freshProject() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'sc-evolver-')); +} + +function trace(id, prompt, steps = []) { + return { id, prompt, steps, tokens: { prompt: 0, completion: 0 } }; +} + +function failedStep(tool, file) { + return { type: 'tool_call', name: tool, args: JSON.stringify({ path: file }), result: '✗ failed' }; +} + +// ── Proposal building + validation ─────────────────────────────────────── + +test('buildSkillProposal returns a complete create proposal', () => { + const p = evolver.buildSkillProposal('my-skill', 'does things', 'Body here.', { + trigger: 'match', keywords: ['foo'], rationale: 'seen 3x', + }); + assert.equal(p.kind, 'create'); + assert.equal(p.artefact, 'skill'); + assert.equal(p.trigger, 'match'); + assert.deepEqual(p.keywords, ['foo']); +}); + +test('validateProposal accepts a valid proposal', () => { + const p = evolver.buildSkillProposal('ok-name', 'desc', 'body'); + assert.deepEqual(evolver.validateProposal(p), []); +}); + +test('validateProposal rejects bad names, empty fields, newline descriptions', () => { + 
const bad = (over) => evolver.validateProposal({ + ...evolver.buildSkillProposal('ok', 'desc', 'body'), ...over, + }); + assert.ok(bad({ name: 'has space' }).length > 0); + assert.ok(bad({ name: '../traverse' }).length > 0); + assert.ok(bad({ name: '' }).length > 0); + assert.ok(bad({ description: '' }).length > 0); + assert.ok(bad({ description: 'line1\nline2' }).length > 0, 'newline = frontmatter injection'); + assert.ok(bad({ body: ' ' }).length > 0); + assert.ok(bad({ trigger: 'bogus' }).length > 0); +}); + +test('validateProposal requires keywords for match trigger', () => { + const p = evolver.buildSkillProposal('m', 'd', 'b', { trigger: 'match', keywords: [] }); + assert.ok(evolver.validateProposal(p).length > 0); +}); + +// ── Collision check ─────────────────────────────────────────────────────── + +test('checkNameCollision finds existing flat and draft skills', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(path.join(skillsDir, 'drafts'), { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'live-skill.md'), '---\nname: live-skill\n---\nx'); + fs.writeFileSync(path.join(skillsDir, 'drafts', 'pending.md'), '---\nname: pending\n---\nx'); + + assert.ok(evolver.checkNameCollision('live-skill', dir)); + assert.ok(evolver.checkNameCollision('pending', dir)); + assert.equal(evolver.checkNameCollision('brand-new', dir), null); +}); + +// ── Draft writing + cap ─────────────────────────────────────────────────── + +test('writeDraft writes to drafts/ quarantine with frontmatter', () => { + const dir = freshProject(); + const p = evolver.buildSkillProposal('drafted', 'a draft', 'Draft body.', { rationale: 'why' }); + const target = evolver.writeDraft(p, dir); + assert.match(target, /[\\/]drafts[\\/]drafted\.md$/); + const content = fs.readFileSync(target, 'utf-8'); + assert.match(content, /^---\nname: drafted\n/); + assert.match(content, /Draft body\./); + assert.match(content, /Rationale: 
why/); +}); + +test('writeDraft refuses invalid proposals', () => { + const dir = freshProject(); + assert.throws(() => evolver.writeDraft({ artefact: 'skill', name: 'x y', body: 'b' }, dir)); +}); + +test('EvolverRun allows one create, raises on the second', () => { + const dir = freshProject(); + const run = new evolver.EvolverRun(); + run.writeDraft(evolver.buildSkillProposal('first', 'd', 'b'), dir); + assert.throws( + () => run.writeDraft(evolver.buildSkillProposal('second', 'd', 'b'), dir), + evolver.ProposalCapExceededError + ); + assert.equal(run.createsSoFar, 1); +}); + +// ── Friction analysis ───────────────────────────────────────────────────── + +test('extractFrictionSignals returns empty report for no traces', () => { + const r = extractFrictionSignals([]); + assert.deepEqual(r.repeated_patterns, []); + assert.deepEqual(r.tool_retry_loops, []); + assert.equal(r.analyzed_traces, 0); +}); + +test('three near-identical prompts flag a repeated pattern', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + trace('b1', 'write unit tests for the auth module'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); + assert.deepEqual(r.repeated_patterns[0].traceIds.sort(), ['a1', 'a2', 'a3']); +}); + +test('rephrased prompts with filler-word drift still cluster (field regression)', () => { + // Exact prompts from a real session that failed to cluster before + // stopword filtering: the third drops the names and adds filler. 
+ const traces = [ + trace('s1', 'generate a random seating chart for my classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s2', 'generate a new random seating chart for the classroom students Ana, Ben, Cara, Dan, Eli and Fay'), + trace('s3', 'generate another random seating chart for my classroom students please'), + ]; + const r = extractFrictionSignals(traces); + assert.equal(r.repeated_patterns.length, 1); + assert.equal(r.repeated_patterns[0].count, 3); +}); + +test('repeated pattern covered by an existing skill keyword is suppressed', () => { + const traces = [ + trace('a1', 'convert this csv file to json format'), + trace('a2', 'convert the csv file into json format please'), + trace('a3', 'csv file convert to json format again'), + ]; + const r = extractFrictionSignals(traces, { skillKeywords: ['csv'] }); + assert.equal(r.repeated_patterns.length, 0); +}); + +test('three consecutive same-tool failures flag a retry loop', () => { + const t = trace('t1', 'fix the parser', [ + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + failedStep('patch', 'src/parser.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 1); + assert.equal(r.tool_retry_loops[0].failCount, 3); + assert.equal(r.tool_retry_loops[0].tool, 'patch'); +}); + +test('interrupted failures do not flag a retry loop', () => { + const t = trace('t1', 'fix it', [ + failedStep('patch', 'a.js'), + failedStep('patch', 'a.js'), + { type: 'tool_call', name: 'read_file', args: '{"path":"a.js"}', result: 'content' }, + failedStep('patch', 'a.js'), + ]); + const r = extractFrictionSignals([t]); + assert.equal(r.tool_retry_loops.length, 0); +}); + +test('formatReportForPrompt stays compact', () => { + const r = extractFrictionSignals([ + trace('a1', 'x'.repeat(500) + ' aaa bbb ccc'), + ]); + assert.ok(formatReportForPrompt(r).length <= 2000); +}); + +// ── Drafts quarantine in SkillManager ───────────────────────────────────── + 
+test('SkillManager never auto-loads skills from drafts/', () => { + const dir = freshProject(); + const draftsDir = path.join(dir, '.smallcode', 'skills', 'drafts'); + fs.mkdirSync(draftsDir, { recursive: true }); + fs.writeFileSync(path.join(draftsDir, 'lurker.md'), '---\nname: lurker\ntrigger: auto\n---\nshould not load'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('lurker'), null, 'draft must stay quarantined'); +}); + +test('promoteDraft moves draft live and a fresh SkillManager loads it', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('riser', 'promoted skill', 'Now live.'), dir); + + const sm = new SkillManager(dir); + assert.equal(sm.get('riser'), null); + const target = sm.promoteDraft('riser'); + assert.ok(target); + assert.ok(sm.get('riser'), 'promoted skill loads in the same manager'); + + const sm2 = new SkillManager(dir); + assert.ok(sm2.get('riser'), 'promoted skill loads in a fresh manager'); + assert.equal(sm2.listDrafts().length, 0); +}); + +test('promoteDraft never overwrites an existing live skill', () => { + const dir = freshProject(); + const skillsDir = path.join(dir, '.smallcode', 'skills'); + fs.mkdirSync(skillsDir, { recursive: true }); + fs.writeFileSync(path.join(skillsDir, 'taken.md'), '---\nname: taken\n---\noriginal'); + evolver.writeDraft(evolver.buildSkillProposal('taken', 'd', 'impostor'), dir); + + const sm = new SkillManager(dir); + assert.equal(sm.promoteDraft('taken'), null); + assert.match(fs.readFileSync(path.join(skillsDir, 'taken.md'), 'utf-8'), /original/); +}); + +test('listDrafts reports quarantined names', () => { + const dir = freshProject(); + evolver.writeDraft(evolver.buildSkillProposal('one', 'd', 'b'), dir); + const sm = new SkillManager(dir); + assert.deepEqual(sm.listDrafts(), ['one']); +}); + +// ── Audit log ───────────────────────────────────────────────────────────── + +test('audit log appends and reads back entries', () => { + const dir = 
freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + appendEntry(file, { ts: 't1', kind: 'create', name: 'a' }); + appendEntry(file, { ts: 't2', kind: 'create', name: 'b' }); + const entries = readEntries(file); + assert.equal(entries.length, 2); + assert.equal(entries[1].name, 'b'); +}); + +test('logCreateEvent writes a well-formed audit row', () => { + const dir = freshProject(); + const file = path.join(dir, '.smallcode', 'evolver-audit.jsonl'); + const p = evolver.buildSkillProposal('logged', 'd', 'b', { rationale: 'because' }); + evolver.logCreateEvent(file, p, 'because', ['t1', 't2']); + const [e] = readEntries(file); + assert.equal(e.kind, 'create'); + assert.equal(e.artefact, 'skill'); + assert.equal(e.name, 'logged'); + assert.deepEqual(e.source_traces, ['t1', 't2']); + assert.ok(e.ts); +}); diff --git a/test/skills.test.js b/test/skills.test.js index 087d756c..75ecf92f 100644 --- a/test/skills.test.js +++ b/test/skills.test.js @@ -98,6 +98,42 @@ test('list() reports nested skills with origin marker', () => { assert.equal(nested.origin, 'nested'); }); +test('issue #81: nested /SKILL.md inside .smallcode/skills is detected', () => { + const dir = freshProject(); + const skillFile = path.join(dir, '.smallcode', 'skills', 'my-skill', 'SKILL.md'); + write(skillFile, '# my skill\n\nDo nested things.'); + + const sm = new SkillManager(dir); + const got = sm.get('my-skill'); + assert.ok(got, 'nested skill inside .smallcode/skills should load'); + assert.equal(got.origin, 'nested'); + assert.match(got.content, /Do nested things\./); +}); + +test('issue #81: flat .md without frontmatter loads as manual skill', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'plain.md'), + '# Plain Skill\n\nNo frontmatter here.'); + + const sm = new SkillManager(dir); + const got = sm.get('plain'); + assert.ok(got, 'frontmatter-less flat skill should load'); + assert.equal(got.trigger, 'manual'); + 
assert.equal(got.origin, 'flat'); + assert.match(got.content, /No frontmatter here\./); +}); + +test('issue #81: README-style files in skill dirs are not skills', () => { + const dir = freshProject(); + write(path.join(dir, '.smallcode', 'skills', 'README.md'), '# About these skills'); + write(path.join(dir, '.smallcode', 'skills', 'real.md'), + '---\nname: real\ntrigger: manual\n---\nreal body'); + + const sm = new SkillManager(dir); + assert.equal(sm.get('README'), null); + assert.ok(sm.get('real')); +}); + test('add() persists a new skill and round-trips through .smallcode/skills', () => { const dir = freshProject(); const sm = new SkillManager(dir);