From 4193125ac00f7d8ea495fd12fe727eb2ea4fdb1a Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 12:16:36 +0300 Subject: [PATCH 01/10] feat(analytics): add session-source detector with strategy-pattern registry --- .../__tests__/session-source-detector.test.ts | 61 ++++++++++++ .../report/session-source-detector.ts | 93 +++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts create mode 100644 src/cli/commands/analytics/report/session-source-detector.ts diff --git a/src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts b/src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts new file mode 100644 index 00000000..b373007f --- /dev/null +++ b/src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts @@ -0,0 +1,61 @@ +import { describe, it, expect } from 'vitest'; +import { detectSessionSource, SESSION_SOURCE_DETECTORS } from '../session-source-detector.js'; +import type { NamedInvocationStats } from '../../types.js'; + +function names(list: string[]): NamedInvocationStats[] { + return list.map((name) => ({ name, totalCalls: 1, successCount: 1, failureCount: 0 })); +} + +function session(over: { skill?: string[]; agent?: string[]; command?: string[] }) { + return { + skillInvocations: names(over.skill ?? []), + agentInvocations: names(over.agent ?? []), + commandInvocations: names(over.command ?? 
[]), + }; +} + +describe('detectSessionSource', () => { + it('labels a session with a namespaced sdlc-factory agent invocation as CodeMie AI Factory', () => { + expect(detectSessionSource(session({ agent: ['sdlc-factory:tech-analyst'] }))).toBe('CodeMie AI Factory'); + }); + + it('labels a session with an unnamespaced sdlc-light/-task/-autonomous slash command as CodeMie AI Factory', () => { + expect(detectSessionSource(session({ command: ['sdlc-light'] }))).toBe('CodeMie AI Factory'); + expect(detectSessionSource(session({ command: ['sdlc-task'] }))).toBe('CodeMie AI Factory'); + expect(detectSessionSource(session({ command: ['sdlc-autonomous'] }))).toBe('CodeMie AI Factory'); + }); + + it('labels a session with a superpowers skill as Superpowers when no sdlc-factory signal is present', () => { + expect(detectSessionSource(session({ skill: ['superpowers:test-driven-development'] }))).toBe('Superpowers'); + }); + + it('prioritizes CodeMie AI Factory over Superpowers when both are present in one session', () => { + expect(detectSessionSource(session({ skill: ['superpowers:brainstorming'], command: ['sdlc-light'] }))).toBe('CodeMie AI Factory'); + }); + + it('labels a session with an openspec-named invocation as OpenSpec', () => { + expect(detectSessionSource(session({ skill: ['openspec:apply'] }))).toBe('OpenSpec'); + expect(detectSessionSource(session({ command: ['open-spec-init'] }))).toBe('OpenSpec'); + }); + + it('labels a session with a speckit-named invocation as SpecKit', () => { + expect(detectSessionSource(session({ agent: ['speckit-planner'] }))).toBe('SpecKit'); + }); + + it('labels a session with a bmad-named invocation as BMAD', () => { + expect(detectSessionSource(session({ skill: ['bmad:architect'] }))).toBe('BMAD'); + }); + + it('falls back to Pure chat when no known signal is found', () => { + expect(detectSessionSource(session({ skill: ['some-other-skill'], command: ['analytics'] }))).toBe('Pure chat'); + 
expect(detectSessionSource(session({}))).toBe('Pure chat'); + }); + + it('matches case-insensitively', () => { + expect(detectSessionSource(session({ agent: ['SDLC-Factory:Foo'] }))).toBe('CodeMie AI Factory'); + }); + + it('exposes the ordered detector list for callers that need custom ordering/extension', () => { + expect(SESSION_SOURCE_DETECTORS.map((d) => d.name)).toEqual(['sdlc-factory', 'superpowers', 'openspec', 'speckit', 'bmad']); + }); +}); diff --git a/src/cli/commands/analytics/report/session-source-detector.ts b/src/cli/commands/analytics/report/session-source-detector.ts new file mode 100644 index 00000000..14553a78 --- /dev/null +++ b/src/cli/commands/analytics/report/session-source-detector.ts @@ -0,0 +1,93 @@ +/** + * Session-source classification: labels each session by the SDLC tooling/framework + * signal found in its skill/agent/command invocation names. Ordered, first-match-wins + * strategy list — new bundles register by adding an entry here, not by editing a + * branching if/else chain. Detection is name-based only (skill/agent/command names + * already captured by the report pipeline); it does not inspect the filesystem. + */ + +import type { NamedInvocationStats } from '../types.js'; + +export interface SessionInvocationNames { + skillInvocations: NamedInvocationStats[]; + agentInvocations: NamedInvocationStats[]; + commandInvocations: NamedInvocationStats[]; +} + +export interface SessionSourceDetector { + /** Stable id, e.g. 'sdlc-factory'. */ + name: string; + /** Display label shown in the report's Source column when this detector matches. */ + label: string; + /** True if any invocation name (already lowercased) signals this bundle. 
*/ + matches(names: string[]): boolean; +} + +export const PURE_CHAT_LABEL = 'Pure chat'; + +function collectNames(session: SessionInvocationNames): string[] { + return [...session.skillInvocations, ...session.agentInvocations, ...session.commandInvocations].map((n) => + n.name.toLowerCase() + ); +} + +function hasPrefix(names: string[], prefix: string): boolean { + return names.some((n) => n.startsWith(prefix)); +} + +function hasExact(names: string[], candidates: string[]): boolean { + return names.some((n) => candidates.includes(n)); +} + +function hasSubstring(names: string[], substrings: string[]): boolean { + return names.some((n) => substrings.some((s) => n.includes(s))); +} + +// Unnamespaced slash commands from the external sdlc-factory skill bundle — distinct +// from the namespaced `sdlc-factory:` prefix seen on skill/agent invocations. +const SDLC_FACTORY_COMMANDS = ['sdlc-light', 'sdlc-task', 'sdlc-autonomous']; + +export const SESSION_SOURCE_DETECTORS: SessionSourceDetector[] = [ + { + name: 'sdlc-factory', + label: 'CodeMie AI Factory', + matches: (names) => hasPrefix(names, 'sdlc-factory:') || hasExact(names, SDLC_FACTORY_COMMANDS), + }, + { + name: 'superpowers', + label: 'Superpowers', + matches: (names) => hasPrefix(names, 'superpowers:'), + }, + { + name: 'openspec', + label: 'OpenSpec', + matches: (names) => hasSubstring(names, ['openspec', 'open-spec']), + }, + { + name: 'speckit', + label: 'SpecKit', + matches: (names) => hasSubstring(names, ['speckit', 'spec-kit']), + }, + { + name: 'bmad', + label: 'BMAD', + matches: (names) => hasSubstring(names, ['bmad']), + }, +]; + +/** + * Classify a session's tooling/framework source. Detectors are tried in order; + * the first match wins. Falls back to {@link PURE_CHAT_LABEL} when none match. 
+ */ +export function detectSessionSource( + session: SessionInvocationNames, + detectors: SessionSourceDetector[] = SESSION_SOURCE_DETECTORS +): string { + const names = collectNames(session); + for (const detector of detectors) { + if (detector.matches(names)) { + return detector.label; + } + } + return PURE_CHAT_LABEL; +} From b91fc52967cee9663ad0a2acf0d3a0421eb47fc1 Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 12:17:54 +0300 Subject: [PATCH 02/10] feat(analytics): compute sessionSource in the report payload builder --- .../report/__tests__/payload-builder.test.ts | 19 +++++++++++++++++++ .../analytics/report/payload-builder.ts | 11 ++++++++--- src/cli/commands/analytics/report/types.ts | 2 ++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/cli/commands/analytics/report/__tests__/payload-builder.test.ts b/src/cli/commands/analytics/report/__tests__/payload-builder.test.ts index ed045f13..520edd6d 100644 --- a/src/cli/commands/analytics/report/__tests__/payload-builder.test.ts +++ b/src/cli/commands/analytics/report/__tests__/payload-builder.test.ts @@ -232,6 +232,25 @@ describe('buildPayload', () => { expect(s.commandInvocations).toEqual(commandInvocations); }); + it('classifies sessionSource from invocation names, defaulting to Pure chat', () => { + const withCommand = { + ...root, + projects: [{ + projectPath: '/repo/app', + branches: [{ branchName: 'main', sessions: [session({ commandInvocations: [{ name: 'sdlc-light', totalCalls: 1, successCount: 1, failureCount: 0 }] })] }], + }], + } as unknown as RootAnalytics; + const payload = buildPayload(withCommand, costIndex, summary, { + rangeLabel: 'all', projectFilter: 'all', generatedAt: '2026-06-08T00:00:00Z', + }); + expect(payload.sessions[0].sessionSource).toBe('CodeMie AI Factory'); + + const bare = buildPayload(root, costIndex, summary, { + rangeLabel: 'all', projectFilter: 'all', generatedAt: '2026-06-08T00:00:00Z', + }); + 
expect(bare.sessions[0].sessionSource).toBe('Pure chat'); + }); + it('maps costSeries from the SessionCost when present', () => { const idx: SessionCostIndex = new Map([ ['s1', { sessionId: 's1', tokens: { input: 0, output: 0, cacheRead: 0, cacheCreation: 0, total: 250 }, costUSD: 1, perModel: [], priced: true, hadLog: true, costSeries: [{ t: 1, cost: 0.5, tokens: 100 }, { t: 2, cost: 1, tokens: 250 }] }], diff --git a/src/cli/commands/analytics/report/payload-builder.ts b/src/cli/commands/analytics/report/payload-builder.ts index 40146749..8b765872 100644 --- a/src/cli/commands/analytics/report/payload-builder.ts +++ b/src/cli/commands/analytics/report/payload-builder.ts @@ -8,6 +8,7 @@ import type { RootAnalytics } from '../types.js'; import type { SessionCostIndex, CostSummary, AgentCoverage } from '../cost/types.js'; import { emptyUsage } from '../cost/cost-calculator.js'; import type { ReportPayload, ReportSessionRecord, ReportMeta } from './types.js'; +import { detectSessionSource } from './session-source-detector.js'; export interface PayloadContext { rangeLabel: string; @@ -40,6 +41,9 @@ export function buildPayload( seen.add(s.sessionId); const cost = costIndex.get(s.sessionId); agents.add(s.agentName); + const skillInvocations = s.skillInvocations ?? []; + const agentInvocations = s.agentInvocations ?? []; + const commandInvocations = s.commandInvocations ?? []; const cov = coverageMap.get(s.agentName) ?? { agentName: s.agentName, total: 0, priced: 0, withLog: 0 }; cov.total += 1; if (cost?.hadLog) { @@ -81,9 +85,10 @@ export function buildPayload( perModelCost: cost?.perModel ?? [], ...(cost?.costSeries && cost.costSeries.length ? { costSeries: cost.costSeries } : {}), ...(cost?.dispatches && cost.dispatches.length ? { dispatches: cost.dispatches } : {}), - skillInvocations: s.skillInvocations ?? [], - agentInvocations: s.agentInvocations ?? [], - commandInvocations: s.commandInvocations ?? 
[], + skillInvocations, + agentInvocations, + commandInvocations, + sessionSource: detectSessionSource({ skillInvocations, agentInvocations, commandInvocations }), }); } } diff --git a/src/cli/commands/analytics/report/types.ts b/src/cli/commands/analytics/report/types.ts index 4cc15cbd..3c237801 100644 --- a/src/cli/commands/analytics/report/types.ts +++ b/src/cli/commands/analytics/report/types.ts @@ -34,6 +34,8 @@ export interface ReportSessionRecord { skillInvocations: NamedInvocationStats[]; agentInvocations: NamedInvocationStats[]; commandInvocations: NamedInvocationStats[]; + /** Tooling/framework classified from the invocation names above — see session-source-detector.ts. */ + sessionSource: string; tokens: TokenUsage; costUSD: number; cacheReadCostUSD: number; // USD attributable to cache reads (subset of costUSD) From b16d7ed7f3b466621bca82ddfec34ddcbee6f90f Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 12:19:43 +0300 Subject: [PATCH 03/10] feat(analytics): render Source column in the Sessions table --- src/cli/commands/analytics/report/client/app.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/cli/commands/analytics/report/client/app.js b/src/cli/commands/analytics/report/client/app.js index 18403d91..4cf97855 100644 --- a/src/cli/commands/analytics/report/client/app.js +++ b/src/cli/commands/analytics/report/client/app.js @@ -802,21 +802,22 @@ var list = fs.slice().sort(function (a, b) { return b.startTime - a.startTime; }); if (q) { var ql = q.toLowerCase(); - list = list.filter(function (s) { return (s.sessionId + ' ' + s.agentName + ' ' + s.project + ' ' + s.branch + ' ' + (s.title || '')).toLowerCase().indexOf(ql) >= 0; }); + list = list.filter(function (s) { return (s.sessionId + ' ' + s.agentName + ' ' + s.project + ' ' + s.branch + ' ' + (s.title || '') + ' ' + (s.sessionSource || '')).toLowerCase().indexOf(ql) >= 0; }); } var shown = list.slice(0, 300); holder.innerHTML = tableHTML( - 
['Date', 'Prompt', 'Agent', 'Project', 'Branch', 'Turns', 'Net lines', 'Input', 'Output', 'Cached', 'Cost'], + ['Date', 'Prompt', 'Agent', 'Project', 'Branch', 'Source', 'Turns', 'Net lines', 'Input', 'Output', 'Cached', 'Cost'], shown.map(function (s) { var branchCell = s.branch ? '' + esc(s.branch) + '' : '—'; var promptCell = '' + esc(truncStr(s.title || '—', 80)) + ''; + var sourceCell = '' + esc(s.sessionSource || 'Pure chat') + ''; return [new Date(s.startTime).toISOString().slice(0, 16).replace('T', ' '), promptCell, '' + esc(s.agentName) + '', - '' + esc(shortPath(s.project)) + '', branchCell, + '' + esc(shortPath(s.project)) + '', branchCell, sourceCell, fmtNum(s.turns), fmtNum(s.netLines), fmtTokens(tkIn(s)), fmtTokens(tkOut(s)), fmtTokens(tkCached(s)), fmtUSD(s.costUSD)]; }), - [false, false, false, false, false, true, true, true, true, true, true], + [false, false, false, false, false, false, true, true, true, true, true, true], shown.map(function (s) { return 'class="clickable" data-session="' + esc(s.sessionId) + '"'; })); if (list.length > 300) holder.appendChild(el('p', 'text-muted', 'Showing first 300 of ' + list.length + '.')); } From a1dededa56ba6abea9bd4675efd23868d5305b3c Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 12:29:48 +0300 Subject: [PATCH 04/10] chore: add SDLC artifacts for analytics session-source-column task --- .../plan.md | 452 ++++++++++++++++++ .../qa-report.md | 29 ++ .../technical-analysis.md | 199 ++++++++ 3 files changed, 680 insertions(+) create mode 100644 docs/superpowers/tasks/2026-07-01-analytics-session-source-column/plan.md create mode 100644 docs/superpowers/tasks/2026-07-01-analytics-session-source-column/qa-report.md create mode 100644 docs/superpowers/tasks/2026-07-01-analytics-session-source-column/technical-analysis.md diff --git a/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/plan.md b/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/plan.md new file 
mode 100644 index 00000000..e0daee59 --- /dev/null +++ b/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/plan.md @@ -0,0 +1,452 @@ +# Analytics Sessions: "Source" Column Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a "Source" column to the analytics report's Sessions table (right after "Branch") that classifies each session by which SDLC tooling/framework it used — CodeMie AI Factory, Superpowers, OpenSpec, SpecKit, BMAD, or "Pure chat" when no signal is found. + +**Architecture:** A new pure, ordered Strategy-pattern module (`session-source-detector.ts`) inspects the skill/agent/command invocation names already flowing through the report pipeline (`skillInvocations`/`agentInvocations`/`commandInvocations` on each session) and returns the first matching bundle's label, first-match-wins. It is wired in once, at `payload-builder.ts` (the single place `ReportSessionRecord` objects are constructed), adding one new `sessionSource: string` field. The client (`app.js`) renders that field as a new table column. No new telemetry/capture is needed — the raw invocation-name data already reaches this layer. + +**Tech Stack:** TypeScript (ES modules), Vitest, vanilla JS client (`report/client/app.js`). + +--- + +## Clarification assumptions + +- Per user decision: SpecKit/BMAD detection uses invocation-name matching only (same mechanism as the other bundles) — no filesystem/workingDirectory marker inspection, even though those two frameworks are elsewhere implemented in this repo via `.specify/`/`_bmad/` init markers. This is a known, accepted under-detection risk for sessions where SpecKit/BMAD are used purely via filesystem workflow with no named skill/agent/command invocation captured. 
+- Priority order (first-match-wins) follows the user's stated precedence: CodeMie AI Factory (sdlc-factory) > Superpowers > OpenSpec > SpecKit > BMAD > Pure chat. +- CSV export (`report/exporter.ts`) is explicitly out of scope — the request is about the dashboard table only. +- `SessionAnalytics`/`aggregator.ts` are NOT touched — `ReportSessionRecord` already carries `skillInvocations`/`agentInvocations`/`commandInvocations`, so classification happens once, at payload-build time, keeping the change surface minimal. + +--- + +### Task 1: Session-source detector module (Strategy pattern) + +**Files:** +- Create: `src/cli/commands/analytics/report/session-source-detector.ts` +- Test: `src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts` + +**Test-first: yes — detector returns the correct label per bundle and falls back to "Pure chat" with no signal.** + +- [ ] **Step 1: Write the failing test** + +```typescript +// src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts +import { describe, it, expect } from 'vitest'; +import { detectSessionSource, SESSION_SOURCE_DETECTORS } from '../session-source-detector.js'; +import type { NamedInvocationStats } from '../../types.js'; + +function names(list: string[]): NamedInvocationStats[] { + return list.map((name) => ({ name, totalCalls: 1, successCount: 1, failureCount: 0 })); +} + +function session(over: { skill?: string[]; agent?: string[]; command?: string[] }) { + return { + skillInvocations: names(over.skill ?? []), + agentInvocations: names(over.agent ?? []), + commandInvocations: names(over.command ?? 
[]), + }; +} + +describe('detectSessionSource', () => { + it('labels a session with a namespaced sdlc-factory agent invocation as CodeMie AI Factory', () => { + expect(detectSessionSource(session({ agent: ['sdlc-factory:tech-analyst'] }))).toBe('CodeMie AI Factory'); + }); + + it('labels a session with an unnamespaced sdlc-light/-task/-autonomous slash command as CodeMie AI Factory', () => { + expect(detectSessionSource(session({ command: ['sdlc-light'] }))).toBe('CodeMie AI Factory'); + expect(detectSessionSource(session({ command: ['sdlc-task'] }))).toBe('CodeMie AI Factory'); + expect(detectSessionSource(session({ command: ['sdlc-autonomous'] }))).toBe('CodeMie AI Factory'); + }); + + it('labels a session with a superpowers skill as Superpowers when no sdlc-factory signal is present', () => { + expect(detectSessionSource(session({ skill: ['superpowers:test-driven-development'] }))).toBe('Superpowers'); + }); + + it('prioritizes CodeMie AI Factory over Superpowers when both are present in one session', () => { + expect(detectSessionSource(session({ skill: ['superpowers:brainstorming'], command: ['sdlc-light'] }))).toBe('CodeMie AI Factory'); + }); + + it('labels a session with an openspec-named invocation as OpenSpec', () => { + expect(detectSessionSource(session({ skill: ['openspec:apply'] }))).toBe('OpenSpec'); + expect(detectSessionSource(session({ command: ['open-spec-init'] }))).toBe('OpenSpec'); + }); + + it('labels a session with a speckit-named invocation as SpecKit', () => { + expect(detectSessionSource(session({ agent: ['speckit-planner'] }))).toBe('SpecKit'); + }); + + it('labels a session with a bmad-named invocation as BMAD', () => { + expect(detectSessionSource(session({ skill: ['bmad:architect'] }))).toBe('BMAD'); + }); + + it('falls back to Pure chat when no known signal is found', () => { + expect(detectSessionSource(session({ skill: ['some-other-skill'], command: ['analytics'] }))).toBe('Pure chat'); + 
expect(detectSessionSource(session({}))).toBe('Pure chat'); + }); + + it('matches case-insensitively', () => { + expect(detectSessionSource(session({ agent: ['SDLC-Factory:Foo'] }))).toBe('CodeMie AI Factory'); + }); + + it('exposes the ordered detector list for callers that need custom ordering/extension', () => { + expect(SESSION_SOURCE_DETECTORS.map((d) => d.name)).toEqual(['sdlc-factory', 'superpowers', 'openspec', 'speckit', 'bmad']); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts` +Expected: FAIL — `Cannot find module '../session-source-detector.js'` + +- [ ] **Step 3: Write the implementation** + +```typescript +// src/cli/commands/analytics/report/session-source-detector.ts +/** + * Session-source classification: labels each session by the SDLC tooling/framework + * signal found in its skill/agent/command invocation names. Ordered, first-match-wins + * strategy list — new bundles register by adding an entry here, not by editing a + * branching if/else chain. Detection is name-based only (skill/agent/command names + * already captured by the report pipeline); it does not inspect the filesystem. + */ + +import type { NamedInvocationStats } from '../types.js'; + +export interface SessionInvocationNames { + skillInvocations: NamedInvocationStats[]; + agentInvocations: NamedInvocationStats[]; + commandInvocations: NamedInvocationStats[]; +} + +export interface SessionSourceDetector { + /** Stable id, e.g. 'sdlc-factory'. */ + name: string; + /** Display label shown in the report's Source column when this detector matches. */ + label: string; + /** True if any invocation name (already lowercased) signals this bundle. 
*/ + matches(names: string[]): boolean; +} + +export const PURE_CHAT_LABEL = 'Pure chat'; + +function collectNames(session: SessionInvocationNames): string[] { + return [...session.skillInvocations, ...session.agentInvocations, ...session.commandInvocations].map((n) => + n.name.toLowerCase() + ); +} + +function hasPrefix(names: string[], prefix: string): boolean { + return names.some((n) => n.startsWith(prefix)); +} + +function hasExact(names: string[], candidates: string[]): boolean { + return names.some((n) => candidates.includes(n)); +} + +function hasSubstring(names: string[], substrings: string[]): boolean { + return names.some((n) => substrings.some((s) => n.includes(s))); +} + +// Unnamespaced slash commands from the external sdlc-factory skill bundle — distinct +// from the namespaced `sdlc-factory:` prefix seen on skill/agent invocations. +const SDLC_FACTORY_COMMANDS = ['sdlc-light', 'sdlc-task', 'sdlc-autonomous']; + +export const SESSION_SOURCE_DETECTORS: SessionSourceDetector[] = [ + { + name: 'sdlc-factory', + label: 'CodeMie AI Factory', + matches: (names) => hasPrefix(names, 'sdlc-factory:') || hasExact(names, SDLC_FACTORY_COMMANDS), + }, + { + name: 'superpowers', + label: 'Superpowers', + matches: (names) => hasPrefix(names, 'superpowers:'), + }, + { + name: 'openspec', + label: 'OpenSpec', + matches: (names) => hasSubstring(names, ['openspec', 'open-spec']), + }, + { + name: 'speckit', + label: 'SpecKit', + matches: (names) => hasSubstring(names, ['speckit', 'spec-kit']), + }, + { + name: 'bmad', + label: 'BMAD', + matches: (names) => hasSubstring(names, ['bmad']), + }, +]; + +/** + * Classify a session's tooling/framework source. Detectors are tried in order; + * the first match wins. Falls back to {@link PURE_CHAT_LABEL} when none match. 
+ */ +export function detectSessionSource( + session: SessionInvocationNames, + detectors: SessionSourceDetector[] = SESSION_SOURCE_DETECTORS +): string { + const names = collectNames(session); + for (const detector of detectors) { + if (detector.matches(names)) { + return detector.label; + } + } + return PURE_CHAT_LABEL; +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `npx vitest run src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts` +Expected: PASS (10 tests) + +- [ ] **Step 5: Commit** + +```bash +git add src/cli/commands/analytics/report/session-source-detector.ts src/cli/commands/analytics/report/__tests__/session-source-detector.test.ts +git commit -m "feat(analytics): add session-source detector with strategy-pattern registry" +``` + +--- + +### Task 2: Add `sessionSource` field to `ReportSessionRecord` + +**Files:** +- Modify: `src/cli/commands/analytics/report/types.ts:34-36` + +**Test-first: no — type-only change; covered by Task 3's payload-builder test (a TS compile error surfaces immediately if the field is missing/misspelled).** + +- [ ] **Step 1: Add the field** + +In `src/cli/commands/analytics/report/types.ts`, change: + +```typescript + skillInvocations: NamedInvocationStats[]; + agentInvocations: NamedInvocationStats[]; + commandInvocations: NamedInvocationStats[]; +``` + +to: + +```typescript + skillInvocations: NamedInvocationStats[]; + agentInvocations: NamedInvocationStats[]; + commandInvocations: NamedInvocationStats[]; + /** Tooling/framework classified from the invocation names above — see session-source-detector.ts. */ + sessionSource: string; +``` + +- [ ] **Step 2: Confirm the type-checker flags the now-incomplete object literal** + +Run: `npx tsc --noEmit -p tsconfig.json` +Expected: FAIL — `Property 'sessionSource' is missing in type ... ReportSessionRecord` at `payload-builder.ts` (fixed in Task 3). 
+ +- [ ] **Step 3: Commit is deferred to Task 3** (keeping the type change and its only call site in one commit avoids a broken intermediate state). + +--- + +### Task 3: Wire the detector into `payload-builder.ts` + +**Files:** +- Modify: `src/cli/commands/analytics/report/payload-builder.ts:10, 84-86` +- Test: `src/cli/commands/analytics/report/__tests__/payload-builder.test.ts` + +**Test-first: yes — payload builder must compute and attach `sessionSource` per record.** + +- [ ] **Step 1: Write the failing test** + +Add to `src/cli/commands/analytics/report/__tests__/payload-builder.test.ts` (after the existing "passes skillInvocations..." test, around line 233): + +```typescript + it('classifies sessionSource from invocation names, defaulting to Pure chat', () => { + const withCommand = { + ...root, + projects: [{ + projectPath: '/repo/app', + branches: [{ branchName: 'main', sessions: [session({ commandInvocations: [{ name: 'sdlc-light', totalCalls: 1, successCount: 1, failureCount: 0 }] })] }], + }], + } as unknown as RootAnalytics; + const payload = buildPayload(withCommand, costIndex, summary, { + rangeLabel: 'all', projectFilter: 'all', generatedAt: '2026-06-08T00:00:00Z', + }); + expect(payload.sessions[0].sessionSource).toBe('CodeMie AI Factory'); + + const bare = buildPayload(root, costIndex, summary, { + rangeLabel: 'all', projectFilter: 'all', generatedAt: '2026-06-08T00:00:00Z', + }); + expect(bare.sessions[0].sessionSource).toBe('Pure chat'); + }); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run src/cli/commands/analytics/report/__tests__/payload-builder.test.ts` +Expected: FAIL — `expect(received).toBe(expected)` — `sessionSource` is `undefined`. 
+ +- [ ] **Step 3: Wire the detector in the implementation** + +In `src/cli/commands/analytics/report/payload-builder.ts`, add the import (line 10 area): + +```typescript +import type { ReportPayload, ReportSessionRecord, ReportMeta } from './types.js'; +import { detectSessionSource } from './session-source-detector.js'; +``` + +Then change the invocation block (currently lines 84-86) plus the object literal that pushes into `sessions`. Two edits are needed inside the span that starts at: + +```typescript + agents.add(s.agentName); +``` + +and ends at: + +```typescript + skillInvocations: s.skillInvocations ?? [], + agentInvocations: s.agentInvocations ?? [], + commandInvocations: s.commandInvocations ?? [], + }); +``` + +First, hoist the three invocation arrays into locals directly after `agents.add(...)`, so the head of the span reads: + +```typescript + agents.add(s.agentName); + const skillInvocations = s.skillInvocations ?? []; + const agentInvocations = s.agentInvocations ?? []; + const commandInvocations = s.commandInvocations ?? []; +``` + +(keep the rest of the existing block — `cov`, etc. — unchanged). Second, change the tail of the `sessions.push({...})` object literal from: + +```typescript + skillInvocations: s.skillInvocations ?? [], + agentInvocations: s.agentInvocations ?? [], + commandInvocations: s.commandInvocations ?? 
[], + }); +``` + +to: + +```typescript + skillInvocations, + agentInvocations, + commandInvocations, + sessionSource: detectSessionSource({ skillInvocations, agentInvocations, commandInvocations }), + }); +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `npx vitest run src/cli/commands/analytics/report/__tests__/payload-builder.test.ts` +Expected: PASS (all cases, including the new one) + +- [ ] **Step 5: Type-check the whole change** + +Run: `npx tsc --noEmit -p tsconfig.json` +Expected: PASS (no errors — Task 2's field is now populated) + +- [ ] **Step 6: Commit** + +```bash +git add src/cli/commands/analytics/report/types.ts src/cli/commands/analytics/report/payload-builder.ts src/cli/commands/analytics/report/__tests__/payload-builder.test.ts +git commit -m "feat(analytics): compute sessionSource in the report payload builder" +``` + +--- + +### Task 4: Render the "Source" column in the Sessions table (client) + +**Files:** +- Modify: `src/cli/commands/analytics/report/client/app.js:805, 809, 811-819` + +**Test-first: no — this is a vanilla-JS client view with no existing unit-test harness for rendered markup (confirmed in research: `report-generator.test.ts` only exercises HTML-shell injection/escaping, not `VIEWS.sessions` output). 
Verify manually per Step 3 below.** + +- [ ] **Step 1: Update the three coordinated arrays plus the search filter in `VIEWS.sessions`** + +In `src/cli/commands/analytics/report/client/app.js`, inside `VIEWS.sessions`'s `draw` function, change: + +```javascript + function draw(q) { + var list = fs.slice().sort(function (a, b) { return b.startTime - a.startTime; }); + if (q) { + var ql = q.toLowerCase(); + list = list.filter(function (s) { return (s.sessionId + ' ' + s.agentName + ' ' + s.project + ' ' + s.branch + ' ' + (s.title || '')).toLowerCase().indexOf(ql) >= 0; }); + } + var shown = list.slice(0, 300); + holder.innerHTML = tableHTML( + ['Date', 'Prompt', 'Agent', 'Project', 'Branch', 'Turns', 'Net lines', 'Input', 'Output', 'Cached', 'Cost'], + shown.map(function (s) { + var branchCell = s.branch ? '' + esc(s.branch) + '' : '—'; + var promptCell = '' + esc(truncStr(s.title || '—', 80)) + ''; + return [new Date(s.startTime).toISOString().slice(0, 16).replace('T', ' '), + promptCell, + '' + esc(s.agentName) + '', + '' + esc(shortPath(s.project)) + '', branchCell, + fmtNum(s.turns), fmtNum(s.netLines), fmtTokens(tkIn(s)), fmtTokens(tkOut(s)), fmtTokens(tkCached(s)), fmtUSD(s.costUSD)]; + }), + [false, false, false, false, false, true, true, true, true, true, true], + shown.map(function (s) { return 'class="clickable" data-session="' + esc(s.sessionId) + '"'; })); + if (list.length > 300) holder.appendChild(el('p', 'text-muted', 'Showing first 300 of ' + list.length + '.')); + } +``` + +to: + +```javascript + function draw(q) { + var list = fs.slice().sort(function (a, b) { return b.startTime - a.startTime; }); + if (q) { + var ql = q.toLowerCase(); + list = list.filter(function (s) { return (s.sessionId + ' ' + s.agentName + ' ' + s.project + ' ' + s.branch + ' ' + (s.title || '') + ' ' + (s.sessionSource || '')).toLowerCase().indexOf(ql) >= 0; }); + } + var shown = list.slice(0, 300); + holder.innerHTML = tableHTML( + ['Date', 'Prompt', 'Agent', 'Project', 
'Branch', 'Source', 'Turns', 'Net lines', 'Input', 'Output', 'Cached', 'Cost'], + shown.map(function (s) { + var branchCell = s.branch ? '' + esc(s.branch) + '' : '—'; + var promptCell = '' + esc(truncStr(s.title || '—', 80)) + ''; + var sourceCell = '' + esc(s.sessionSource || 'Pure chat') + ''; + return [new Date(s.startTime).toISOString().slice(0, 16).replace('T', ' '), + promptCell, + '' + esc(s.agentName) + '', + '' + esc(shortPath(s.project)) + '', branchCell, sourceCell, + fmtNum(s.turns), fmtNum(s.netLines), fmtTokens(tkIn(s)), fmtTokens(tkOut(s)), fmtTokens(tkCached(s)), fmtUSD(s.costUSD)]; + }), + [false, false, false, false, false, false, true, true, true, true, true, true], + shown.map(function (s) { return 'class="clickable" data-session="' + esc(s.sessionId) + '"'; })); + if (list.length > 300) holder.appendChild(el('p', 'text-muted', 'Showing first 300 of ' + list.length + '.')); + } +``` + +Note the three coordinated edits: header array gained `'Source'` at index 5; row-cell array gained `sourceCell` at index 5; alignment-mask array gained one more `false` at index 5 (Turns/Net lines/etc. shift right by one, their `true` entries unchanged in relative order). + +- [ ] **Step 2: Run the existing report-generator test to confirm the HTML shell still builds correctly** + +Run: `npx vitest run src/cli/commands/analytics/report/__tests__/report-generator.test.ts` +Expected: PASS (this test does not snapshot `VIEWS.sessions` output, so it validates the app.js bundle still embeds/parses without syntax errors) + +- [ ] **Step 3: Manual verification** + +Run: `codemie analytics --report --open` (or `node bin/codemie.js analytics --report --open` from repo root) against a local `~/.codemie/sessions/` directory that has at least one session with a captured `sdlc-light`/`sdlc-task`/`sdlc-autonomous` command and one plain-chat session. Confirm in the browser: +- The Sessions tab shows a "Source" column immediately after "Branch". 
+- A session that used `sdlc-light` shows "CodeMie AI Factory". +- A session with no matching signal shows "Pure chat". +- Typing "pure chat" or "factory" in the search box filters accordingly. + +- [ ] **Step 4: Commit** + +```bash +git add src/cli/commands/analytics/report/client/app.js +git commit -m "feat(analytics): render Source column in the Sessions table" +``` + +--- + +## Self-review notes + +- **Spec coverage:** (1) CodeMie AI Factory detection via `sdlc-factory:` prefix + `sdlc-light`/`sdlc-task`/`sdlc-autonomous` exact-match — Task 1. (2) Superpowers/OpenSpec/SpecKit/BMAD as additional ordered detectors — Task 1. (3) "Pure chat" fallback — Task 1. (4) Extensible via Strategy pattern (ordered array of `{name, label, matches}`, not if/else) — Task 1. (5) New column after Branch — Task 4. +- **Placeholder scan:** none — every step has complete, runnable code. +- **Type consistency:** `SessionInvocationNames` (Task 1) matches the shape passed from `payload-builder.ts` (Task 3): `{ skillInvocations, agentInvocations, commandInvocations }`, all `NamedInvocationStats[]`. `sessionSource: string` (Task 2) is the exact property name read by `app.js` (Task 4) and asserted in the Task 3 test. 
diff --git a/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/qa-report.md b/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/qa-report.md new file mode 100644 index 00000000..da1b9e2f --- /dev/null +++ b/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/qa-report.md @@ -0,0 +1,29 @@ +# QA Gate Report — analytics-session-source-column + +**Branch**: feat/analytics-session-source-column +**Runner**: npm +**Started**: 2026-07-01T12:20:00Z +**Status**: PASSED + +## Gates + +| Gate | Status | Command | Notes | +|---|---|---|---| +| license-check | PASS | `npm run license-check` | no missing/stale headers | +| lint | PASS (scoped) | `npm run lint` | 150 pre-existing errors in unrelated `.mjs`/`.cjs` scripts (build-report.mjs, statusline.mjs, compare-codex-conversations.mjs, etc.) — confirmed 0 lint messages on all 6 files this task touched | +| typecheck | PASS | `npm run typecheck` | no diagnostics | +| build | PASS | `npm run build` | dist/ rebuilt, plugin assets copied | +| unit | PASS | `npm run test:unit` | 145 files, 2185 passed / 1 skipped | +| integration | PASS | `npm run test:integration` | 27 files, 220 passed / 1 skipped | + +## Failure detail + +None — all gates in scope passed. + +## Pre-existing lint debt (out of scope) + +The repo-wide `npm run lint` run reports 150 errors, entirely in `.mjs`/`.cjs` utility scripts untouched by this task (last modified in an unrelated prior commit). None of the 6 files changed by this feature (`session-source-detector.ts`, its test, `payload-builder.ts`, `payload-builder.test.ts`, `types.ts`, `client/app.js`) produced any lint messages. 
+ +## Drift signal + +no diff --git a/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/technical-analysis.md b/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/technical-analysis.md new file mode 100644 index 00000000..53b19e4a --- /dev/null +++ b/docs/superpowers/tasks/2026-07-01-analytics-session-source-column/technical-analysis.md @@ -0,0 +1,199 @@ +# Technical Research + +**Task**: analytics session report sdlc-factory detection +**Generated**: 2026-07-01 +**Research path**: filesystem (codegraph MCP tool not available — filesystem Explore fallback used) + +--- + +## 1. Original Context + +On the analytics dashboard produced by the `codemie analytics --report ...` command, there is a Sessions tab/table. Add a new column (positioned right after the 'Branch' column) that signals what tooling/framework was used during that session. Specifically: +1. Detect CodeMie SDLC Factory usage: if any of the skills `sdlc-light`, `sdlc-task`, `sdlc-autonomous` (or other sdlc-factory skills) were invoked in the session, label it as 'CodeMie AI Factory' (or similar). +2. If SDLC Factory was NOT used, but skills/commands from other known frameworks/bundles were used, detect those too: 'superpowers' skill bundle, 'open-spec'/'openspec', 'speckit', 'bmad'. Each should map to its own label. +3. If none of the above signals are found in the session, label it 'Pure chat'. +4. The detection mechanism must be extensible/pluggable — implement it using a design pattern like Strategy or Command/Chain-of-Responsibility, so new frameworks/bundles can be added as new detectors without modifying a big if/else chain. + +I need you to research: +- Where the analytics report generation code lives (likely something like an OTEL analytics source / session-detail report, given a recent commit 95fb54a 'feat(analytics): add OTEL analytics source with session-detail report'). Find the exact files. 
+- How the Sessions tab/table is rendered (HTML/React/template) — find where columns like 'Branch' are defined, so I know where to insert the new column. +- What data is available per session that could reveal which skills/commands were invoked (e.g., transcript logs, tool_use events, skill invocation events, slash commands) — find how skill/command invocations are currently captured or parsed in analytics data, if at all. +- Any existing mapping/registry patterns in this codebase (e.g. plugin registry, provider registry) that already follow Strategy/Command patterns I should mirror for consistency with `.ai-run/guides/architecture/architecture.md`. +- Relevant guide content from `.ai-run/guides/architecture/architecture.md` and `.ai-run/guides/development/development-practices.md` and `.ai-run/guides/standards/code-quality.md` that constrains how this should be implemented. + +--- + +## 2. Codebase Findings + +### Existing Implementations + +**Analytics pipeline (data flow: source → aggregator → payload → HTML/client):** + +- `src/cli/commands/analytics/index.ts` — CLI entry point; `createAnalyticsCommand()` / `runAnalytics()`; defines both `analytics` (default `SessionsSource`) and `analytics otel` (`OtelSource`) subcommands (lines 16–40). +- `src/cli/commands/analytics/sources/types.ts` — `AnalyticsSource` interface (the existing pluggable-source seam, effectively a Strategy pattern already in production). +- `src/cli/commands/analytics/sources/sessions-source.ts` — `SessionsSource` implementation (local tracked sessions + native agent logs). +- `src/cli/commands/analytics/sources/otel-source.ts` — `OtelSource` implementation (flattened `otel-events.jsonl`). +- `src/cli/commands/analytics/aggregator.ts` — `AnalyticsAggregator.aggregate()` builds `SessionAnalytics` from `MetricDelta[]`; aggregates named invocations at lines 433–435 via `aggregateNamedInvocations()`. 
+- `src/cli/commands/analytics/otel-loader.ts` — parses OTEL events; `skill_activated` events carry `skill.name` (line ~223), `subagent_completed` carries `agent_type` (line ~249); mapped into `DispatchEvent` timeline entries. Also documents at line 3: native Claude Code emits no cwd/git branch, so Project/Branch are only available when the codemie-claude-otel plugin was active (native-only sessions fall back to "Unknown"). +- `src/cli/commands/analytics/native-loader.ts` — applies the same named-invocation extraction to native (untracked) sessions. +- `src/cli/commands/analytics/report/payload-builder.ts` — dedupes sessions, builds flat `ReportSessionRecord[]` for the client; carries `skillInvocations`/`agentInvocations`/`commandInvocations` at lines 84–86. +- `src/cli/commands/analytics/report/types.ts` — `ReportSessionRecord` (lines 10–43) with `skillInvocations`, `agentInvocations`, `commandInvocations` (`NamedInvocationStats[]`), `dispatches?: DispatchEvent[]`. No `tooling`/`framework`/`sessionSource` field exists today. +- `src/cli/commands/analytics/types.ts` — `SessionAnalytics` (lines 79–133), the per-session aggregate; also has no framework/source field yet. +- `src/cli/commands/analytics/cost/cost-enricher.ts` — demonstrates the codebase's DI-for-testability convention (`EnricherDeps` interface, line 29–34); relevant precedent for how a new detector module should be structured (pure function + injected deps, unit-tested). +- `src/agents/plugins/claude/session/claude-named-invocations.ts` — single source of truth for extracting skill/agent/command names from Claude message logs via `extractNamedInvocations()` (lines 61–89): + - Skills: `tool_use` block with `name === 'Skill'` → `input.skill` (e.g. `"codemie:msgraph"`, `"superpowers:test-driven-development"`). + - Agents: `tool_use` block with `name === 'Agent'` (this CLI) or `name === 'Task'` (standard Claude Code) → `input.subagent_type` (e.g. 
`"sdlc-factory:tech-analyst"`, `"superpowers:brainstorming"`, `"Explore"`). + - Slash commands: user message text containing `<command-name>...</command-name>` with a sibling `<command-message>` (ensures a real invocation, not documentation) → e.g. `"sdlc-task"`, `"sdlc-light"`, `"tech-lead"`. +- `src/agents/plugins/claude/session/processors/claude.metrics-processor.ts` — calls `extractNamedInvocations()` live during session recording. + +**Framework/bundle name inventory found in this repo:** + +| Bundle | Where defined in this repo | Invocation shape observed | +|---|---|---| +| SDLC Factory | Skills live externally (`epm-cdme/codemie-public-skills`, path `ai-packages/sdlc-factory/skills/{name}/SKILL.md`); referenced in `skills-lock.json` (`complexity-scoring`, `memory`, `product-owner`, `requirements-intake`) | As skill: unnamespaced name (e.g. `memory`) or namespaced `sdlc-factory:memory` in `agentInvocations`. As slash command: `sdlc-task`, `sdlc-light`, `sdlc-autonomous` (unnamespaced, no `sdlc-factory:` prefix on commands) | +| superpowers | Cached locally under `.codex/plugins/cache/openai-curated/superpowers/{version}/skills/` (13 skills: `brainstorming`, `writing-plans`, `test-driven-development`, etc.) | Namespaced `superpowers:` in skill/agent invocations | +| speckit | `src/frameworks/plugins/speckit.plugin.ts` line 21 — framework name `'speckit'`, display "SpecKit", supported agents claude/gemini, init marker directory `.specify/` | Filesystem marker `.specify/`, or possible `speckit` slash-command/agent name | +| bmad | `src/frameworks/plugins/bmad.plugin.ts` line 36 — framework name `'bmad'`, display "BMAD Method", init marker `_bmad/` (also checks `.bmad/`) | Filesystem marker `_bmad/`/`.bmad/`, or possible `bmad` agent name | +| openspec / open-spec | **Not found anywhere in this repo** — no plugin, no skill, no config reference | Unknown — must be pattern-matched heuristically (e.g. 
skill/command name containing `openspec` or `open-spec`) since there is no existing integration to mirror | + +Note: `sdlc-light`, `sdlc-task`, `sdlc-autonomous` themselves are not found as registered skills/plugins in this repo (they are slash commands from the external `sdlc-factory` skill bundle, consistent with how they appear in `commandInvocations`). + +### Architecture and Layers Affected + +Per `.ai-run/guides/architecture/architecture.md` (plugin-based 5-layer architecture: CLI → Registry → Plugin → Core → Utils), this feature touches: + +- **Core/Report-pipeline layer** — new pure detection logic, analogous to `cost-enricher.ts` (DI + unit tested). Recommended new module e.g. `src/cli/commands/analytics/report/framework-detector.ts` (or `src/analytics/...` if a dedicated sub-tree is preferred) exposing `detectSessionSource(session): SessionSourceLabel`. +- **Registry layer** — a small registry of detectors (`SessionLabelRegistry`/detector array), mirroring existing registries (`AgentRegistry`, `ProviderRegistry`, `FrameworkRegistry`, `MigrationRegistry`). +- **Payload-builder / aggregator layer** — `SessionAnalytics` and `ReportSessionRecord` need a new field (e.g. `sessionSource: string`) populated during payload building, since all the raw invocation data (`skillInvocations`, `agentInvocations`, `commandInvocations`) is already flowing through this pipeline — no new telemetry/capture is required. +- **Client rendering layer** — `src/cli/commands/analytics/report/client/app.js`, function `VIEWS.sessions` (lines ~786–825), which builds the Sessions table. + +### Integration Points + +- **Exact 'Branch' column location**: `src/cli/commands/analytics/report/client/app.js`, `VIEWS.sessions`. + - Header array (line ~809): `['Date', 'Prompt', 'Agent', 'Project', 'Branch', 'Turns', 'Net lines', 'Input', 'Output', 'Cached', 'Cost']` — Branch is index 4. 
+ - Row-cell array (lines ~813–817): `branchCell` is emitted at the same index 4, immediately before `fmtNum(s.turns)`. + - Right-align mask array (line ~819): `[false, false, false, false, false, true, true, true, true, true, true]` — must also get a new entry inserted at index 5. + - New column must be inserted **at index 5** (right after Branch), shifting Turns/Net lines/etc. one position each. All three arrays (header, row cells, alignment mask) must be updated together or columns misalign — the alignment mask is flagged as the most commonly missed edit. + - Optional: the search filter string (line ~805) currently concatenates sessionId/agentName/project/branch/title; decide whether the new label should be searchable too. +- **Data already available to the client**: `s.skillInvocations`, `s.agentInvocations`, `s.commandInvocations` are already present on each session record reaching `app.js`; a `dispatchCounts(session, 'skill'|'agent'|'command')` helper (lines ~861–869) already normalizes access across both dispatch-events and invocation-array representations — useful if detection is implemented client-side instead of server-side. +- **CSV exporter** (`src/cli/commands/analytics/report/exporter.ts`, lines 39/59) has its own Branch column and would need a parallel update if CSV export should also carry the new label (out of scope unless requested). +- **Multiple frameworks per session**: a session can invoke both an SDLC Factory command and a superpowers skill. The task specifies a single label, so a priority order is needed (task explicitly places SDLC Factory detection first, i.e., "if SDLC Factory was NOT used, then check others" — this maps directly to first-match-wins Chain-of-Responsibility ordering: SDLC Factory > superpowers > openspec > speckit > bmad > Pure chat). 
+ +### Patterns and Conventions + +**Registry patterns already in the codebase (mirror one of these for consistency):** + +| Registry | File | Pattern | +|---|---|---| +| AgentRegistry | `src/agents/registry.ts` | Lazy-init Map-based registry, static methods `getAgent()`/`getAllAgents()`/`getAgentNames()` | +| ProviderRegistry | `src/providers/core/registry.ts` | Multi-map registry; polymorphic `.supports(provider)` predicate on each registered component to find a match | +| FrameworkRegistry | `src/frameworks/core/registry.ts` | Map-based with availability flag; `registerFramework(adapter)`, filtered by `.metadata.supportedAgents` | +| MigrationRegistry | `src/migrations/registry.ts` | Simplest: `static migrations: Migration[]`, `register()`/`getAll()`/`get(id)` | +| WorkflowRegistry | `src/workflows/registry.ts` | Functional (no class), static template arrays, pure query functions | + +**No dedicated Strategy/Chain-of-Responsibility class exists yet**, but `ProviderRegistry`'s `.supports()` predicate pattern and the `AnalyticsSource` interface (`sources/types.ts`) are the closest existing precedents for a pluggable, first-match-wins seam. `src/workflows/detector.ts` (VCS provider detection) is a procedural if/else chain — explicitly the anti-pattern this task wants to avoid replicating. + +**Recommended shape** (consistent with the above, and with `NamedInvocationStats`/`ReportSessionRecord` shapes already in the codebase): + +```typescript +export interface SessionLabelDetector { + name: string; // e.g. 'sdlc-factory', 'superpowers', 'openspec', 'speckit', 'bmad' + label: string; // display label, e.g. 
'CodeMie AI Factory' + detect(session: ReportSessionRecord | SessionAnalytics): boolean; +} +``` +A registry holds an ordered list of detectors (SDLC Factory first per requirement #2), iterates in order, returns the first match's `label`, and falls back to `'Pure chat'` if none match — a first-match-wins Chain-of-Responsibility, consistent with existing `.supports()`-style predicate matching in `ProviderRegistry`. + +--- + +## 3. Documentation Findings + +### Guides and Architecture Docs + +- `.ai-run/guides/architecture/architecture.md` — documents the plugin-based 5-layer architecture (CLI → Registry → Plugin → Core → Utils); used above to place the new detector module. +- `.ai-run/guides/development/development-practices.md` — error handling (specific error classes, `createErrorContext`, `logger.error` + `formatErrorForUser`), logging (`logger.debug`/`info`/`warn`/`error`, always `sanitizeLogArgs()`), async/await only, `exec()` from `src/utils/processes.ts` for process spawning. +- `.ai-run/guides/standards/code-quality.md` — naming (`camelCase` vars/functions, `PascalCase` classes/interfaces, `UPPER_SNAKE_CASE` constants, `kebab-case.ts` files, `*.test.ts` tests), explicit return types on exported functions, `interface` over `type` for object shapes, functions <50 lines, files <500 lines, single responsibility, no `console.log` (use `logger.debug()`). +- `.ai-run/guides/integration/external-integrations.md` — documents the analytics session flow (`onSessionEnd` hook, `SessionSyncer` reading JSONL deltas, POST to `v1/metrics`); background context for how session data reaches the analytics pipeline in the first place. 
+- `docs/ANALYTICS-REPORT.md` — authoritative spec for the analytics dashboard; documents all eight views (Overview, Agents, Projects, Tools & Models, Activity, Efficiency, Cost, Sessions) and the session-detail modal, which already surfaces "Skills / Agent subtypes / Slash commands" chip lists (i.e., the raw data this feature would classify is already user-visible elsewhere in the UI). +- `docs/superpowers/plans/2026-06-19-analytics-source-seam.md` — the design doc that introduced the `AnalyticsSource` seam (`SessionsSource`/`OtelSource`), explicitly intended so that future backends "become drop-in `AnalyticsSource` implementations without changing the CLI." This is the closest architectural precedent/ADR for building another pluggable seam (the session-label detector). +- `docs/superpowers/tasks/2026-06-23-codex-analytics-report/spec.md` — Codex analytics parity spec; shows how different agent types feed the same `ReportSessionRecord` shape. + +### Architectural Decisions + +- The `AnalyticsSource` seam (per the plan doc above) is the closest recorded ADR for "pluggable analytics component" — the new detector registry should follow the same spirit (interface + registry + ordered/first-match strategy) rather than ad hoc branching. +- `otel-loader.ts` inline note: Project/Branch are only populated when the codemie-claude-otel plugin was active; native-only sessions fall back to "Unknown" — same caveat likely applies to invocation-array completeness for native/untracked sessions, which is a relevant risk for the new detector's coverage. + +### Derived Conventions + +- New per-session pure-function modules in this pipeline (e.g. `cost-enricher.ts`) take an explicit `Deps` object for injected dependencies and are unit tested in a co-located `__tests__/` directory — the detector module should follow the same shape (`detectSessionSource(session, deps?)`, unit-tested independently of the full report pipeline). + +--- + +## 4. 
Testing Landscape + +### Existing Coverage + +- `tests/integration/analytics.test.ts` — E2E pipeline validation against a golden dataset (`tests/integration/metrics/fixtures/claude/expected-session.json`, `expected-metrics.jsonl`). +- `src/cli/commands/analytics/__tests__/aggregator.test.ts` — 14 cases covering branch attribution, change metrics, title parsing, and **named-invocation aggregation** (skill/agent/command arrays) — this is the layer a new "session source" field would be added to. +- `src/cli/commands/analytics/__tests__/native-loader.test.ts` — native session discovery/synthesis/dedup. +- `src/cli/commands/analytics/__tests__/otel-loader.test.ts` — OTEL event parsing. +- `src/cli/commands/analytics/__tests__/otel-report.integration.test.ts` — OTEL source → report pipeline end-to-end. +- `src/cli/commands/analytics/report/__tests__/report-generator.test.ts` — 8 cases for HTML injection/escaping/Chart.js inlining. +- `src/cli/commands/analytics/report/__tests__/payload-builder.test.ts` — 11 cases covering session dedup, multi-branch attribution, cost mapping, and skill/agent/command invocation pass-through (lines 215–233, 248–255) — this is exactly where a new `sessionSource` field would need a pass-through test. +- `src/agents/plugins/claude/session/__tests__/claude-named-invocations.test.ts` — confirms `sdlc-factory:tech-analyst`, `superpowers:code-reviewer` (agent invocations) and `tech-lead`, `analytics` (slash commands) are already extracted correctly today (lines 39, 40, 62–66). + +### Testing Framework and Patterns + +- Vitest throughout; unit tests co-located in `__tests__/`, integration tests under `tests/integration/`. +- Dynamic-import mocking convention (per `.ai-run/guides/testing/testing-patterns.md`): spies must be set up in `beforeEach` and the target module imported dynamically inside the test body (`await import(...)`) — static top-level imports bypass spies. 
+- `setupTestIsolation()` / `getTestHome()` (`tests/helpers/test-isolation.ts`) give each test a unique `CODEMIE_HOME` for parallel-safe execution. + +### Coverage Gaps + +- No detector/classification module exists yet, so there is (necessarily) no test coverage for "session source" detection — this is net-new test surface, not a regression risk. +- `SessionAnalytics` (types.ts, lines 79–133) and `ReportSessionRecord` (report/types.ts, lines 10–43) currently have no `sessionSource`/`framework` field — adding one requires new unit tests in `aggregator.test.ts` and `payload-builder.test.ts` plus a new dedicated test file for the detector module itself (e.g. `src/cli/commands/analytics/__tests__/session-source-detector.test.ts`). +- `app.js` (the client renderer) has no existing unit test harness in the threads' findings — if detection or column rendering logic is added there, verify whether `report-generator.test.ts` snapshot-tests rendered HTML/JS output that would need updating. + +--- + +## 5. Configuration and Environment + +### Environment Variables + +- General CodeMie config env vars (`src/utils/config.ts`): `CODEMIE_PROVIDER`, `CODEMIE_API_KEY`, `CODEMIE_BASE_URL`, `CODEMIE_MODEL`, `CODEMIE_TIMEOUT`, `CODEMIE_DEBUG`, `CODEMIE_URL`, `CODEMIE_AUTH_METHOD`, `CODEMIE_INTEGRATION_ID`, `CODEMIE_INTEGRATION_ALIAS`. +- No dedicated `OTEL_*` or `ANALYTICS_*` env vars were found; OTEL data is loaded via an explicit `--file` CLI flag (`loadOtelSessions({ file })`), not environment configuration. + +### Configuration Files + +- `~/.codemie/codemie-cli.config.json` (global) / `.codemie/codemie-cli.config.json` (project) — provider/profile config, not analytics-specific. +- `~/.codemie/sessions/` — tracked CodeMie session storage (JSON + JSONL metrics), the primary input to `SessionsSource`. +- `skills-lock.json` — registered skill manifest, source of the SDLC Factory skill name inventory used above. 
+ +### Feature Flags and Deployment Concerns + +- No feature flags gate this area currently. The `analytics` command has a `--no-scan-native` flag (skip native agent-log discovery) and the `analytics otel` subcommand has `--file`/`--user` flags — none are directly relevant to the new column but confirm the CLI surface that would carry the feature end-to-end. +- Framework markers for `speckit` (`.specify/`) and `bmad` (`_bmad/`/`.bmad/`) are filesystem directories created by `src/frameworks/plugins/{speckit,bmad}.plugin.ts` at framework-init time — these are workspace-level markers, not session-transcript signals, and are a materially different detection surface than skill/agent/command invocation names. This is a design decision the implementer needs to resolve explicitly (see risks). + +--- + +## 6. Risk Indicators + +- No codegraph MCP available for this repo — filesystem Explore fallback was used for all five research dimensions; if codegraph is expected to be available in this environment, treat this as an environment setup gap rather than a repo gap. +- **openspec/open-spec has zero footprint in this repo** — no plugin, no skill manifest entry, no config reference was found anywhere. The detector for this bundle cannot mirror an existing pattern and must be built from an assumed/heuristic naming convention (e.g. matching `openspec`/`open-spec` substrings in skill or command names) — this is the least-grounded of the five detectors and carries the highest risk of silent false negatives. +- **Detection-surface inconsistency across bundles**: SDLC Factory/superpowers signals live in transcript-derived data (`skillInvocations`/`agentInvocations`/`commandInvocations`), while speckit/bmad signals (as currently implemented elsewhere in this repo) are *filesystem markers* (`.specify/`, `_bmad/`) created at project-init time, not per-session events. 
A detector keyed only on invocation names will likely never fire for speckit/bmad unless those frameworks also emit skill/agent/command names into the session transcript. This needs an explicit product decision: should the detector also inspect `session.workingDirectory` for marker directories, or is invocation-name matching (e.g. an agent/skill literally named `speckit`/`bmad`) considered sufficient? Not resolved by this research. +- `sdlc-light`, `sdlc-task`, `sdlc-autonomous` appear only as **slash commands** (`commandInvocations`), not as skill or agent invocations, and are **not namespaced** with an `sdlc-factory:` prefix — a detector that only checks for a `sdlc-factory:` prefix on skills/agents would miss sessions that only used these top-level slash commands. The detector must check all three invocation arrays (`skillInvocations`, `agentInvocations`, `commandInvocations`) and use both prefix-match (`sdlc-factory:`) and exact/allow-list match (`sdlc-light`, `sdlc-task`, `sdlc-autonomous`, and any other declared "sdlc-factory skills") to avoid false negatives. +- No existing multi-label session data model: `ReportSessionRecord`/`SessionAnalytics` currently store zero framework/source classification. Adding a single-string field forces a priority decision when multiple bundles appear in one session (recommend first-match-wins ordering seeded from the ticket's own precedence: SDLC Factory checked first, then the other bundles, then `Pure chat`). +- Coverage-limited scenario for native/untracked sessions: `otel-loader.ts` documents that native Claude Code sessions have no cwd/git branch by default, so branch data is "Unknown" for native-only sessions — invocation-array completeness for native sessions was not independently verified and may have similar limitations, which could make the new column silently degrade for that session type. Needs a follow-up check of `native-loader.ts` extraction coverage before implementation is considered feature-complete. 
+- Three-place UI edit risk in `app.js` (header array, row-cell array, alignment-mask array) is explicitly documented as easy to get wrong — the alignment-mask array is called out as the most commonly missed edit when inserting a column. +- No dedicated Strategy/Chain-of-Responsibility base class exists in the codebase to extend; the closest patterns (`ProviderRegistry.supports()`, `AnalyticsSource` interface, `AgentRegistry`) are all slightly different shapes, so the new detector registry will be a new (small) pattern rather than a drop-in reuse of an existing one — moderate design risk, low implementation risk given the small surface area. +- No test coverage exists yet for a "session source" concept anywhere in the codebase (expected, since the feature doesn't exist) — project convention calls for a full new unit-test surface (`aggregator.test.ts`, `payload-builder.test.ts`, and a new detector test file), but under the "Tests Only On Explicit Request" policy in AGENTS.md those tests should be written only if the user explicitly requests test coverage. + +--- + +## 7. Summary for Complexity Assessment + +This task touches four architectural layers in the existing analytics pipeline: (1) a new small **Core** module for pluggable detection (new file, e.g. `report/framework-detector.ts` or a new `src/analytics/` sub-tree with `core/types.ts` + `registry.ts` + `plugins/*.detector.ts`, mirroring existing registries like `ProviderRegistry`/`FrameworkRegistry`), (2) the **aggregation/payload-builder layer** (`aggregator.ts`, `payload-builder.ts`, `types.ts`, `report/types.ts`) to add and populate a new `sessionSource`/`framework` field on `SessionAnalytics` and `ReportSessionRecord`, (3) the **client rendering layer** (`report/client/app.js`, `VIEWS.sessions`) requiring three coordinated array edits (header, row cells, alignment mask) to insert the column after Branch, and optionally (4) `report/exporter.ts` if CSV export should carry the same label. 
Estimated file-change surface is roughly 6-10 files: 1-4 new detector files, 2-3 modified pipeline files, 1 modified client file, plus corresponding test files. This sits at the boundary of Medium/High per the project's own complexity guidance (2-5 files standard vs 6+ architecture-sensitive), leaning High mainly because of the new pluggable-pattern design work rather than raw line count. + +Technical novelty is moderate: all the raw data needed (skill/agent/command invocation names) is **already captured and flowing through the pipeline today** — no new telemetry or transcript parsing is required, which meaningfully de-risks the task. The novel part is (a) introducing a first Strategy/Chain-of-Responsibility-style registry into this pipeline (no exact precedent exists, though `ProviderRegistry`'s `.supports()` predicate and the `AnalyticsSource` seam are close analogues to mirror), and (b) resolving the ambiguity around speckit/bmad detection, which in this repo are currently only implemented as *filesystem init markers*, not transcript signals — meaning invocation-name-only detection may under-detect those two bundles unless the product accepts that limitation or the detector also inspects `session.workingDirectory`. + +Test coverage posture for the surrounding pipeline is strong (comprehensive Vitest suites for aggregator, payload-builder, native-loader, otel-loader, report-generator, and named-invocation extraction, all with dozens of passing cases), so the new detector module can be added with high confidence and testability, following the existing DI-and-pure-function convention seen in `cost-enricher.ts`. 
Key risk factors that should weight the complexity score up: (1) openspec/open-spec has zero existing footprint in the repo and requires a heuristic, unverified detection approach; (2) the sdlc-factory command names (`sdlc-light`/`sdlc-task`/`sdlc-autonomous`) are unnamespaced slash commands distinct from the namespaced `sdlc-factory:` skill/agent prefix, so the detector needs both a prefix-match and an explicit allow-list to avoid false negatives; (3) the three-place client-side array edit in `app.js` is a known footgun for column misalignment; (4) a single-label-per-session model requires an explicit priority/precedence decision when a session touches multiple bundles. From 099eaa9f913c351a1348bd5a1cd8e0f2c07408ba Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 13:08:23 +0300 Subject: [PATCH 05/10] fix(analytics): attribute cost to skill dispatches in native sessions Skill dispatches ran inline in the parent transcript and were never priced (enrichDispatchCosts only enriched kind === 'agent'). They now get costUSD/tokens by re-attributing the session's own already-priced usage records that fall inside the skill's time window. 
--- .../cost/__tests__/cost-enricher.test.ts | 80 +++++++++++++++++++ .../commands/analytics/cost/cost-enricher.ts | 57 +++++++++++-- 2 files changed, 129 insertions(+), 8 deletions(-) diff --git a/src/cli/commands/analytics/cost/__tests__/cost-enricher.test.ts b/src/cli/commands/analytics/cost/__tests__/cost-enricher.test.ts index aeca6924..be36bf96 100644 --- a/src/cli/commands/analytics/cost/__tests__/cost-enricher.test.ts +++ b/src/cli/commands/analytics/cost/__tests__/cost-enricher.test.ts @@ -413,6 +413,86 @@ describe('enrichCosts — dispatch cost attribution', () => { expect(dispatch!.tokens).toBeUndefined(); expect(dispatch!.tools).toBeUndefined(); }); + + it('attributes cost to a skill dispatch from the session\'s own usage records in its time window', async () => { + const deps: EnricherDeps = { + resolveAgentName: () => 'claude', + loadAgentSessionFile: async () => '/fake/parent.jsonl', + parseNative: async () => + ({ + sessionId: 'parent-3', + agentName: 'claude', + metadata: {}, + messages: [ + { + timestamp: '2026-06-08T10:00:00Z', + message: { + role: 'assistant', + content: [{ type: 'tool_use', id: 'toolu_skill_1', name: 'Skill', input: { skill: 'code-review' } }], + }, + }, + // Inside the skill's window [10:00:00, 10:00:30] — must be attributed. + { + timestamp: '2026-06-08T10:00:10Z', + requestId: 'req-skill-1', + message: { id: 'msg-skill-1', role: 'assistant', model: 'claude-sonnet-4-5', usage: { input_tokens: 200_000, output_tokens: 0 } }, + }, + { + timestamp: '2026-06-08T10:00:30Z', + message: { + role: 'user', + content: [{ type: 'tool_result', tool_use_id: 'toolu_skill_1', content: 'done' }], + }, + }, + // Outside the skill's window (after it ends) — must NOT be attributed. 
+ { + timestamp: '2026-06-08T10:05:00Z', + requestId: 'req-after', + message: { id: 'msg-after', role: 'assistant', model: 'claude-sonnet-4-5', usage: { input_tokens: 1_000_000, output_tokens: 0 } }, + }, + ], + }) as never, + }; + + const rawSession = [{ sessionId: 'parent-3', startEvent: { agentName: 'claude' }, deltas: [] }] as never[]; + const { index } = await enrichCosts(rawSession, deps); + const cost = index.get('parent-3')!; + + const dispatch = cost.dispatches!.find((d) => d.kind === 'skill' && d.name === 'code-review'); + expect(dispatch).toBeDefined(); + expect(dispatch!.tokens?.input).toBe(200_000); // only the in-window record, not the 1M after + expect(dispatch!.costUSD).toBeCloseTo(0.6, 6); // 200k input @ $3/1M sonnet-4-5 + }); + + it('leaves skill dispatch cost undefined when durationMs is 0 (no window to attribute from)', async () => { + const deps: EnricherDeps = { + resolveAgentName: () => 'claude', + loadAgentSessionFile: async () => '/fake/parent.jsonl', + parseNative: async () => + ({ + sessionId: 'parent-4', + agentName: 'claude', + metadata: {}, + messages: [ + { + timestamp: '2026-06-08T10:00:00Z', + message: { + role: 'assistant', + content: [{ type: 'tool_use', id: 'toolu_skill_2', name: 'Skill', input: { skill: 'orphan-skill' } }], + }, + }, + // No matching tool_result → 0-duration marker (dispatch-extractor.ts line 105). 
+ ], + }) as never, + }; + const rawSession = [{ sessionId: 'parent-4', startEvent: { agentName: 'claude' }, deltas: [] }] as never[]; + const { index } = await enrichCosts(rawSession, deps); + const cost = index.get('parent-4')!; + const dispatch = cost.dispatches!.find((d) => d.kind === 'skill' && d.name === 'orphan-skill'); + expect(dispatch).toBeDefined(); + expect(dispatch!.costUSD).toBeUndefined(); + expect(dispatch!.tokens).toBeUndefined(); + }); }); describe('acceptance: TTL-aware pricing against real transcripts', () => { diff --git a/src/cli/commands/analytics/cost/cost-enricher.ts b/src/cli/commands/analytics/cost/cost-enricher.ts index fcfb939e..adfce04d 100644 --- a/src/cli/commands/analytics/cost/cost-enricher.ts +++ b/src/cli/commands/analytics/cost/cost-enricher.ts @@ -192,6 +192,38 @@ async function mapWithConcurrency(items: T[], limit: number, fn: (item: T) return out; } +/** + * Attribute cost to a `skill` dispatch from the session's OWN already-priced usage records + * whose timestamp falls inside the skill's [start, start + durationMs] window. Skills run + * inline in the parent transcript (no separate subagent log to pull tokens from), so this is a + * re-attribution of tokens already counted in the session total — same "ALLOCATION, don't add + * to `seen`" semantics as the agent-dispatch path below. A dispatch with durationMs === 0 (no + * matching tool_result found — see dispatch-extractor.ts) has no window to attribute from and is + * left as "unknown" (absent costUSD/tokens), not zero. 
+ */ +function enrichSkillDispatchCost(dispatch: DispatchEventRaw, sessionRecords: UsageRecord[]): void { + if (!dispatch.durationMs) return; + const windowEnd = dispatch.start + dispatch.durationMs; + const matched = sessionRecords.filter((r) => r.ts != null && r.ts >= dispatch.start && r.ts <= windowEnd); + if (!matched.length) return; + + const usageByModel = sumUsageRecords(matched); + let totalCost = 0; + let totalTokens = emptyUsage(); + let priced = false; + for (const [rawModel, usage] of usageByModel) { + const model = normalizeModelName(rawModel); + const price = lookupPrice(model); + if (price) { + totalCost += costBreakdown(usage, price).total; + priced = true; + } + totalTokens = addUsage(totalTokens, usage); + } + if (priced) dispatch.costUSD = Math.round(totalCost * 1e8) / 1e8; + dispatch.tokens = totalTokens; +} + /** * Second-pass enrichment: for each agent dispatch that has a matching subagent entry * (linked by toolUseId from the .meta.json), extract usage from the subagent's messages, @@ -200,18 +232,24 @@ async function mapWithConcurrency(items: T[], limit: number, fn: (item: T) * The per-dispatch cost is an ALLOCATION of already-counted session tokens (subagents are * included in the session total via allMessageArrays). Do NOT add to `seen` here. */ -function enrichDispatchCosts(dispatches: DispatchEventRaw[], parsed: ParsedSession, agentName: string): void { - if (!parsed.subagents?.length) return; - +function enrichDispatchCosts( + dispatches: DispatchEventRaw[], + parsed: ParsedSession, + agentName: string, + sessionRecords: UsageRecord[] +): void { const byToolUseId = new Map(); - for (const sub of parsed.subagents) { + for (const sub of parsed.subagents ?? 
[]) { if (sub.toolUseId && Array.isArray(sub.messages)) { byToolUseId.set(sub.toolUseId, sub); } } - if (!byToolUseId.size) return; for (const dispatch of dispatches) { + if (dispatch.kind === 'skill') { + enrichSkillDispatchCost(dispatch, sessionRecords); + continue; + } if (dispatch.kind !== 'agent' || !dispatch._toolUseId) continue; const sub = byToolUseId.get(dispatch._toolUseId); if (!sub) continue; @@ -288,13 +326,15 @@ export async function enrichCosts( for (const entry of ordered) { let usageByModel: Map; let series: CostSeriesPoint[] = []; + let records: UsageRecord[] = []; try { // Gather ordered, deduped records ONCE per session (consumes keys in `seen`). When there // are records (Claude per-message path) sum them for the map + build the series from the // SAME records — so the series endpoint equals the session cost. The summed-gatherer // fallback runs only when there are no records (SDK rollup / gemini / no-reader), paths - // that never touch `seen`, so there is no double-dedup. - const records = entry.parsed ? gatherDedupedUsageRecords(entry.agentName, entry.parsed, seen) : []; + // that never touch `seen`, so there is no double-dedup. The same `records` are reused + // below to attribute cost to `skill` dispatches within their time window. + records = entry.parsed ? 
gatherDedupedUsageRecords(entry.agentName, entry.parsed, seen) : []; if (records.length) { usageByModel = sumUsageRecords(records); series = buildCostSeries(records); @@ -315,6 +355,7 @@ export async function enrichCosts( logger.debug(`[cost] usage extraction failed for ${entry.sessionId}:`, e); usageByModel = new Map(); series = []; + records = []; } const { cost, unpriced: u } = priceUsage(entry.sessionId, entry.hadLog, usageByModel); if (series.length) { @@ -324,7 +365,7 @@ export async function enrichCosts( try { const dispatches = extractDispatchEvents(entry.parsed, entry.agentName); if (dispatches.length) { - enrichDispatchCosts(dispatches, entry.parsed, entry.agentName); + enrichDispatchCosts(dispatches, entry.parsed, entry.agentName, records); // Strip internal _toolUseId before storing in the public cost index cost.dispatches = dispatches.map(({ _toolUseId: _id, ...d }) => d); } From b5aa99fa5440f22f3dfbeabdc956b2defae519ab Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 13:08:59 +0300 Subject: [PATCH 06/10] fix(analytics): count skill dispatches toward timeline cost-data detection hasCostData gated the Timeline's "cost..." subtitle on kind === 'agent' only, so it under-reported once skill dispatches could also carry costUSD. Bar rendering and the detail panel already check costUSD without a kind restriction, so only this gate needed broadening. --- src/cli/commands/analytics/report/client/app.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli/commands/analytics/report/client/app.js b/src/cli/commands/analytics/report/client/app.js index 4cf97855..3fab794e 100644 --- a/src/cli/commands/analytics/report/client/app.js +++ b/src/cli/commands/analytics/report/client/app.js @@ -1132,7 +1132,7 @@ // Timeline — Gantt of all top-level agent, skill, and command dispatches. 
var hasDispatches = (s.dispatches || []).length > 0; - var hasCostData = (s.dispatches || []).some(function (d) { return d.kind === 'agent' && d.costUSD != null; }); + var hasCostData = (s.dispatches || []).some(function (d) { return d.costUSD != null; }); var tlSubtitle = hasDispatches ? (hasCostData ? 'click a step for cost, token & timing details · positioned across the session' : 'click a step for timing details · positioned across the session') : ''; var tlCard = card('Timeline', tlSubtitle); if (hasDispatches) { From 2e87fcc5a318327b0032f1ffed7a5529614d911d Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 13:10:36 +0300 Subject: [PATCH 07/10] fix(analytics): attribute parallel-subagent OTEL cost to the narrowest matching window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildDispatches() attributed agent:* api_requests to whichever subagent_completed window happened to be processed first in event order. When parallel subagents' completion windows overlap, a wider window could greedily claim a request meant for a narrower, more precise one — leaving the correct dispatch with no cost at all. OTEL carries no per-invocation id to disambiguate exactly, so this switches to a two-pass design: collect every agent window first, then attribute each request to the narrowest window that contains it. 
--- .../analytics/__tests__/otel-loader.test.ts | 28 ++++ src/cli/commands/analytics/otel-loader.ts | 139 +++++++++++------- 2 files changed, 111 insertions(+), 56 deletions(-) diff --git a/src/cli/commands/analytics/__tests__/otel-loader.test.ts b/src/cli/commands/analytics/__tests__/otel-loader.test.ts index a4764746..371cb176 100644 --- a/src/cli/commands/analytics/__tests__/otel-loader.test.ts +++ b/src/cli/commands/analytics/__tests__/otel-loader.test.ts @@ -109,6 +109,34 @@ describe('buildDispatches', () => { expect(agent.tokens?.total).toBe(430); // 100+50+200+80 }); + it('attributes each request to the narrowest containing window when subagent windows overlap', () => { + const ev = (name: string, ts: string, extra: Record): OtelEvent => ({ + _type: 'log', + ts, + name, + attrs: { 'session.id': 'S', 'event.name': name, ...extra }, + resource: {}, + }); + const dispatches = buildDispatches([ + // WIDE window: [10:00:00, 10:02:00] (duration 120000ms). + ev('subagent_completed', '2026-06-19T10:02:00.000Z', { agent_type: 'wide-agent', duration_ms: 120000 }), + // NARROW window nested inside the wide one: [10:00:20, 10:01:00] (duration 40000ms). + ev('subagent_completed', '2026-06-19T10:01:00.000Z', { agent_type: 'narrow-agent', duration_ms: 40000 }), + // Falls inside BOTH windows — must go to the NARROW one, not "wide" just because wide's + // subagent_completed event happens to be processed before narrow's in this event list. + ev('api_request', '2026-06-19T10:00:30.000Z', { query_source: 'agent:narrow-agent', cost_usd: 0.7, input_tokens: 100 }), + // Falls ONLY inside the wide window (after narrow's window has already ended). 
+ ev('api_request', '2026-06-19T10:01:30.000Z', { query_source: 'agent:wide-agent', cost_usd: 0.3, input_tokens: 50 }), + ]); + + const narrow = dispatches.find((d) => d.name === 'narrow-agent')!; + const wide = dispatches.find((d) => d.name === 'wide-agent')!; + expect(narrow.costUSD).toBeCloseTo(0.7, 6); + expect(narrow.tokens?.input).toBe(100); + expect(wide.costUSD).toBeCloseTo(0.3, 6); + expect(wide.tokens?.input).toBe(50); + }); + it('attributes real cost + duration to skills via api_request.skill.name', () => { const ev = (name: string, ts: string, extra: Record): OtelEvent => ({ _type: 'log', diff --git a/src/cli/commands/analytics/otel-loader.ts b/src/cli/commands/analytics/otel-loader.ts index 726fdb9d..3b42f970 100644 --- a/src/cli/commands/analytics/otel-loader.ts +++ b/src/cli/commands/analytics/otel-loader.ts @@ -210,7 +210,7 @@ function downsampleSeries(pts: Array<{ t: number; cost: number; tokens: number } export function buildDispatches(sessionEvents: OtelEvent[]): DispatchEvent[] { const agentReqs = sessionEvents .filter((e) => isApiRequest(e) && String(attr(e, 'query_source') || '').startsWith('agent:')) - .map((e) => ({ ms: Date.parse(e.ts), e, consumed: false })) + .map((e) => ({ ms: Date.parse(e.ts), e })) .filter((x) => Number.isFinite(x.ms)); const skillReqs = sessionEvents @@ -223,68 +223,95 @@ export function buildDispatches(sessionEvents: OtelEvent[]): DispatchEvent[] { .map((e) => ({ name: String(attr(e, 'skill.name') || '(skill)'), ms: Date.parse(e.ts) })) .filter((x) => Number.isFinite(x.ms)); + // Pass 1: collect every agent dispatch window (subagent_completed events) before attributing + // any request — attribution needs to compare ALL windows for a given request, not just the + // one whose subagent_completed event happens to appear first. 
+ interface AgentWindow { + e: OtelEvent; + start: number; + end: number; + durationMs: number; + } + const agentWindows: AgentWindow[] = []; + for (const e of sessionEvents) { + if (e.name !== 'subagent_completed') continue; + const end = Date.parse(e.ts); + if (!Number.isFinite(end)) continue; + const durationMs = num(attr(e, 'duration_ms')); + const start = durationMs > 0 ? end - durationMs : end; + agentWindows.push({ e, start, end, durationMs }); + } + + // Pass 2: attribute each agent:* api_request to the NARROWEST containing window. OTEL carries + // no per-invocation id linking a request to a specific subagent run, so when parallel + // subagents' [start, end] windows overlap, the tightest containing window is the best + // available signal (a wider, unrelated window should not out-compete a precise match). Each + // request contributes to at most one window. + const claimed = new Map(); + for (const ar of agentReqs) { + let best: AgentWindow | null = null; + for (const w of agentWindows) { + if (ar.ms < w.start || ar.ms > w.end) continue; + if (!best || w.end - w.start < best.end - best.start) best = w; + } + if (!best) continue; + let claim = claimed.get(best); + if (!claim) { + claim = { tokens: emptyTokens(), costUSD: 0, matched: 0 }; + claimed.set(best, claim); + } + addTokens(claim.tokens, ar.e); + claim.costUSD += num(attr(ar.e, 'cost_usd')); + claim.matched += 1; + } + const out: DispatchEvent[] = []; + for (const w of agentWindows) { + const claim = claimed.get(w); + const d: DispatchEvent = { kind: 'agent', name: String(attr(w.e, 'agent_type') || '(agent)'), start: w.start, durationMs: w.durationMs }; + if (claim && claim.matched > 0) { + d.tokens = claim.tokens; + d.costUSD = claim.costUSD; + } else { + const tt = num(attr(w.e, 'total_tokens')); + if (tt > 0) { + const t = emptyTokens(); + t.total = tt; + d.tokens = t; + } + } + out.push(d); + } + + // Skill windows are non-overlapping by construction (a skill's window ends at the next + // 
activation of the SAME skill), so the original name+window matching is unaffected by the + // parallel-agent fix above and is kept as-is. for (const e of sessionEvents) { - if (e.name === 'subagent_completed') { - const end = Date.parse(e.ts); - if (!Number.isFinite(end)) continue; - const durationMs = num(attr(e, 'duration_ms')); - const start = durationMs > 0 ? end - durationMs : end; + if (e.name !== 'skill_activated') continue; + const start = Date.parse(e.ts); + if (!Number.isFinite(start)) continue; + const name = String(attr(e, 'skill.name') || '(skill)'); + const nextSame = skillStarts + .filter((s) => s.name === name && s.ms > start) + .reduce((min, s) => Math.min(min, s.ms), Infinity); + const matched = skillReqs.filter((r) => r.name === name && r.ms >= start && r.ms < nextSame); + const d: DispatchEvent = { kind: 'skill', name, start, durationMs: 0 }; + if (matched.length) { const tokens = emptyTokens(); let costUSD = 0; - let matched = 0; - for (const ar of agentReqs) { - // Attribute each subagent api_request to at most ONE dispatch. Parallel subagents complete - // with overlapping [end − duration, end] windows; without consuming a matched request, the - // same agent:* api_request would be counted by every overlapping window — double-counting a - // dispatch's cost/tokens. Earlier-processed (≈ earlier-completing) dispatches claim first. 
- if (ar.consumed) continue; - if (ar.ms >= start && ar.ms <= end) { - addTokens(tokens, ar.e); - costUSD += num(attr(ar.e, 'cost_usd')); - ar.consumed = true; - matched += 1; - } + let lastEnd = start; + for (const { ms, e: req } of matched) { + addTokens(tokens, req); + costUSD += num(attr(req, 'cost_usd')); + lastEnd = Math.max(lastEnd, ms + num(attr(req, 'duration_ms'))); } - const d: DispatchEvent = { kind: 'agent', name: String(attr(e, 'agent_type') || '(agent)'), start, durationMs }; - if (matched > 0) { - d.tokens = tokens; - d.costUSD = costUSD; - } else { - const tt = num(attr(e, 'total_tokens')); - if (tt > 0) { - const t = emptyTokens(); - t.total = tt; - d.tokens = t; - } - } - out.push(d); - } else if (e.name === 'skill_activated') { - const start = Date.parse(e.ts); - if (!Number.isFinite(start)) continue; - const name = String(attr(e, 'skill.name') || '(skill)'); - // Window ends at the next activation of the SAME skill (else open-ended). - const nextSame = skillStarts - .filter((s) => s.name === name && s.ms > start) - .reduce((min, s) => Math.min(min, s.ms), Infinity); - const matched = skillReqs.filter((r) => r.name === name && r.ms >= start && r.ms < nextSame); - const d: DispatchEvent = { kind: 'skill', name, start, durationMs: 0 }; - if (matched.length) { - const tokens = emptyTokens(); - let costUSD = 0; - let lastEnd = start; - for (const { ms, e: req } of matched) { - addTokens(tokens, req); - costUSD += num(attr(req, 'cost_usd')); - lastEnd = Math.max(lastEnd, ms + num(attr(req, 'duration_ms'))); - } - d.durationMs = Math.max(0, lastEnd - start); - d.tokens = tokens; - d.costUSD = costUSD; - } - out.push(d); + d.durationMs = Math.max(0, lastEnd - start); + d.tokens = tokens; + d.costUSD = costUSD; } + out.push(d); } + out.sort((a, b) => a.start - b.start); return out.slice(0, MAX_DISPATCHES); } From b98401d8a12cd514e21f1f4470674c082f9fdb36 Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 13:11:02 +0300 Subject: [PATCH 
08/10] docs(analytics): scope the timeline cost-per-dispatch description to what's implemented Clarify that only agent/skill dispatches with attributable usage in their time window show cost; slash commands are point events and never do. --- docs/ANALYTICS-REPORT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ANALYTICS-REPORT.md b/docs/ANALYTICS-REPORT.md index 0fd77813..7c08adde 100644 --- a/docs/ANALYTICS-REPORT.md +++ b/docs/ANALYTICS-REPORT.md @@ -145,7 +145,7 @@ Clicking a session anywhere in the report opens a detail overlay with: - **Activity** — turns, tool calls and success rate, agent/skill/command invocation counts - **Code changes** — files changed, lines added/removed, net - **Token & cost growth chart** — cumulative cost and token usage per turn (from the native log; shown when data is available) -- **Dispatch timeline** — Interactive Gantt of every top-level dispatch. A session bar spans the full activity window across the top, then each agent, skill, and slash-command dispatch is rendered as its own bar positioned by wall-clock start time, so each sits where it actually ran. The window is the union of the tracked session span and the dispatch span, so dispatches from a resumed or compacted session still place correctly. Short skills and zero-duration commands fall back to a minimum bar width so they stay visible as markers. Click any bar to open a detail panel on the right showing that dispatch's estimated cost, token breakdown (input / output / cache read / cache write), wall-clock duration, start offset from session start, and top tool call counts. +- **Dispatch timeline** — Interactive Gantt of every top-level dispatch. A session bar spans the full activity window across the top, then each agent, skill, and slash-command dispatch is rendered as its own bar positioned by wall-clock start time, so each sits where it actually ran. 
The window is the union of the tracked session span and the dispatch span, so dispatches from a resumed or compacted session still place correctly. Short skills and zero-duration commands fall back to a minimum bar width so they stay visible as markers. Click any bar to open a detail panel on the right showing wall-clock duration, start offset from session start, top tool call counts, and — for agent and skill dispatches where usage can be attributed to their time window — estimated cost and token breakdown (input / output / cache read / cache write). Slash-command dispatches are point events with no window to attribute usage from, so they show timing only, never a cost. - **Skills / Agent subtypes / Slash commands** — chip lists of what was invoked and how many times --- From 58aea99028d2d3ba0854f4773d27f3a81a61d68f Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 13:26:15 +0300 Subject: [PATCH 09/10] fix(analytics): fall back to same-tier Claude price for models newer than the pricing table pricing.json is manually vendored and lags new model releases; a model like claude-sonnet-5 (no shared version segment with any -4-* key) was hitting lookupPrice() returning null, so per-agent dispatches showed correctly attributed tokens but no cost in the timeline. Every -4-* version bump within a Claude tier has kept the same per-token rate, so lookupPrice() now falls back to the latest known same-tier price when no exact/segment match exists, in addition to pinning an explicit claude-sonnet-5 entry. 
--- .../analytics/cost/__tests__/pricing.test.ts | 29 ++++++++ src/cli/commands/analytics/cost/pricing.json | 9 ++- src/cli/commands/analytics/cost/pricing.ts | 72 +++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/src/cli/commands/analytics/cost/__tests__/pricing.test.ts b/src/cli/commands/analytics/cost/__tests__/pricing.test.ts index 9ff253c3..bb285f61 100644 --- a/src/cli/commands/analytics/cost/__tests__/pricing.test.ts +++ b/src/cli/commands/analytics/cost/__tests__/pricing.test.ts @@ -63,4 +63,33 @@ describe('lookupPrice', () => { expect(p).not.toBeNull(); expect(p!.cacheWrite1h).toBeUndefined(); }); + + it('returns the pinned sonnet-tier price for claude-sonnet-5 (a version bump ahead of the -4-* entries)', () => { + const p = lookupPrice('claude-sonnet-5'); + expect(p).not.toBeNull(); + expect(p!.input).toBe(3); + expect(p!.output).toBe(15); + }); + + it('falls back to the latest known price in the same Claude tier for a model newer than any table entry', () => { + // No claude-opus-9 entry exists (or ever will, by construction) — this proves the tier + // fallback is family-prefix based, not a one-off pinned key for sonnet-5. + const p = lookupPrice('claude-opus-9'); + expect(p).not.toBeNull(); + expect(p!.input).toBe(5); + expect(p!.output).toBe(25); + }); + + it('tier fallback ignores dated/pinned snapshot keys when picking the "latest" version', () => { + // claude-haiku-4-5-20251001 (a dated snapshot) must not be mistaken for a newer version + // than claude-haiku-4-6 just because "20251001" numerically exceeds "6". 
+ const p = lookupPrice('claude-haiku-9'); + expect(p).not.toBeNull(); + expect(p!.input).toBe(1); + expect(p!.output).toBe(5); + }); + + it('still returns null for a non-Claude unknown model (no tier fallback applies)', () => { + expect(lookupPrice('totally-made-up-model')).toBeNull(); + }); }); diff --git a/src/cli/commands/analytics/cost/pricing.json b/src/cli/commands/analytics/cost/pricing.json index 1bee3a6e..5a7b046c 100644 --- a/src/cli/commands/analytics/cost/pricing.json +++ b/src/cli/commands/analytics/cost/pricing.json @@ -17,7 +17,14 @@ "cacheRead": "cache hit price (Anthropic: 0.1x input, OpenAI: varies, Google: ~0.1x input)", "cacheWrite": "cache write price (Anthropic: 1.25x input, OpenAI: ~same as input)" }, - "note": "codemie: added claude 4-7/4-8 tier estimates 2026-06-08; added kimi-for-coding alias and normalized kimi-k2-5 key 2026-06-15" + "note": "codemie: added claude 4-7/4-8 tier estimates 2026-06-08; added kimi-for-coding alias and normalized kimi-k2-5 key 2026-06-15; added claude-sonnet-5 pinned entry 2026-07-01 (pricing table lookup also has a generic same-tier fallback now, see pricing.ts)" + }, + "claude-sonnet-5": { + "input": 3, + "output": 15, + "cacheRead": 0.3, + "cacheWrite": 3.75, + "cacheWrite1h": 6.0 }, "claude-opus-4-6": { "input": 5, diff --git a/src/cli/commands/analytics/cost/pricing.ts b/src/cli/commands/analytics/cost/pricing.ts index b1dd3a9e..78f0b69e 100644 --- a/src/cli/commands/analytics/cost/pricing.ts +++ b/src/cli/commands/analytics/cost/pricing.ts @@ -77,11 +77,76 @@ function isSegmentMatch(name: string, key: string): boolean { } } +/** Claude pricing tiers whose per-tier rate has stayed flat across every `-4-*` version bump seen so far. */ +const CLAUDE_TIERS = ['claude-opus', 'claude-sonnet', 'claude-haiku']; + +/** + * Parse the version segments trailing a tier prefix into a numeric tuple for comparison, e.g. + * `claude-sonnet-4-8` under tier `claude-sonnet` -> `[4, 8]`. 
Returns null for keys that don't + * fit the plain `(-\d+)*` shape — non-numeric segments (`-latest`) or a long numeric + * segment (a pinned date snapshot like `-20250514`, 8 digits) — since those aren't meaningful + * "is this newer" signals and would otherwise outrank a real version bump by raw magnitude. + */ +function tierVersionTuple(key: string, tier: string): number[] | null { + const rest = key.slice(tier.length); + if (!rest) { + return [0]; + } + const segments = rest.split('-').filter(Boolean); + const nums: number[] = []; + for (const segment of segments) { + if (!/^\d+$/.test(segment) || segment.length >= 8) { + return null; + } + nums.push(Number(segment)); + } + return nums; +} + +function compareVersionTuples(a: number[], b: number[]): number { + for (let i = 0; i < Math.max(a.length, b.length); i++) { + const diff = (a[i] ?? 0) - (b[i] ?? 0); + if (diff !== 0) { + return diff; + } + } + return 0; +} + +/** + * Fall back to the latest known price within the same Claude tier (opus/sonnet/haiku) when a + * model name matches no table entry at all — e.g. a new major-version model (`claude-sonnet-5`) + * that shares no version segment with any `-4-*` key, so {@link isSegmentMatch} can't find it. + * Every version bump observed within a tier so far has kept the same per-token rate, so the + * latest known entry is the best available estimate; callers must still log this as inexact. 
+ */ +function claudeTierFallback(normalized: string, prices: Record): { key: string; price: ModelPrice } | null { + const tier = CLAUDE_TIERS.find((t) => normalized === t || normalized.startsWith(`${t}-`)); + if (!tier) { + return null; + } + let best: { key: string; version: number[]; price: ModelPrice } | null = null; + for (const [key, price] of Object.entries(prices)) { + if (key !== tier && !key.startsWith(`${tier}-`)) { + continue; + } + const version = tierVersionTuple(key, tier); + if (version === null) { + continue; + } + if (!best || compareVersionTuples(version, best.version) > 0) { + best = { key, version, price }; + } + } + return best ? { key: best.key, price: best.price } : null; +} + /** * Look up pricing for a model. Returns null when no entry matches (the caller marks the model * `unpriced` — never a silent $0). Resolution order: * 1. Exact (normalized) match — authoritative. * 2. Longest key aligned to a segment boundary — a deliberate family fallback, logged as inexact. + * 3. Latest same-tier Claude price — for a model newer than every table entry, logged as inexact. * Dots are folded to dashes first because the table keys use dashes (e.g. `gpt-4-1`, not `gpt-4.1`). 
*/ export function lookupPrice(model: string): ModelPrice | null { @@ -103,5 +168,12 @@ export function lookupPrice(model: string): ModelPrice | null { logger.debug(`[pricing] no exact entry for "${normalized}"; using family price "${best.key}"`); return best.price; } + + const tierFallback = claudeTierFallback(normalized, prices); + if (tierFallback) { + logger.debug(`[pricing] no entry for "${normalized}"; using latest same-tier price "${tierFallback.key}"`); + return tierFallback.price; + } + return null; } From ce32d8b7b7a68985f3cb85eb3d909e69c36be3c2 Mon Sep 17 00:00:00 2001 From: vadimvlasenko Date: Wed, 1 Jul 2026 16:51:59 +0300 Subject: [PATCH 10/10] feat: update md docs --- docs/ANALYTICS-REPORT.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/ANALYTICS-REPORT.md b/docs/ANALYTICS-REPORT.md index 7c08adde..79c898c5 100644 --- a/docs/ANALYTICS-REPORT.md +++ b/docs/ANALYTICS-REPORT.md @@ -114,6 +114,8 @@ A ranked table of the most bloated sessions and the dead-session list are both c Estimated API-equivalent spend (token usage × model pricing). If you use Claude on a subscription, you don't pay per token — this view shows the equivalent metered-API value for benchmarking against alternatives or tracking consumption trends. +> **Why this reads lower than the terminal's live cost.** Cost here is counted **per API response**: each response's token usage is priced exactly once, matching how the provider bills and how Claude Code's own telemetry (`cost.usage`) records it. A single response is written to the native log across several lines (e.g. a `thinking` line and a `tool_use` line, each repeating the same usage), and the live statusline in the terminal sums those lines — so it over-counts multi-part responses and shows a higher number. For sessions heavy on extended thinking plus tool use, expect the report total to sit noticeably below the live statusline; the report figure is the authoritative, de-duplicated one. 
+ Key elements: - **Coverage banner** — tells you how many sessions were successfully priced and which agents have full, partial, or missing token data