Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
- run: pnpm lint
- run: pnpm typecheck
- run: pnpm test:unit
- run: pnpm audit --audit-level high
- run: pnpm format:check

e2e:
Expand All @@ -34,6 +35,6 @@ jobs:
cache: pnpm
- run: pnpm install --frozen-lockfile
- run: pnpm build
- run: pnpm --filter @iris/e2e exec playwright install --with-deps chromium
- run: pnpm --filter @syrin/iris-e2e --fail-if-no-match exec playwright install --with-deps chromium
- name: Run e2e battery (boots api+demo+next-smoke, drives Iris headless)
run: pnpm --filter @iris/e2e run e2e:ci
run: pnpm --filter @syrin/iris-e2e --fail-if-no-match run e2e:ci
3 changes: 3 additions & 0 deletions apps/demo/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ VITE_IRIS_WS_URL=ws://localhost:4400/iris

# Pairing token — must match IRIS_TOKEN in the @syrin/iris-server .env (blank for localhost dev).
VITE_IRIS_TOKEN=

# Explicit opt-in for a deployed page or non-localhost bridge. Requires VITE_IRIS_TOKEN.
VITE_IRIS_ALLOW_NON_LOCALHOST=false
7 changes: 5 additions & 2 deletions apps/demo/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
"zustand": "^5.0.14"
},
"devDependencies": {
"@babel/core": "^7.29.7",
"@rolldown/plugin-babel": "^0.2.3",
"@syrin/iris-babel-plugin": "workspace:*",
"@types/babel__core": "^7.20.5",
"@types/react": "^19.0.0",
"@types/react-dom": "^19.0.0",
"@vitejs/plugin-react": "^4.3.4",
"@vitejs/plugin-react": "^6.0.2",
"agentation": "^3.0.2",
"agentation-mcp": "^1.2.0",
"vite": "^6.0.7"
"vite": "^8.0.16"
}
}
15 changes: 14 additions & 1 deletion apps/demo/src/iris-dev.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,20 @@ export function installIris(): void {
const present = params.has('present');
const session = params.get('session') ?? SESSION_AUTO;
const irisPort: number = typeof __IRIS_PORT__ !== 'undefined' ? __IRIS_PORT__ : 4400;
iris.connect({ session, present, url: `ws://localhost:${irisPort}/iris` });
const token = import.meta.env.VITE_IRIS_TOKEN;
const configuredUrl = import.meta.env.VITE_IRIS_WS_URL;
const url =
typeof configuredUrl === 'string' && configuredUrl.length > 0
? configuredUrl
: `ws://localhost:${irisPort}/iris`;
const allowNonLocalhost = import.meta.env.VITE_IRIS_ALLOW_NON_LOCALHOST === 'true';
iris.connect({
session,
present,
url,
...(allowNonLocalhost ? { allowNonLocalhost: true } : {}),
...(typeof token === 'string' && token.length > 0 ? { token } : {}),
});
registerStore('app', () => useApp.getState());
registerCapabilities({
testids: TESTIDS,
Expand Down
10 changes: 10 additions & 0 deletions apps/demo/src/vite-env.d.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
/// <reference types="vite/client" />

interface ImportMetaEnv {
readonly VITE_IRIS_ALLOW_NON_LOCALHOST?: string;
readonly VITE_IRIS_TOKEN?: string;
readonly VITE_IRIS_WS_URL?: string;
}

interface ImportMeta {
readonly env: ImportMetaEnv;
}

declare const __IRIS_PORT__: number;
3 changes: 2 additions & 1 deletion apps/demo/vite.config.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import babel from '@rolldown/plugin-babel';
import irisSource from '@syrin/iris-babel-plugin';

// The Iris showcase dashboard runs on a dedicated port (4310) so it never collides with other
Expand All @@ -10,7 +11,7 @@ const IRIS_PORT = Number(process.env['IRIS_PORT'] ?? 4400);
export default defineConfig({
// Stamp data-iris-source on host elements in dev so iris_inspect can map DOM -> file:line
// (React 19 removed _debugSource). Dev-only; harmless in prod builds.
plugins: [react({ babel: { plugins: [irisSource] } })],
plugins: [babel({ plugins: [irisSource] }), react()],
server: { port: 4310 },
define: { __IRIS_PORT__: IRIS_PORT },
});
1 change: 1 addition & 0 deletions apps/e2e/run-ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ for _ in $(seq 1 120); do
sleep 2
done
curl -s -o /dev/null http://localhost:8787/api/health || { echo "api never came up"; cat /tmp/e2e-api.log; exit 1; }
curl -s -o /dev/null http://localhost:4310 || { echo "demo never came up"; cat /tmp/e2e-demo.log; exit 1; }
curl -s -o /dev/null http://localhost:3100 || { echo "next never came up"; cat /tmp/e2e-next.log; exit 1; }

echo "==> running e2e battery"
Expand Down
2 changes: 1 addition & 1 deletion apps/e2e/specs/next-smoke-test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ check('new task rendered after click', added.pass, added.failureReason ?? '');
console.log('\nTASK B — API call (Next route handler) + modal + no console errors');
const since = (await T('iris_act', { ref: await refOf('testid', 'ping-button'), action: 'click' })).since;
const verdict = await T('iris_assert', {
timeout_ms: 4000,
timeout_ms: 10000,
predicate: { kind: 'allOf', predicates: [
{ kind: 'net', method: 'GET', urlContains: '/api/ping', status: 200, since },
{ kind: 'element', query: { role: 'dialog', name: 'Server reply' }, state: 'visible' },
Expand Down
10 changes: 5 additions & 5 deletions apps/e2e/specs/p3a-test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,22 @@ for(let i=0;i<200&&server.bridge.sessions.count()===0;i++) await sleep(50);
const refOf=async(by,value)=>{for(let i=0;i<30;i++){const r=(await T('iris_query',{by,value})).elements?.[0]?.ref;if(r)return r;await sleep(100);}throw new Error('not found '+value);};
console.log('\n=== M8 Stage A: record → .iris/ flow → replay → drift (real browser) ===');
// record + save
await T('iris_record_start',{name:'addtask'});
await T('iris_record_start',{recordingName:'addtask'});
await T('iris_act',{ref:await refOf('testid','add-task'),action:'click'});
await T('iris_record_stop',{name:'addtask'});
const saved=await T('iris_flow_save',{name:'addtask'});
await T('iris_record_stop',{recordingName:'addtask'});
const saved=await T('iris_flow_save',{flowName:'addtask'});
const flowFile=path.join(irisRoot,'flows','addtask.json');
chk('flow saved to .iris/flows/addtask.json on disk', nfs.existsSync(flowFile), flowFile);
const raw=nfs.readFileSync(flowFile,'utf8');
chk('flow anchors on testid (no eXX refs leaked)', raw.includes('add-task') && !/"e\d+"/.test(raw), raw.includes('"testid"')?'has testid anchors':'no testid');
const list=await T('iris_flow_list',{});
chk('iris_flow_list returns the saved flow', JSON.stringify(list).includes('addtask'));
// replay happy path
const rep=await T('iris_flow_replay',{name:'addtask'});
const rep=await T('iris_flow_replay',{flowName:'addtask'});
chk('iris_flow_replay re-resolves anchors + runs green', (rep.ok!==false)&&!rep.drift, JSON.stringify(rep).slice(0,90));
// drift: corrupt the testid, replay, expect legible drift with nearest match
nfs.writeFileSync(flowFile, raw.replaceAll('add-task','add-tassk'));
const drift=await T('iris_flow_replay',{name:'addtask'});
const drift=await T('iris_flow_replay',{flowName:'addtask'});
const ds=JSON.stringify(drift);
chk('renamed testid → legible drift with a nearest-match', /drift/i.test(ds) && /add-task/.test(ds), ds.slice(0,140));
console.log(`\n${fail===0?'✅ M8 STAGE A VERIFIED':'❌ FAILED'} (${pass} passed, ${fail} failed)`);
Expand Down
12 changes: 6 additions & 6 deletions apps/e2e/specs/p3b-test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ await p.goto('http://localhost:3100/',{waitUntil:'networkidle'});
for(let i=0;i<200&&server.bridge.sessions.count()===0;i++) await sleep(50);
const refOf=async(by,v)=>{for(let i=0;i<30;i++){const r=(await T('iris_query',{by,value:v})).elements?.[0]?.ref;if(r)return r;await sleep(100);}throw new Error('nf '+v);};
console.log('\n=== M8 Stage B: self-healing rebind (real browser) ===');
await T('iris_record_start',{name:'ht'});
await T('iris_record_start',{recordingName:'ht'});
await T('iris_act',{ref:await refOf('testid','add-task'),action:'click'});
await T('iris_record_stop',{name:'ht'});
await T('iris_flow_save',{name:'ht'});
await T('iris_record_stop',{recordingName:'ht'});
await T('iris_flow_save',{flowName:'ht'});
const file=path.join(irisRoot,'flows','ht.json');
// corrupt the testid
nfs.writeFileSync(file, nfs.readFileSync(file,'utf8').replaceAll('add-task','add-tassk'));
const bytesBefore=nfs.readFileSync(file,'utf8');
const proposeOnly=await T('iris_flow_heal',{name:'ht',apply:false});
const proposeOnly=await T('iris_flow_heal',{flowName:'ht',apply:false});
chk('heal(apply:false) proposes a rebind but does NOT write', /add-task/.test(JSON.stringify(proposeOnly)) && nfs.readFileSync(file,'utf8')===bytesBefore, JSON.stringify(proposeOnly).slice(0,120));
const applied=await T('iris_flow_heal',{name:'ht',apply:true});
const applied=await T('iris_flow_heal',{flowName:'ht',apply:true});
chk('heal(apply:true) rewrites the anchor back to add-task', nfs.readFileSync(file,'utf8').includes('add-task') && applied.applied===true, JSON.stringify(applied).slice(0,110));
const rep=await T('iris_flow_replay',{name:'ht'});
const rep=await T('iris_flow_replay',{flowName:'ht'});
chk('replay is green again after self-heal', rep.status==='ok'||rep.ok!==false&&!rep.drift, JSON.stringify(rep).slice(0,90));
console.log(`\n${fail===0?'✅ M8 STAGE B SELF-HEAL VERIFIED':'❌ FAILED'} (${pass} passed, ${fail} failed)`);
await b.close(); await server.close(); nfs.rmSync(path.dirname(irisRoot),{recursive:true,force:true}); process.exit(fail===0?0:1);
16 changes: 8 additions & 8 deletions apps/e2e/specs/project-history-test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ const refOf=async(by,value)=>{for(let i=0;i<30;i++){const r=(await T('iris_query
console.log('\n=== 0.3.7 RUNHISTORY: replay → .iris/project.json → iris_project diff (real browser) ===');

// Record + save a one-step flow.
await T('iris_record_start',{name:'addtask'});
await T('iris_record_start',{recordingName:'addtask'});
await T('iris_act',{ref:await refOf('testid','add-task'),action:'click'});
await T('iris_record_stop',{name:'addtask'});
await T('iris_flow_save',{name:'addtask'});
await T('iris_record_stop',{recordingName:'addtask'});
await T('iris_flow_save',{flowName:'addtask'});

// Replay twice — each replay should auto-record a run.
await T('iris_flow_replay',{name:'addtask'});
await T('iris_flow_replay',{name:'addtask'});
await T('iris_flow_replay',{flowName:'addtask'});
await T('iris_flow_replay',{flowName:'addtask'});

// 1) project.json exists on disk and holds flow_replay records.
const projFile=path.join(irisRoot,'project.json');
Expand All @@ -41,13 +41,13 @@ chk('each run carries status + driftSteps evidence + at', onDisk.runs.every(r=>r
// 2) iris_project { name } returns scoped history + lastRun + diff-vs-last.
const proj=await T('iris_project',{name:'addtask'});
chk('iris_project returns scoped runs', Array.isArray(proj.runs)&&proj.runs.length===2, `runs=${proj.runs?.length}`);
chk('iris_project returns lastRun', proj.lastRun&&proj.lastRun.name==='addtask', JSON.stringify(proj.lastRun).slice(0,80));
chk('iris_project returns a diff-vs-last block', proj.diff&&typeof proj.diff.regressed==='boolean', JSON.stringify(proj.diff).slice(0,100));
chk('iris_project returns lastRun', proj.lastRun&&proj.lastRun.name==='addtask', JSON.stringify(proj.lastRun)?.slice(0,80));
chk('iris_project returns a diff-vs-last block', proj.diff&&typeof proj.diff.regressed==='boolean', JSON.stringify(proj.diff)?.slice(0,100));

// 3) iris_run_record appends a manual run that lastRun then sees.
await T('iris_run_record',{name:'addtask',status:'pass',summary:'manual smoke'});
const after=await T('iris_project',{name:'addtask'});
chk('iris_run_record appends a manual run', after.lastRun?.kind==='manual'&&after.lastRun?.summary==='manual smoke', JSON.stringify(after.lastRun).slice(0,100));
chk('iris_run_record appends a manual run', after.lastRun?.kind==='manual'&&after.lastRun?.summary==='manual smoke', JSON.stringify(after.lastRun)?.slice(0,100));

console.log(`\n${fail===0?'✅ RUNHISTORY VERIFIED':'❌ FAILED'} (${pass} passed, ${fail} failed)`);
await b.close(); await server.close(); nfs.rmSync(path.dirname(irisRoot),{recursive:true,force:true}); process.exit(fail===0?0:1);
2 changes: 1 addition & 1 deletion apps/e2e/specs/real-world-tests.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const refOf = async (by, value) => { for (let i = 0; i < 40; i++) { const r = (a

const b = await chromium.launch({ headless: true });
const p = await b.newPage();
await p.goto('http://localhost:4310/', { waitUntil: 'networkidle' });
await p.goto('http://localhost:4310/?session=demo', { waitUntil: 'networkidle' });
for (let i = 0; i < 200 && server.bridge.sessions.count() === 0; i++) await sleep(50);

console.log('\n=== Iris × showcase dashboard (:4310) ===');
Expand Down
8 changes: 5 additions & 3 deletions apps/e2e/specs/spa-nav-realinput-test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,20 @@ const refOf=async()=>{for(let i=0;i<30;i++){const r=(await T('iris_query',{by:'t
console.log('\n=== bug #1: real input survives SPA navigation (real Chromium + CDP) ===');
// baseline on "/"
let ref=await refOf();
const a1=await T('iris_act',{ref,action:'click'});
const a1=await T('iris_act',{ref,action:'click',args:{native:true}});
chk('pre-nav: iris_act is REAL', a1.inputMode==='real', `inputMode=${a1.inputMode} url=${sess.url}`);
chk('pre-nav: realInputAvailable true', (await provider.isAvailableFor(sess.url))===true);
// CLIENT-SIDE NAV (pushState) — no full reload; SDK stays connected and emits route.change
await page.evaluate(()=>history.pushState({},'','/workspace?script=42'));
for(let i=0;i<40&&!/\/workspace\?script=42/.test(sess.url);i++) await sleep(50); // wait for route.change → server
chk('after pushState: session.url tracks the SPA route (THE FIX)', /\/workspace\?script=42$/.test(sess.url), `url=${sess.url}`);
chk('after pushState: page.url matches session.url', page.url()===sess.url, `cdp=${page.url()}`);
chk('after pushState: realInputAvailable STILL true', (await provider.isAvailableFor(sess.url))===true);
let available=false;
for(let i=0;i<40&&!available;i++){available=await provider.isAvailableFor(sess.url);if(!available)await sleep(50);}
chk('after pushState: realInputAvailable STILL true', available);
// real input must STILL engage post-nav (the button is still mounted; pushState didn't re-render)
ref=await refOf();
const a2=await T('iris_act',{ref,action:'click'});
const a2=await T('iris_act',{ref,action:'click',args:{native:true}});
chk('after pushState: iris_act is STILL REAL (was synthetic before the fix)', a2.inputMode==='real', `inputMode=${a2.inputMode}`);
console.log(`\n${fail===0?'✅ BUG #1 FIXED':'❌ STILL BROKEN'} (${pass} passed, ${fail} failed)`);
await browser.close(); await server.close(); process.exit(fail===0?0:1);
2 changes: 2 additions & 0 deletions apps/next-smoke/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Optional browser/bridge pairing token. Must match IRIS_TOKEN.
NEXT_PUBLIC_IRIS_TOKEN=
7 changes: 6 additions & 1 deletion apps/next-smoke/app/iris-dev.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ export function IrisDev() {
testids: ['ping-button', 'add-task', 'edit-field', 'show-toast'],
signals: ['field:committed'],
});
iris.connect({ session: 'next-smoke', present: true });
const token = process.env['NEXT_PUBLIC_IRIS_TOKEN'];
iris.connect({
session: 'next-smoke',
present: true,
...(typeof token === 'string' && token.length > 0 ? { token } : {}),
});
})();
}, []);
return null;
Expand Down
3 changes: 2 additions & 1 deletion docs/flows.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ iris_flow_replay({ flowName: "create-task" }) // re-resolve each anchor against
- `drift` — an anchor missed (a testid was renamed, or a signal never fired). The result is
**legible**: `{ step, anchor, drift: { reasonKind: "testid_not_found", nearest: "send-message" } }`
— never a blind failure. (This is the "whose fault is it" principle.)
- `error` — the flow file is missing/invalid; no steps ran.
- `error` — the flow file is missing/invalid, or a resolved action failed. Runtime failures include
the failed step and a top-level error envelope.

A testid-_preserving_ refactor (you moved markup but kept the testids) still replays green.

Expand Down
12 changes: 8 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,33 @@
"description": "Iris — eyes for coding agents. Observe and verify a running web app over MCP, no screenshots.",
"packageManager": "pnpm@10.33.2",
"engines": {
"node": ">=22"
"node": ">=22.12"
},
"scripts": {
"build": "turbo run build",
"dev": "turbo run dev",
"lint": "turbo run lint",
"typecheck": "turbo run typecheck",
"test": "turbo run test",
"test": "turbo run test:unit",
"test:unit": "turbo run test:unit",
"format": "prettier --write .",
"format:check": "prettier --check .",
"publish": "pnpm build && pnpm -r publish --access public --no-git-checks",
"publish:dry-run": "pnpm build && pnpm -r publish --access public --no-git-checks --dry-run"
},
"pnpm": {
"overrides": {
"esbuild@>=0.18.0 <0.28.1": "0.28.1",
"postcss@<8.5.10": "8.5.15"
},
"onlyBuiltDependencies": [
"esbuild"
]
},
"devDependencies": {
"@eslint/js": "^9.18.0",
"@types/node": "^22.10.0",
"@vitest/coverage-v8": "^4.1.8",
"@vitest/coverage-v8": "^3.2.6",
"eslint": "^9.18.0",
"eslint-plugin-react-hooks": "^5.1.0",
"jsdom": "^29.1.1",
Expand All @@ -36,7 +40,7 @@
"turbo": "^2.3.3",
"typescript": "^5.7.3",
"typescript-eslint": "^8.20.0",
"vitest": "^3.0.0",
"vitest": "^3.2.6",
"ws": "^8.21.0"
}
}
2 changes: 1 addition & 1 deletion packages/babel-plugin/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
"build": "tsc -b",
"typecheck": "tsc -b",
"lint": "eslint src",
"test:unit": "vitest run --passWithNoTests",
"test:unit": "vitest run src --passWithNoTests",
"prepack": "tsc -b && find dist -name \"*.test.*\" -delete"
},
"peerDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion packages/browser/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"build": "tsc -b",
"typecheck": "tsc -b",
"lint": "eslint src",
"test:unit": "vitest run --passWithNoTests",
"test:unit": "vitest run src --passWithNoTests",
"prepack": "tsc -b && find dist -name \"*.test.*\" -delete"
},
"dependencies": {
Expand Down
4 changes: 2 additions & 2 deletions packages/browser/src/actions/actions.effect.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,13 @@ describe('action result: hover enter/leave warning (F3)', () => {
describe('action result: testid normalization (G6)', () => {
it('includes data-testid of the resolved element', async () => {
document.body.innerHTML = '<button data-testid="pay-btn">Pay</button>';
const r = await executeAction(refOf('button'), 'click');
const r = await executeAction(refOf('button'), 'click', { confirmDangerous: true });
expect(r.testid).toBe('pay-btn');
});

it('omits testid when the element has none', async () => {
document.body.innerHTML = '<button>Pay</button>';
const r = await executeAction(refOf('button'), 'click');
const r = await executeAction(refOf('button'), 'click', { confirmDangerous: true });
expect(r.testid).toBeUndefined();
});

Expand Down
30 changes: 30 additions & 0 deletions packages/browser/src/actions/actions.interactions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,36 @@ describe('webmcp passthrough', () => {
expect(callTool).toHaveBeenCalledWith('search', { q: 'x' });
expect(result).toEqual({ called: 'search' });
});

it('blocks dangerous tools without explicit confirmation', async () => {
const callTool = vi.fn(() => Promise.resolve({ ok: true }));
(navigator as unknown as Record<string, unknown>)['modelContext'] = { callTool };
const reg = createCommandRegistry();
const handler = reg.get('act');
if (handler === undefined) throw new Error('no act handler');
await expect(
handler({ action: 'webmcp', args: { tool: 'delete_account', params: {} } }),
).rejects.toThrow(/confirmDangerous/);
await handler({
action: 'webmcp',
args: { tool: 'delete_account', params: {}, confirmDangerous: true },
});
expect(callTool).toHaveBeenCalledOnce();
});
});

describe('dangerous action confirmation', () => {
it('blocks a destructive click until explicitly confirmed', async () => {
document.body.innerHTML = '<button>Delete account</button>';
const button = document.querySelector('button') as HTMLButtonElement;
const ref = refs.refFor(button);
const clicked = vi.fn();
button.addEventListener('click', clicked);
await expect(executeAction(ref, 'click')).rejects.toThrow(/confirmDangerous/);
expect(clicked).not.toHaveBeenCalled();
await executeAction(ref, 'click', { confirmDangerous: true });
expect(clicked).toHaveBeenCalledOnce();
});
});

describe('dev overlay', () => {
Expand Down
Loading