diff --git a/change-logs/2026/06/27/fix-verify-task-create-persistence.md b/change-logs/2026/06/27/fix-verify-task-create-persistence.md new file mode 100644 index 00000000..171e6050 --- /dev/null +++ b/change-logs/2026/06/27/fix-verify-task-create-persistence.md @@ -0,0 +1 @@ +Task creation now verifies the write actually landed on disk before reporting success. Previously `dev3 task create` could print "Created task " (consuming a seq) for a task that was never persisted; it now fails loudly with a clear error instead of returning a ghost task, so the printed id is always immediately resolvable. diff --git a/decisions/082-verify-task-create-persistence.md b/decisions/082-verify-task-create-persistence.md new file mode 100644 index 00000000..368fa2c9 --- /dev/null +++ b/decisions/082-verify-task-create-persistence.md @@ -0,0 +1,37 @@ +# 082 — Verify task-create persistence before reporting success + +## Context +Agent vents reported that `dev3 task create` printed `Created task (seq N)` and exited 0, +but the task never appeared in `dev3 tasks list` / `dev3 task show` — the seq was consumed yet +the task was lost. An agent trusts that success line and tells the user "task created" when it +wasn't. + +## Investigation +The single-instance data layer is sound: `addTask` reads fresh under a cross-process +`mkdir` file lock (`file-lock.ts`), writes via `atomicWriteFile` (temp + `rename`), and +invalidates the read cache. No public `saveTasks(fullArray)` caller exists outside `data.ts`, +so there is no stale read-modify-write clobber within one process. The read cache is +stat-validated and adds always grow file size, so a stale cache cannot hide a freshly added +task. The reproducible permanent-loss paths are out of scope here: multiple app instances +clobbering the shared file (`wontfix` — niche dev-only setup) and macOS Full Disk Access loss +mid-write (separate FDA issue). What we *can* fix is the dishonest success. + +## Decision +`addTask` (`src/bun/data.ts`) now re-reads the tasks file fresh (`rawLoadTasks(..., { strict: +true })`, bypassing the cache) after the save and throws if the new task id is absent. The error +names the likely causes (FDA / multi-instance clobber) and the file path. Because the RPC layer +turns a thrown handler error into `{ ok: false, error }`, the CLI now exits non-zero with a clear +message instead of printing a false "Created task". This also guarantees the "id is immediately +resolvable" contract: if create returns, the task is confirmed on disk. + +## Risks +One extra fresh read per task creation (negligible — task creation is a rare, user/agent-driven +action and the file is small). The guard does not catch a *later* clobber by another instance +(that happens after `addTask` returns); it only guarantees the write landed at creation time. + +## Alternatives considered +- Verify in the RPC handler `createTask` instead of the data layer — rejected: would only cover + the create RPC path, not every `addTask` caller (UI, variants). +- Chase the multi-instance / FDA root cause — out of scope (wontfix / separate FDA track). +- Block/retry until queryable — unnecessary; the write is synchronous under the lock, so a + single read-back is authoritative. diff --git a/src/bun/__tests__/data-create-verify.test.ts b/src/bun/__tests__/data-create-verify.test.ts new file mode 100644 index 00000000..09d36ee6 --- /dev/null +++ b/src/bun/__tests__/data-create-verify.test.ts @@ -0,0 +1,97 @@ +import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import type { Project, Task } from "../../shared/types"; + +// Mock node:fs/promises so we can simulate a *silent write drop*: the payload +// write "succeeds" (no throw) but the new task never actually lands on disk. +// This is the failure mode the vents describe — `task create` reports success +// (id + seq consumed) yet the task is never queryable. The real-world triggers +// are FDA/sandbox losing fs access mid-write or another instance clobbering the +// file; here we reproduce the observable symptom deterministically by writing an +// empty array instead of the requested payload. Only the main tasks.json payload +// write is intercepted; backups and every other fs op use the real impl. +let dropTasksWrite = false; +vi.mock("node:fs/promises", async () => { + const actual = await vi.importActual("node:fs/promises"); + // Matches the live tasks.json AND its `.tmp-` sibling (atomicWriteFile), + // but NOT the dated backup files under tasks-backups/. + const isTasksPayloadWrite = (p: string): boolean => !p.includes(".bak") && p.includes("tasks.json"); + return { + ...actual, + writeFile: vi.fn(async (path: any, data: any, opts?: any) => { + const p = String(path); + if (dropTasksWrite && isTasksPayloadWrite(p)) { + return actual.writeFile(p, "[]", opts); + } + return actual.writeFile(path, data, opts); + }), + }; +}); + +const tempHome = mkdtempSync(join(tmpdir(), "dev3-create-verify-")); +const dev3Home = join(tempHome, ".dev3.0"); +const originalHome = process.env.HOME; +const tasksDir = join(dev3Home, "data", "tmp-existing-project"); +const tasksFile = join(tasksDir, "tasks.json"); + +function makeProject(overrides?: Partial): Project { + return { + id: "proj-1", + name: "Existing Project", + path: "/tmp/existing-project", + setupScript: "", + setupScriptLaunchMode: "parallel", + devScript: "", + cleanupScript: "", + defaultBaseBranch: "main", + createdAt: "2026-04-15T00:00:00.000Z", + labels: [], + customColumns: [], + ...overrides, + }; +} + +describe("addTask verifies persistence before returning", () => { + beforeEach(() => { + dropTasksWrite = false; + vi.resetModules(); + process.env.HOME = tempHome; + rmSync(tempHome, { recursive: true, force: true }); + mkdirSync(dev3Home, { recursive: true }); + writeFileSync(join(dev3Home, "projects.json"), JSON.stringify([makeProject()], null, 2)); + }); + + afterEach(() => { + dropTasksWrite = false; + }); + + afterAll(() => { + process.env.HOME = originalHome; + rmSync(tempHome, { recursive: true, force: true }); + }); + + it("throws (instead of returning a ghost task) when the write is silently dropped", async () => { + const data = await import("../data"); + + dropTasksWrite = true; + await expect(data.addTask(makeProject(), "Ghost task", "todo")).rejects.toThrow(/persist/i); + dropTasksWrite = false; + + // The task must genuinely NOT be on disk — confirming the throw reflects a + // real persistence failure, not a false alarm. + const onDisk = JSON.parse(readFileSync(tasksFile, "utf8")) as Task[]; + expect(onDisk.find((t) => t.description === "Ghost task")).toBeUndefined(); + }); + + it("returns a task that is immediately readable from disk on a healthy write", async () => { + const data = await import("../data"); + + const task = await data.addTask(makeProject(), "Real task", "todo"); + + const onDisk = JSON.parse(readFileSync(tasksFile, "utf8")) as Task[]; + expect(onDisk.find((t) => t.id === task.id)).toBeDefined(); + expect(task.description).toBe("Real task"); + }); +}); diff --git a/src/bun/data.ts b/src/bun/data.ts index 62044695..0fb18038 100644 --- a/src/bun/data.ts +++ b/src/bun/data.ts @@ -788,6 +788,24 @@ export async function addTask( task.history = [{ at: now, title: getTaskTitle(task), overview: getTaskOverview(task), changed: "created" }]; tasks.push(task); await rawSaveTasks(project, tasks); + + // Verify the write actually landed before reporting success. atomicWriteFile + // can report success while the new content never reaches disk — e.g. macOS + // Full Disk Access / sandbox loss mid-write, or another running app instance + // clobbering the file. Without this guard the CLI prints "Created task " + // (consuming a seq) for a task that is never queryable, which an agent then + // trusts. Re-read fresh from disk (strict bypasses the cache) and fail loudly + // instead of returning a ghost task. See decision 082. + const persisted = await rawLoadTasks(project, { strict: true }); + if (!persisted.some((t) => t.id === task.id)) { + log.error("Task create verification failed — write did not persist", { taskId: task.id, seq: task.seq }); + throw new Error( + `Task ${task.id} failed to persist (verification read-back did not find it). ` + + `The write reported success but the task is not on disk — likely macOS Full Disk Access loss ` + + `or another running app instance clobbering ${tasksFile(project)}.`, + ); + } + log.info("Task created", { taskId: task.id, seq: task.seq, title }); return task; });