diff --git a/cmd/jcode/main.go b/cmd/jcode/main.go index e27c913..bba2fe1 100644 --- a/cmd/jcode/main.go +++ b/cmd/jcode/main.go @@ -35,6 +35,7 @@ func main() { command.NewMCPCmd(), command.NewACPCmd(), command.NewWebCmd(), + command.NewAutomationCmd(), command.NewVersionCmd(), command.NewDoctorCmd(), command.NewSessionsCmd(), diff --git a/design/automations-redesign.html b/design/automations-redesign.html new file mode 100644 index 0000000..9ac78b2 --- /dev/null +++ b/design/automations-redesign.html @@ -0,0 +1,1596 @@ + + + + + +jcode — Automations Redesign + + + + + + + + + + +
+
+
+

Automations — Redesign

+

+ A spec rendering of the AutomationsView page. The card is restructured + around when it runs next, runs become a status-coloured table, and the empty / + templates / editor states share one surface geometry. Tokens are verbatim from + tokens.css — toggle the theme to preview dark. +

+
+
+ + +
+
+ + +
+
+ 01 +

Page surface — full page at rest

+
+
+
+
+

Automations

+
+
+ + +
+ +
+
+
+
+

Use agents to handle recurring work on a cadence you choose.

+
+ +
+
+
+ Daily code review + Daily +
+ Ran ok +
+

Review open PRs on main, summarise risks, and post a thread to the team channel with suggested reviewers.

+
+ Schedule + Daily · 09:00 + + + + + + + Next run + in 6h 12m +
+
+ + +
+
+
+ Weekly dependency audit + Weekly +
+ Running +
+

Check go.sum for known CVEs, open issues for anything critical, and draft a bump PR for safe patches.

+
+ Schedule + Mon · 04:00 + + + + + + + Started + 2m ago +
+
+ + +
+
+
+ Triage stale issues + Manual
+
+

Close issues with no activity for 90 days after leaving a comment with a reopen link.

+
+ Trigger + Run manually + + + + + + + Last run + 3d ago · ok +
+
+
+ + +
+

Recent runs

+
+ + +
+
+
+
+
+ Daily code review + + Jun 24, 09:00 + · + schedule + +
+ 42s +
+
+
+ Weekly dependency audit + + Jun 24, 04:00 + · + schedule + +
+ running… +
+
+
+ Triage stale issues + + Jun 21, 18:22 + · + manual + + approval_required: tool edit needs confirmation — manual runs with Ask mode wait for a human.
+ 1m 03s +
+
+
+ Daily code review + + Jun 23, 09:00 + · + schedule + +
+ 38s +
+
+
+
+
+
+

+ Anatomy change. The current card puts name, status, schedule, and four icon + buttons all in one crowded foot row. The redesign separates them into a status + strip + title/cadence header, a prompt, and a labelled Schedule / Next run + meta grid. "Next run" is the one number a scheduled automation exists to surface — it + now carries the brand accent so the eye finds it first. The colour strip on the card's + leading edge matches the strip on each run row, so state reads the same in both places. +

+
+ + +
+
+ 02 +

Card states — the four real statuses

+
+
+
+
+
+
IdleDaily
+ Ran ok +
+

Healthy, last run succeeded, waiting for the next window.

+
+ ScheduleDaily · 09:00 + + + + + + + Next runin 6h 12m +
+
+ +
+
+
RunningWeekly
+ Running +
+

Active right now — run button pulses and disables, strip turns brand orange.

+
+ ScheduleMon · 04:00 + + + + + + + Started2m ago +
+
+ +
+
+
FailedHourly
+ Failed +
+

Last run errored. The strip + chip turn destructive red — the schedule still shows, so you know it'll retry next window.

+
+ ScheduleHourly · :00 + + + + + + + Next runin 41m +
+
+ +
+
+
PausedDaily
+
+

Disabled via the switch — whole card dims, the grey strip signals "not armed", no next-run line.

+
+ ScheduleDaily · 09:00 + + + + + + + StatusPaused +
+
+
+
+

+ State = strip colour + chip. Today a success/error is just a 3.5px icon buried + in the foot row — easy to miss, and indistinguishable from decoration. The redesign + promotes it to a labelled chip (Ran ok / Running / Failed) and mirrors it on the + left edge. Paused drops the chip entirely and dims the card, since "paused" is the + absence of a run state rather than one. +

+
+ + +
+
+ 03 +

Empty state — first-run

+
+
+
+
+

Automations

+
+
+ + +
+ +
+
+
+
+
+
+
No automations yet
+
Use agents to handle recurring work on a cadence you choose — code reviews, dependency audits, issue triage.
+
+ + +
+
+
+
+
+
+
+ + +
+
+ 04 +

Templates — gallery

+
+
+
+
+

Automations

+
+
+ + +
+ +
+
+
+
+

Start from a template — pick a project and confirm.

+
+ + + + +
+
+
+
+
+

+ Template cards gain a small mode + trigger foot line (Autopilot/Ask/Plan · + schedule/manual) so you know what you're committing to before you click — the current + card only shows name + badge. +

+
+ + +
+
+ 05 +

Editor dialog — create / edit

+
+
+
+ +
+
+
+ + +
+
+ 06 +

Run detail — a finished run, replayed

+
+
+ +
+
+ +
+ Daily code review + Completed +
+
+ triggerschedule + · + windowDaily 09:00 + · + ranJun 24, 09:00 + · + duration42s + · + tools6 calls + · + tokens4,812 +
+
+ +
+
+ +
+
+
+
Automation prompt
+
Review open PRs on main, summarise risks, and post a thread to the team channel with suggested reviewers.
+
+
+ + +
+
+ + read + internal/automation/scheduler.go +
+
+ + +
+
+ + execute + git log --oneline -5 origin/main +
+
+
$ git log --oneline -5 origin/main +950efe8 feat(web): sidebar nav redesign + Channels page +099b133 fix(automations): keep the left sidebar visible +d1f61c0 fix(automations): align overlay header +5073afd fix(automations): resolve CI lint +8f08269 feat(automations): scheduled & manual tasks
+
+
+ + +
+
+ + edit + internal/web/automation_api.go + +12/−3 +
+
+ + +
+
+ + jcode +
+
+

Reviewed the 3 open PRs on main. Two are low-risk doc/CI tweaks; #412 changes the scheduler's locking and needs a closer look.

+

Summary thread posted to #eng-reviews with suggested reviewers: @ada for #412, @bo for the rest.

+
+
42s · 4,812 tokens
+
+
+
+ +
+
+ Completed in 42s · 6 tool calls. +
+ + +
+
+
+
+
+

+ The run prompt is not a user message. The single piece the chat canvas lacks is + the trigger itself — here it's a brand-tinted block with an AUTOMATION PROMPT + eyebrow + bolt icon, so the timeline opens with "this came from the schedule at 09:00", + not an anonymous U avatar. Everything below reuses ChatMessage + + ToolCallCard verbatim — the run's session_id is just a session + replayed via api.session(), so no new renderer is needed. The header packs + the six facts you'd otherwise hunt for (trigger · window · when · duration · tool count · + tokens) into one dense line, and the footer is a Run again action, not a composer. +

+
+ + +
+
+ 07 +

Run detail — a live run, mid-execution

+
+
+
+
+ +
+ Weekly dependency audit + Running +
+
+ triggerschedule + · + windowMon 04:00 + · + started2m ago + · + tools4 calls + · + tokens3,107 +
+
+ +
+
+
+
+
+
Automation prompt
+
Check go.sum for known CVEs, open issues for anything critical, and draft a bump PR for safe patches.
+
+
+ + +
+
+ + execute + govulncheck ./… +
+
+
$ govulncheck ./… +=== Symbol Results === +Vulnerability #1: GO-2024-2687 (golang.org/x/crypto) + Found in: golang.org/x/crypto@v0.21.0 + Fixed in: golang.org/x/crypto@v0.22.0
+
+
+ + +
+
+ + grep + scanning go.sum references… +
+
+ + +
+ + Thinking… + 2m 04s +
+
+
+ +
+
+ Running for 2m 04s · 4 tool calls so far. +
+ +
+
+
+
+
+

+ Same surface, live. A running automation lands here the same way — clicked from its + card (status chip Running) or from the running… run row. The differences are + minimal: the active tool's name shimmers like the chat canvas, the Thinking… + footer with a live timer trails the timeline, and the footer swaps Run again for a + Stop run button. Because the session is streamed over the same WebSocket events the + chat page already handles (onAgentText, onToolCall, + onToolResult), wiring this up is pointing the existing handlers at the run's + session_id rather than building new plumbing. +

+
+
+ + + + diff --git a/design/nav-actions-redesign.html b/design/nav-actions-redesign.html new file mode 100644 index 0000000..fd60389 --- /dev/null +++ b/design/nav-actions-redesign.html @@ -0,0 +1,510 @@ + + + + + +jcode + + + + + + + + + + +
+ +
+
+ jcode +
+ +
+ + + + + +
+ + +
+ + +
+
+
+ + Channels + +

Approve and monitor from your phone

+

+ Link WeChat once. jcode forwards every approval request and + task-completion notice straight to your chat — so a long-running + agent never waits on you being at the desk. +

+ +
+
+ +
+

One-tap approvals

+

Approve or deny a tool call right from the WeChat message — no need to open the app.

+
+
+
+ +
+

Done notifications

+

Get pinged the moment a scheduled automation or background task finishes.

+
+
+
+ +
+

Scan to connect

+

Pair in seconds with a QR code. Disconnect any time from Settings → Channels.

+
+
+
+ +
+ + or press⌘⇧C +
+
+
+ + +
+
+
+
+
+ 9:41 + +
+ + +
+
+ j + jcode + now +
+
+ Approval needed
+ Run git push origin? +
+
+ + +
+
+ + +
+
jcode · just now
+
✓ Nightly automations finished
+ 3 tasks · 0 failed +
+
+
+
+ +
+ + +
+ +

Start a new task

+

Begin a fresh conversation. Your previous sessions stay in the workspace tree.

+
+ + +
+ +

Automations

+

Scheduled and manual agent tasks. Runs and history live here.

+
+ +
+ +
+
+ + + + + diff --git a/design/sidebar-redesign.html b/design/sidebar-redesign.html new file mode 100644 index 0000000..2b944b3 --- /dev/null +++ b/design/sidebar-redesign.html @@ -0,0 +1,1179 @@ + + + + + +jcode — Sidebar Redesign + + + + + + + + + + + +
+
+
+

Sidebar — redesign

+

+ The left rail of jcode: a workspace tree of projects and + their conversations. This doc captures it as it is — every state it can + be in — in the shell's own tokens, so spacing, type, and color read as + one system with the canvas it sits next to. +

+
+
+ + +
+
+ + +
+
+ 01 +

At rest — group by project

+
+ +
+
+ + + +
+
Anatomy, top → bottom
+

+ Header — a paired set of bordered-surface buttons. "New task" + leads with the action; "Automations" is the quieter page-switcher + (its .active wash mirrors the active-project row). +

+ Tree — collapsible folder rows, each a chevron + folder + glyph + name + count. A collapsed folder with live work shows a + breathing ring; the + appears on hover. Task rows hang + off a left rail: a status mark, the title, a mono timestamp, and a + ⋯ that surfaces on hover. +

+ Footer — theme + settings, right-aligned on a hairline. +

+
+
+
+
+ + +
+
+ 02 +

Task row states

+
+ +
+

+ One row shape, six signals. The leftmost mark carries the state so a + scan down the rail reads status before title. +

+ +
+
+ + Running — breathing ring + accent rail + running +
+
+ + Active — neutral wash + accent rail + now +
+
+ + Unread — filled dot + 2h +
+
+ + + Pinned — bookmark glyph + 1d +
+
+ + Archived — dimmed to 55% + 5d +
+
+ + +
+
+
+
+ + +
+
+ 03 +

Group by date

+
+ +
+
+ + +

+ A flat chronological feed: no folder affordances, no indent rail. + Each row carries a quiet project tag (right-truncated) since + tasks no longer hang under a folder. Buckets are calendar-day based + (Today / Yesterday) then rolling windows (This week → Older). +

+
+
+
+ + +
+
+ 04 +

Filter menu

+
+ +
+

+ A single trigger in the tree head. Dirty state shows a dot so an + applied filter reads without opening. The panel drills in-place (no + side-flyout) to stay within the narrow rail. +

+ +
+ +
+
+
+ Workspace +
+ +
+
+
default
+
+
+
+ Workspace +
+ +
+
+
dirty (filters applied)
+
+
+ + +
+
+ + + +
+ + +
+ +
+
root list
+
+ + +
+
+ +
+
+ + + + +
+
+
drill-in (with check)
+
+
+
+
+ + +
+
+ 05 +

Task action menu (⋯)

+
+ +
+

+ Surfaces on row hover. Near the bottom of the rail the panel + flips up so it never clips against the window edge. A divider + separates the safe metadata actions from destructive delete. +

+ +
+ +
+ +
open (downward) · Pin hovered
+
+ + +
+ +
flip-up · near bottom · Delete hovered
+
+
+
+
+ + +
+
+ 06 +

Footer & Tauri titlebar

+
+ +
+
+ +
+
+ +
footer — right-aligned on a hairline
+
+ + +
+
+
+
+
+
+ +
+
~ canvas
+
+
+
+
+ Tauri (macOS): header padding-top: 12px sits under the + 28px title bar. "Automations" shown .active. +
+
+
+
+
+ +
+ + + + diff --git a/docs/automations-prd.md b/docs/automations-prd.md new file mode 100644 index 0000000..da658a3 --- /dev/null +++ b/docs/automations-prd.md @@ -0,0 +1,342 @@ +# jcode Automations(自动化)PRD + +> 状态:草案 **v2**(2026-06-23,关键决策已定,待实现) +> 对标形态:Anthropic Claude Code 的 **Automations** —— 用 agent 处理「按节奏 / 手动触发」的重复性工作。 +> 关联:[[web 任务化架构]](`docs/web-task-architecture.md`)、Goal(`docs/goal.md`)、Mode 选择器(Ask/Plan/Autopilot)、Skills、MCP。 +> +> v1→v2 变更:①去掉事件/GitHub 触发(先不碰 gh)②去掉 Effort/推理力度 ③新增 agent 可调用的 `automation_create` 工具 + 渲染卡片 ④调度器定为「文件锁选主」⑤存储定为「flock 写锁 + 易变态分离」⑥运行时不加护栏(沿用既有哲学)。 + +--- + +## 1. 一句话定义与背景 + +**Automations = 让一条 jcode 任务(session)按定时 / 手动自动跑起来。** + +它**不是新引擎**:一次运行 = 一条普通 jcode session(带转写、可 resume、可看 diff、可通知),只多打 `automation_id` 标签。新增的只有「定义 + 触发器 + 调度」三件,外加一个让 **agent 自己也能创建自动化** 的工具+卡片。 + +底座已就绪:任务型 web + 并行 Engine(每任务一份)+ 多项目 + 跨前端共核(TUI/Web/ACP/CLI)+ Goal + Mode + Skills + 远程工作区。 + +参考截图三屏:自动化列表页、新建自动化弹窗、模板页(逐元素映射见 §4)。 + +--- + +## 2. 目标 / 非目标 + +### 目标 +- 创建、编辑、启停、手动运行、查看历史一条自动化。 +- 触发:**定时**(Hourly/Daily/Weekly + 时分)+ **手动**(Run now)。 +- **全前端一等公民**:定义/调度/历史下沉核心层(`internal/automation` + `~/.jcode/`);Web/桌面完整 UI;CLI `jcode automation …`;TUI `/automation`;ACP 最小能力。 +- **agent 可创建**:新增 `automation_create` 工具,agent 能从自然语言生成自动化草稿,**经用户在卡片上确认后**落库。 +- **模板** + **技能转自动化**。 +- 运行结束有通知(复用现有通道)。 + +### 非目标(明确不做) +- **事件/GitHub 触发**:先不碰 gh,整体推迟(不在本 PRD 实现范围)。 +- **真·云端执行**:jcode 是本地工具。`RunInCloud` 字段保留(恒 false)但 v1 **不渲染 Cloud tab / 不放死开关**,改用「coming soon」tooltip。 +- **常驻守护进程**:调度器跑在 `jcode web` 进程内(App 开着才触发)。`jcode daemon` + launchd/systemd 是后续承接「App 关闭也跑」的路径,不在 v1。 +- **运行时安全护栏**:不做全局总开关 / 单次 MaxTurns·超时 / 强制审计通知。沿用既定哲学「Autopilot 接受全部风险、不设护栏」;安全把关只发生在**创建处**(§8 human-in-loop),armed 之后全信任。 +- **推理力度(Effort)选择**:去掉。 +- 并发上限 / 排队(沿用「本地单人工具,不设上限、不排队」);多用户/团队共享;自动化间 DAG 编排。 + +--- + +## 3. 两个硬约束(研究证实,约束全局设计) + +1. 
**无人值守的定时运行在结构上只能是 Autopilot/full_access。** 没有 WS 客户端连着时,Ask/Plan 的审批请求会永远阻塞(`internal/handler/web.go:410` 的 `RequestApproval` 仅靠 ctx 取消解开;full_access 在 `internal/runner/approval.go:253` 直接 auto-approve)。所以「定时跑」恒等于「自动批准一切的 full_access 跑」——这是结构所迫,不是偏好。 +2. **桌面 App 一关,web 进程即被杀**(`desktop/src-tauri/src/main.rs:178` 显式 `child.kill()`,每次开 App 重起 sidecar)。任何进程内调度器在 App 关闭后都不跑;要「关 App/关机也跑」只有守护进程一条路(v2 之后)。 + +--- + +## 4. 截图逐元素 → jcode 映射 + +### 屏 A:新建自动化弹窗 +| 截图元素 | jcode 映射 | 备注 | +|---|---|---| +| Name | `Automation.Name` | — | +| Trigger(Daily 下拉) | `Trigger.Type=schedule` + `Cadence`(hourly/daily/weekly) | — | +| Hours / Minute | `Trigger.Hour/Minute`(weekly 再带 `Weekday`) | 本地时区 | +| Run in the cloud(开关) | `RunInCloud`(恒 false) | v1 不放死开关,改 tooltip「coming soon」 | +| Prompt(`Type / for skills`) | `Automation.Prompt` + `/` 唤起技能 | 复用 `GET /api/slash-commands` 补全 | +| Autopilot(左下) | `Automation.Mode`(Ask/Plan/Autopilot) | **schedule 触发强制 Autopilot**(见 §3、§7.4) | +| Select project | `Automation.ProjectPath` | **必填**;空 = 无人值守不支持 → skip+停用(§7.5) | +| Claude Sonnet 4.6 | `Provider`/`Model` | 留空=全局默认 | +| ~~High(推理力度)~~ | **去掉** | — | +| 「Without a project … quick chat」 | `ProjectPath==""` | jcode **不支持**无人值守跑(§7.5),与截图分歧 | +| Cancel / Create / Create and run | `POST /api/automations`(可带 `run_now`) | — | + +### 屏 B:自动化列表页 +| 截图元素 | jcode 映射 | +|---|---| +| 左侧导航「Automations」 | Sidebar 新增一级入口 | +| Tabs:All / Local / ~~Cloud~~ | v1 **砍掉 Cloud tab**(保留字段,留待云端) | +| Your automations 卡片(名/节奏徽标/prompt 预览/最近运行/▶) | `GET /api/automations`;▶ = `POST …/{id}/run` | +| Recent runs(按日期分组、状态、时间戳) | `AutomationID != ""` 的 session 子集 | +| 搜索框 | 前端过滤 | + +### 屏 C:模板页 +| 截图元素 | jcode 映射 | +|---|---| +| 6 张模板卡(带 Daily/Weekly/Manual 徽标) | 内置模板(embed),点卡→预填新建弹窗 | +| Skills 区「Turn an existing agent skill into an automation」 | 列 `GET /api/skills`,选一个→预填 `prompt=/` | + +--- + +## 5. 
已定决策(汇总) + +| # | 决策 | 来源 | +|---|---|---| +| D1 | 执行**仅本地**;`RunInCloud` 留字段、UI 砍死 tab/死开关 | owner | +| D2 | 触发 = **定时 + 手动**;事件/gh 推迟 | owner | +| D3 | **全前端一等公民**:核心层承载定义/调度/历史 | owner | +| D4 | 调度器 = **文件锁选主**(`~/.jcode/automation-scheduler.lock`);并把这把锁复用为存储写锁 | owner | +| D5 | `automation_create` 工具 = **human-in-the-loop**:走 `ask_user` 式阻塞回路,用户在卡片确认才落库 | owner | +| D6 | **运行时不加护栏**(无总开关/无单次上限/无强制审计) | owner | +| D7 | 存储 = **flock 写锁 + 易变调度态与用户定义分文件** | owner | +| D8 | 去掉 Effort | owner | +| D9 | schedule 触发**强制 Autopilot**(Ask/Plan 会 hang,§3) | 研究结论 | + +--- + +## 6. 核心概念与数据模型 + +运行历史复用既有 Task/Session,新增模型只有「定义」「易变态」「模板」。**用户定义与高频调度态分两个文件**(D7:避免调度器的频繁写与人工编辑互相覆盖)。 + +```go +// internal/automation/types.go(新 core 叶子包,仿 internal/mode) + +// —— 用户定义,存 ~/.jcode/automations.json(人工低频写)—— +type Automation struct { + ID string `json:"id"` + Name string `json:"name"` + Prompt string `json:"prompt"` + Trigger Trigger `json:"trigger"` + ProjectPath string `json:"project_path"` // 必填本地路径;空 → 禁止 headless 跑(skip+停用,§7.5) + Mode string `json:"mode"` // approval|plan|full_access;schedule 触发强制 full_access + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + RunInCloud bool `json:"run_in_cloud"` // v1 恒 false + Enabled bool `json:"enabled"` + Source string `json:"source"` // manual|template:|skill:|agent + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +type Trigger struct { + Type string `json:"type"` // "schedule" | "manual" + Cadence string `json:"cadence,omitempty"` // "hourly" | "daily" | "weekly" + Hour int `json:"hour,omitempty"` // 0-23 本地 + Minute int `json:"minute,omitempty"` // 0-59 + Weekday int `json:"weekday,omitempty"` // 0=Sun..6=Sat +} + +// —— 易变调度态,存 ~/.jcode/automation-state.json(调度器高频写)—— +type RunState struct { + LastRunAt string `json:"last_run_at,omitempty"` + LastStatus string `json:"last_status,omitempty"` // success|error|running + LastSessionID string 
`json:"last_session_id,omitempty"` + NextRunAt string `json:"next_run_at,omitempty"` // 纯函数算出,落盘防漂移 + LastFiredSlot string `json:"last_fired_slot,omitempty"` // date+H+M 去重键(防 DST 回拨重跑) +} + +// —— 运行历史:给 session.SessionMeta 加两字段(不另建存储)—— +// AutomationID string // 关联的自动化 +// TriggerKind string // scheduled | manual +// 「Recent runs」= ListAllSessions() 里 AutomationID != "" 的子集; +// 普通任务列表反过来排除 AutomationID != ""(§7.6)。 +``` + +**模板**(内置 embed,仿内置技能): + +```go +type Template struct { + ID, Name, Description, Badge string // Badge: Daily|Weekly|Manual(展示) + Prompt string + Trigger Trigger + SuggestMode string +} +// 初始 6 个对齐截图:issue-triage / changelog-draft / repo-audit / +// perf-improvements / a11y-audit / cost-tips +``` + +**单一校验**(仿 `ValidateGoalObjective`,`internal/tools/goal.go:308` 那句"every entry point 共用一个校验"):`automation.ValidateAutomation(a)` 是**所有创建路径(弹窗 HTTP / agent 工具 / CLI)唯一的校验入口**。 + +--- + +## 7. 功能需求 + +### 7.1 自动化列表页 +卡片(名/节奏徽标/prompt 预览/最近运行+状态/▶);Tabs **All / Local**(无 Cloud);Recent runs 按日期分组、点进=打开该 session 转写;顶部「New automation」「Browse templates」+ 搜索。 + +### 7.2 新建/编辑弹窗 +字段:Name、Trigger(节奏+时分)、Prompt(`/` 技能补全)、Mode、Project(**必填**)、Model。`RunInCloud` 开关→tooltip「coming soon」。「Create」保存;「Create and run」保存并立即手动跑一次。**无项目不允许保存**(提示需选项目,§7.5)。**Trigger=schedule 时 Mode 锁/警示为 Autopilot**(§3)。删除/启停走卡片右键。 + +### 7.3 触发类型 +- **定时**:hourly/daily/weekly + 时分,本地时区。`NextRunAt` 由**纯函数** `ComputeNextRun(now, trigger, tz)` 每次触发后重算并落盘(可单测,避开沙箱时钟限制)。错过窗口(笔记本休眠等)**默认跳过不补跑**。DST「回拨」靠 `LastFiredSlot`(date+H+M)去重防双触发。 +- **手动**:列表 ▶ / CLI / TUI `run`;不经调度器锁,任意前端进程直接 `StartRun`。 + +### 7.4 运行语义(关键) +- 一次运行 = 调 `internal/web` 的 `buildLocalEngine` 工厂建一条 Engine(按 `pwd/mode/provider/model`)→ `submitMessage(prompt)` 跑到结束(后台 goroutine,落 session JSONL,与用户手点任务同路径)→ 打 `automation_id`/`trigger_kind` 标签。 +- **schedule 触发恒 full_access**(§3 硬约束)。manual 触发若有客户端连着,可用 Ask/Plan(用户在场能答审批)。 +- **不加运行时护栏**(D6):不设单次 MaxTurns/超时、无全局总开关。仅有的边界是既有 runner 轮次上限。 +- 
完成信号:`NotifyingHandler.SetDoneNotifier`(`internal/handler/notifying.go:49`);完成发普通通知(native/浏览器/WeChat/BLE),**非强制审计**。 +- 触发前**预检 `ProjectPath`**:路径缺失(被删/移走/未挂载)→ 跳过本次、`LastStatus=error`+原因、发失败通知;连续 N 次缺失→自动 `Enabled=false` 止损(防每晚重复空跑)。 + +### 7.5 无项目自动化:禁止 headless 跑(skip + stop) +`ProjectPath==""` 时 headless 没有可继承 pwd(`activeEngine().pwd` 回退为空,`server.go:1020`),full_access 会落到启动 cwd 跑 `edit/rm`(机制确认:`env.go:98/215` 空 pwd→`cmd.Dir` 未设→launch cwd,逃逸守护失效)——**风险不可接受**。**决策(owner):不做 scratch 兜底**,改为 **`ProjectPath` 为空/缺失 → 跳过本次 run + 停用该自动化(disable)+ 通知**。`ValidateAutomation` 对 schedule 触发**要求非空本地 ProjectPath**(与 §10.4 远程路径拒绝合并);弹窗 Project 必填、空不让存。截图的"无项目=quick chat"在 jcode 无人值守场景**不支持**。 + +### 7.6 运行历史不污染主列表 +`ListAllSessions()`(`internal/session/session.go:873`)是主侧栏与自动化页共用源。**一个谓词两处用**:主任务列表**排除** `AutomationID!=""`,自动化页**只收** `AutomationID!=""`。保留策略:每条自动化只留最近 N 条运行(独立于用户会话清理)。 + +### 7.7 模板 & 技能转自动化 +模板页 6 卡,点卡→预填弹窗(含默认节奏/建议模式)。Skills 区列 `GET /api/skills`(可搜),选一个→预填 `Prompt=/` + 默认 daily + Autopilot。 + +--- + +## 8. agent_create 工具 + 渲染卡片(human-in-the-loop) + +让 agent 从自然语言创建自动化("以后每天早上帮我跑测试并总结失败项")。**安全闸门在创建处**:工具不直接落库,走 `ask_user` 式阻塞回路,用户在卡片上确认/编辑后才提交(D5)——挡住 prompt 注入静默造一条"每天自动批准"的后门。 + +**机制**(照搬 `ask_user` 的 请求→卡片→resolve 阻塞回路,非 goal 那种被动 banner): +1. agent 调 `automation_create`(参数=解析出的 name/prompt/trigger/project/mode)。 +2. 工具 handler 调 `WebHandler.RequestAutomation(ctx, draft)` → emit `automation_request`(带唯一 id)→ **阻塞在 channel**(仿 `RequestAskUser`,`internal/handler/web.go:283-318/515-532`)。 +3. WS → `AutomationCard.vue` 渲染草稿预览 + Confirm/Edit/Cancel。 +4. 用户 Confirm → `POST /api/automations`(带 request id)→ `ResolveAutomation(id, draft')` → 经**唯一** `automation.Store.Create`+`ValidateAutomation` 落库 → 解开 channel,工具返回「已创建」。Cancel → 工具返回「用户取消」。 +5. 
弹窗路径与工具路径**共用同一个 `Store.Create`**(仿 goal 单校验),唯一差别是"谁点的 Create"。 + +**新增/改动文件**(研究已勘定): +- 新增 `internal/tools/automation.go`(`automation_create` 工具 + 草稿类型)。注意:工具落库的是 `internal/automation.Store`,不在 tools 包重造存储——tools 包仅持一个对 Store 的引用(挂在 `tools.Env`,仿 `Env.GoalStore`,`internal/tools/env.go`)。 +- 注册:两处 `buildAllTools` 各加一行 —— `internal/command/web.go:286-313`、`internal/command/interactive.go:82-110`(全前端自动获得)。 +- `internal/handler/web.go`:加 `WebAutomationRequestData` + `RequestAutomation`/`ResolveAutomation`(镜像 ask_user)。 +- `internal/web/server.go`:`POST /api/automations` 兼作 resolve(带 request id 时)。 +- 前端:新增 `web/src/components/AutomationCard.vue`(仿 `AskUserCard.vue`);`ws.ts` 加 `automation_request` 派发;`stores/chat.ts` 加 `onAutomationRequest`/`submitAutomation`;`types/api.ts`、`api.ts` 加类型与端点。 + +--- + +## 9. 技术设计 + +### 9.1 新 core 包 `internal/automation`(叶子包) +`types.go`(§6)、`store.go`(读写两个 json + flock + version 迁移)、`scheduler.go`(选主+循环)、`validate.go`(`ValidateAutomation`)、`templates/`(embed)。不依赖 web/tui;各前端注入「如何跑一条 run」的 `Runner` 回调(避免 import 环,与 mode/goal 同模式)。 + +### 9.2 存储(D7) +- `~/.jcode/automations.json`(定义,低频人工写)+ `~/.jcode/automation-state.json`(易变态,调度器高频写)。 +- **跨进程写锁**:所有写者(任意进程)先抢 `~/.jcode/automation.lock`(flock/`syscall.Flock`)再 read-modify-write。这把锁与调度选主锁**复用同一基础设施**。理由:`session.json` 只有进程内 `sync.Mutex`(`session.go:590-599`,注释自承「lost updates, last one wins」),`config.json` 是裸 `os.WriteFile`(`config.go:408-427`)——都不能跨进程;自动化是跨前端多进程并写,必须 flock。 +- 不塞进 `config.json`(整文件覆盖写,并发更差)。 + +### 9.3 调度器:文件锁选主(D4) +- `~/.jcode/automation-scheduler.lock`(flock)。任意常驻进程启动时尝试抢锁:**赢家**跑定时循环;**输家**仅管理定义+手动跑(手动不经锁,直接 `StartRun`)。owner 优先级:未来 `jcode daemon` > `jcode web`。v1 实际就是 `jcode web` 持锁。 +- 启动清理 stale lock(崩溃残留);`ctx.Done()`/退出释放。 +- 循环:每 ~30s tick,扫 enabled 且 schedule 的自动化,`NextRunAt<=now` 且 `slot!=LastFiredSlot` → 预检 pwd → `StartRun(scheduled)` → 重算落盘 `NextRunAt`+`LastFiredSlot`。 +- **不排队、不限并发**(沿用既定原则);同刻多触发=多 Engine 同起,惊群风险**接受**(与 web 任务化"不设上限"一致)。 + +### 9.4 
Runner(复用 Engine,headless) +`internal/automation` 定义 `Runner` 接口,`internal/web` 实现: +``` +StartRun(a Automation, kind) (sessionID, done<-chan error): + if engineCount() >= cap → 记 LastStatus=error + 通知, 直接返回 // 无 idle-evict, §10.1 C1 + eng := buildLocalEngine("", resolvePwd(a), "full_access") // schedule 恒 full_access;resolvePwd 见 §7.5 + 设 provider/model;工具集剔除 ask_user + automation_create // headless 不可交互/不可再造, §10.1 C2 + SetDoneNotifier(→ 写 terminal status + deleteEngine(eng) + 通知 → done) + sessionID := submitMessage(eng, a.Prompt, ...) // 后台跑,立即返回 + // 落 JSONL 不依赖 WS 客户端(事件泵无背压,server.go:303-318) + stampSessionMeta(sessionID, a.ID, kind) // 写 AutomationID/TriggerKind +``` +关键事实/纠正(研究证实):headless 跑不需要 WS 订阅者,recorder 同步落盘;完成靠 `SetDoneNotifier`/`OnAgentDone`。**纠正两处原假设(见 §10.1)**:①Engine **没有 idle-evict**(`internal/web` 无 reaper),cap=64 命中即 `errTooManyTasks` 硬失败 → 定时 run **必须在完成回调里 `deleteEngine` 自销**,否则累积泄漏直至静默失败;②full_access 只旁路审批,**`ask_user`/`automation_create` 仍阻塞**(`handler/web.go:490` 无模式旁路)→ headless 下这类工具调用必须 auto-fail,否则 run 永久 hang。 + +### 9.5 HTTP API(`internal/web/server.go` 新增) +- `GET/POST /api/automations`(POST 带 `run_now` = Create and run;带 request id = resolve agent 草稿) +- `GET/PUT/DELETE /api/automations/{id}` +- `POST /api/automations/{id}/run`(手动,返回 session_id) +- `GET /api/automations/runs[?automation_id=]`(= `ListAllSessions` 过滤) +- `GET /api/automation-templates` +- 复用 `GET /api/skills`、`GET /api/slash-commands` + +### 9.6 前端 +新增 `views/AutomationsView.vue`(列表:All/Local + Your automations + Recent runs + 搜索)、`AutomationEditorDialog.vue`(弹窗,复用 mode/model/WorkspacePicker 子组件 + cloud tooltip)、`AutomationTemplatesView.vue`、`AutomationCard.vue`(§8)。Sidebar 加入口;`stores/automation.ts`。主任务列表加 `AutomationID` 排除谓词(§7.6)。 + +### 9.7 CLI / TUI / ACP(全前端一等公民) +- CLI:`jcode automation list|show|create|run|enable|disable|delete`、`templates`。 +- TUI:`/automation`(list/启停/手动 run;不在 TUI 内长跑调度)。 +- ACP:暴露最小能力(list + run)。 +- 三者读写同一 `automation.Store`(经 
flock),定义全局一致。 + +### 9.8 通知 +复用 `NotifyingHandler.SetDoneNotifier`(native/浏览器/WeChat/BLE)。无人值守→运行结束默认发通知(成功/失败均发);非强制审计、无附加要求(D6)。 + +--- + +## 10. 边界条件与可靠性(reliability) + +> 来源:可靠性/SRE 视角穷举(研究 wf)。戳破了原 PRD 两个**错误假设**(C1/C2),并补全审计与关闭语义。 + +### 10.1 两处错误假设的纠正(最高优先) +- **C1 · Engine 无 idle-evict、cap 64 硬失败**(原 §9.4 写错):`internal/web` 无任何时间 reaper(`engine.go:263/277`,`registerEngine` 命中即 `errTooManyTasks`,不腾位)。→ 定时 run 必须**完成即 `deleteEngine` 自销**(throwaway engine,非用户 tab);cap 命中→记 failed run + 通知,别静默丢。 +- **C2 · `ask_user`/`automation_create` headless 照样阻塞**:full_access 只旁路审批(`approval.go:253`),`RequestAskUser`(`handler/web.go:490`)**无模式旁路**会永久 hang。**决策(owner):自动化 run 的工具集直接 exclude `ask_user` + `automation_create`**(不是运行时 auto-fail,而是 `buildAllTools` 按 run 类型剔除)——agent 根本拿不到这两个工具,既不会 hang 也不会"自动化造自动化"。 + +### 10.2 审计日志(回答"我有 audit log 吗") +- **已有·transcript 级(够用)**:每条 run = 一份 session JSONL,逐轮记 user/assistant + tool call↔result(按 toolCallID 配对、带 `error` 字段、毫秒时间戳、append-only 同步落盘)。"这条自动化做了什么"可完整追溯。 +- **缺·run-outcome 级(必补)**:`SessionMeta.Status` 只有 idle/running 二态,**没有 success/error 终态、没有 EndTime、没有 error reason 落盘**(`OnAgentDone(err)` 的 err 只进瞬时 WS 事件,不落盘)→ **撑不起截图的 Success/Failed 过滤**。补:`SessionMeta` 加 `EndTime/TerminalStatus/ErrorReason`(+ `AutomationID/TriggerKind`),在 `OnAgentDone` 里 `RecordSessionEnd` 落盘。可选 `~/.jcode/automation-runs.jsonl` append-only 审计(仿 `usage/events.jsonl`,跨进程安全、易过滤)。 + +### 10.3 关闭/排空(回答"关闭能确认 task 都结束吗") +- **今天不能,且有 bug**:`jcode web` 收 SIGINT 走 `http.Server.Shutdown(context.Background())` **无超时**、Engine teardown 只等 **1s** 即关 recorder(>1s 的 run 被半切、可能 mid-write);桌面端 `child.kill()`=SIGTERM 但 Tauri **不等**;web/desktop **无"N 个任务在跑,确认退出?"对话框**(只有 TUI `pickers.go` 有)。 +- **stale "running" bug**:进程被切后 `SessionMeta.Status` 永停 `running`,**无 startup reconciliation**(`engine.go:448` 异步写 + 无回收)。 +- **补**:①调度 owner 启动时**扫 stale `running`→`interrupted/error`**(按 owning PID/lock 代际);②给 `Shutdown` 加超时 ctx;③被切的 run 记一条 `run_terminated` 
marker(区分正常完成/被切);④web/desktop 退出前若有 in-flight 自动化 run → drain(带超时)或确认对话框。flock 由 OS 在进程死时自动释放(含 SIGKILL),不会死锁 ✓。 + +### 10.4 其余边界(分级,"还有其他吗") +| 级别 | 边界 | 处理 | +|---|---|---| +| 关键 | 同一自动化上次没跑完、下个 tick 又到(重叠,racing 同 working tree 的 full_access `git`/`edit`) | **skip-if-running**(`LastStatus==running` 跳过,带 stale 守护防 §10.3 卡死) | +| 关键 | run 永不终止(agent 死循环;无 goal 时连 25 轮上限都没有,`runner.go:110`) | **决策**(§10.5):scheduled-only 存活上限 vs 接受 engine 泄漏到重启 | +| 高 | 空 pwd full_access 在启动目录跑 `rm`(`env.go:98/215` 空 pwd→`cmd.Dir` 未设→launch cwd,逃逸守护失效) | **owner 定:empty pwd → skip + 停用自动化 + 通知**(不做 scratch 兜底,§7.5) | +| 高 | 选主锁与存储写锁**复用同一把**(D4/D7)→ 长持选主锁饿死短写 | **拆两把**:选主锁长持、存储写锁短持 | +| 高 | 远程(SSH/Docker) `ProjectPath` 被指过来 → Runner 用 `buildLocalEngine` 在本地跑一个错的本地路径 | `ValidateAutomation` v1 **拒绝非本地 ProjectPath** | +| 中 | prompt 里 `/skill` 已禁用/删、model/provider 已删/key 没了、MCP 挂了、配额/限流/断网 | 统一**触发前预检 + 连续失败 N 次自动禁用**(把 §7.4 的 pwd 预检扩成通用) | +| 中 | DST **春进**(02:30 这种 slot 当天不存在,原只处理了秋退去重) | `ComputeNextRun` 必须落到真实 instant;纯函数单测覆盖两次 DST 切换 | +| 中 | 时钟/NTP/时区跳变致 `NextRunAt` 卡死或漂移 | 每 tick 用 `ComputeNextRun(now,…)` **重算**,存值只当去重提示 | +| 中 | agent_create 卡片的 resolve 被伪造(毒 prompt `curl` localhost 自确认) | request_id 绑**服务端 nonce**,不进工具输出、agent 不可见 | +| 低/UX | 通知风暴(hourly = 24 条/天) | **owner 定:成功/失败全通知**(接受 hourly 量;嫌多可后续加合并) | +| 低 | corrupt `automation-state.json`、磁盘满 mid-transcript、编辑 schedule 与 run 回写互踩 | 解析失败→重建空 state 不崩启动;run 回写**只碰 state 文件、不碰定义文件**(D7 分文件天然帮到) | + +### 10.5 决策(owner 已定 4 项 / 待定 1 项) +- ✅ **无项目**:empty/缺失 pwd → **skip + 停用 + 通知**(不做 scratch);Project 必填、非本地路径拒绝(§7.5)。 +- ✅ **headless 工具**:自动化 run 工具集 **exclude `ask_user` + `automation_create`**(§10.1 C2)。 +- ✅ **通知**:成功/失败**全通知**(接受 hourly 量)。 +- ✅ **退出语义**:关闭时 in-flight run **短超时 drain,超时标 interrupted**(§10.3)。 +- ⏳ **待定 · 存活上限**:scheduled run 要不要加宽松 wall-clock 上限防永久 hang?这是**存活**护栏(防 run 卡死泄漏 engine),非你拒掉的**安全**护栏。建议加(仅 scheduled,如 30min)。 + +--- + +## 11. 
分期落地 + +### Phase 0 —— 核心 + 手动(无 UI 也有值) +`internal/automation` 包(types/store 双文件+flock/validate/模板 embed);`SessionMeta` 加 `AutomationID/TriggerKind/TerminalStatus/EndTime/ErrorReason` + 索引 version 迁移;`Runner` 接口 + web 实现(复用 Engine、完成即 `deleteEngine` 自销、工具集剔除 ask_user/automation_create、empty-pwd→skip+停用);`RecordSessionEnd` 落终态;CLI `jcode automation …`;完成通知。 + +### Phase 1 —— Web UI + 定时调度 + agent 工具 +列表页/弹窗/模板页/技能转自动化(cloud tooltip);调度器(flock 选主)+ 定时(hourly/daily/weekly,纯函数 NextRunAt + slot 去重 + pwd 预检);`automation_create` 工具 + `AutomationCard`(human-in-loop);HTTP API 全量;Sidebar 入口 + 主列表排除谓词;Recent runs。 + +### Phase 2 —— 其余前端 + 守护进程承接 +TUI `/automation`、ACP 最小能力;`jcode daemon`(launchd/systemd)作为更高优先级调度 owner,承接「App 关闭也跑」;(事件/gh 触发、真云端仍后续再议)。 + +--- + +## 12. 决策记录 + 仍开放的小问题 +**已按推荐定(P2 默认):** 无项目→skip+停用(不做 scratch,§7.5);主列表排除自动化 run(§7.6);项目缺失跳过+止损(§7.4);DST `LastFiredSlot` 去重(§7.3);不排队、惊群接受(§9.3);删/禁用进行中的 run "让它跑完、只停后续";resume 自动化 run = 首条人工消息起 fork 成普通会话(保留 `AutomationID` 溯源、不再重发通知);砍 Cloud tab、留字段。 + +**仍开放(实现时定):** +1. 每条自动化运行历史保留 N 的具体值。 +2. 连续 pwd-缺失自动禁用的阈值 N。 +3. manual 触发 + Ask/Plan:是否在列表 ▶ 时要求当前有 web 客户端连着(否则也会 hang)→ 倾向:▶ 仅 web 内可用且自动连当前客户端;CLI/TUI 的 `run` 强制 full_access。 + +## 13. 验收 / 测试 +受 [[jcode e2e 沙箱限制]]:live server 沙箱内绑不了 socket → 用 in-process `httptest` + 预置 json 测 API;`ComputeNextRun` 抽纯函数单测(不依赖系统时钟,**覆盖 DST 春进/秋退两次切换**);flock 单 owner:起两个进程断言只触发一次。可靠性单测:skip-if-running(上次 running 时不重叠触发);完成回调 `deleteEngine` 自销(engine 数回落);自动化 run 工具集**不含** `ask_user`/`automation_create`;empty/缺失 pwd → skip+停用;startup 把 stale `running` 扫成 interrupted;终态 `TerminalStatus` 落盘可重建 Success/Failed。冒烟(桌面/本地手动):建 daily → Create and run → 侧栏出现 run(且**不**进主列表)→ 收完成通知 → Recent runs 有记录、状态过滤 Success/Failed 生效;让 agent 说一句话→出 AutomationCard→Confirm→落库。 + +## 14. 风险 +1. **重复触发**(多前端进程)→ flock 选主(§9.3)。 +2. **prompt 注入造后门** → 创建处 human-in-loop 卡片是唯一提交边界(§8)+ resolve 绑服务端 nonce(§10.4)。 +3. **engine 泄漏致静默失败**(无 idle-evict + cap 64 硬失败)→ 完成即 `deleteEngine` 自销(§10.1 C1)。 +4. 
**headless 工具阻塞致永久 hang**(`ask_user`/`automation_create`)→ 自动化 run 工具集直接 exclude 这两个(§10.1 C2)。 +5. **关闭切断 run + stale "running"** → startup reconciliation + drain/超时 + terminated marker(§10.3)。 +6. **Success/Failed 审计撑不起** → 补 `TerminalStatus/EndTime/ErrorReason` + `RecordSessionEnd`(§10.2)。 +3. **无人值守 full_access 无护栏**(D6 主动接受)→ 风险集中在创建处把关;运行处沿用既有哲学。文档讲清。 +4. **进程不常驻**致定时漏触发 → v1 接受(App 开着才跑);Phase 2 daemon 承接。 +5. **空 pwd full_access 误操作** → empty/缺失 pwd 直接 skip + 停用,不做 scratch 兜底(§7.5)。 +6. **运行历史膨胀** → 主列表排除 + 每条保留 N(§7.6)。 diff --git a/internal/automation/filelock_unix.go b/internal/automation/filelock_unix.go new file mode 100644 index 0000000..c9cb1f9 --- /dev/null +++ b/internal/automation/filelock_unix.go @@ -0,0 +1,52 @@ +//go:build !windows + +package automation + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// fileLock is an advisory OS file lock (flock). The kernel releases it +// automatically when the holding process exits — including on crash/SIGKILL — +// so a dead owner never deadlocks the lock. +type fileLock struct{ f *os.File } + +// acquireLock blocks until the exclusive lock at path is held. +func acquireLock(path string) (*fileLock, error) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + return nil, err + } + if err := unix.Flock(int(f.Fd()), unix.LOCK_EX); err != nil { + _ = f.Close() + return nil, err + } + return &fileLock{f: f}, nil +} + +// tryAcquireLock attempts a non-blocking exclusive lock. ok=false means another +// process holds it. 
+func tryAcquireLock(path string) (lock *fileLock, ok bool, err error) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + return nil, false, err + } + if err := unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil { + _ = f.Close() + if err == unix.EWOULDBLOCK { + return nil, false, nil + } + return nil, false, err + } + return &fileLock{f: f}, true, nil +} + +func (l *fileLock) release() error { + if l == nil || l.f == nil { + return nil + } + _ = unix.Flock(int(l.f.Fd()), unix.LOCK_UN) + return l.f.Close() +} diff --git a/internal/automation/filelock_windows.go b/internal/automation/filelock_windows.go new file mode 100644 index 0000000..b1099cd --- /dev/null +++ b/internal/automation/filelock_windows.go @@ -0,0 +1,57 @@ +//go:build windows + +package automation + +import ( + "os" + + "golang.org/x/sys/windows" +) + +// fileLock is an advisory OS file lock via LockFileEx. Windows releases the lock +// when the file handle closes, which the OS does on process exit, so a crashed +// owner does not deadlock the lock. +type fileLock struct{ f *os.File } + +func lockFile(f *os.File, flags uint32) error { + ol := new(windows.Overlapped) + // Lock the first byte; that is sufficient for advisory whole-file locking + // when every participant locks the same byte range. 
+ return windows.LockFileEx(windows.Handle(f.Fd()), flags, 0, 1, 0, ol) +} + +func acquireLock(path string) (*fileLock, error) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + return nil, err + } + if err := lockFile(f, windows.LOCKFILE_EXCLUSIVE_LOCK); err != nil { + _ = f.Close() + return nil, err + } + return &fileLock{f: f}, nil +} + +func tryAcquireLock(path string) (lock *fileLock, ok bool, err error) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + return nil, false, err + } + if err := lockFile(f, windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY); err != nil { + _ = f.Close() + if err == windows.ERROR_LOCK_VIOLATION { + return nil, false, nil + } + return nil, false, err + } + return &fileLock{f: f}, true, nil +} + +func (l *fileLock) release() error { + if l == nil || l.f == nil { + return nil + } + ol := new(windows.Overlapped) + _ = windows.UnlockFileEx(windows.Handle(l.f.Fd()), 0, 1, 0, ol) + return l.f.Close() +} diff --git a/internal/automation/schedule.go b/internal/automation/schedule.go new file mode 100644 index 0000000..cf84736 --- /dev/null +++ b/internal/automation/schedule.go @@ -0,0 +1,105 @@ +package automation + +import ( + "fmt" + "time" +) + +// ComputeNextRun returns the next instant strictly after `after` that matches +// the trigger, evaluated in after's own location (the host's local tz). It is a +// pure function (no time.Now) so it is fully unit-testable, including DST +// transitions — see schedule_test.go. +// +// DST handling: each candidate is built with time.Date, which normalizes +// non-existent wall-clock times (spring-forward, e.g. a 02:30 daily on the day +// the clock jumps 02:00→03:00 lands on a real instant). Fall-back (a slot that +// occurs twice) is deduped by the caller via SlotKey/LastFiredSlot, not here. +// +// Returns ok=false for non-schedule triggers (manual never auto-fires). 
+func ComputeNextRun(after time.Time, t Trigger) (time.Time, bool) { + if t.Type != TriggerSchedule { + return time.Time{}, false + } + loc := after.Location() + switch t.Cadence { + case CadenceHourly: + // Next occurrence of :MM strictly after `after`. + c := time.Date(after.Year(), after.Month(), after.Day(), after.Hour(), t.Minute, 0, 0, loc) + for !c.After(after) { + c = c.Add(time.Hour) + } + return c, true + + case CadenceDaily: + c := time.Date(after.Year(), after.Month(), after.Day(), t.Hour, t.Minute, 0, 0, loc) + for !c.After(after) { + c = time.Date(c.Year(), c.Month(), c.Day()+1, t.Hour, t.Minute, 0, 0, loc) + } + return c, true + + case CadenceWeekly: + c := time.Date(after.Year(), after.Month(), after.Day(), t.Hour, t.Minute, 0, 0, loc) + // Advance day-by-day (calendar-safe) until weekday matches and it's in the future. + for i := 0; i < 8; i++ { + if int(c.Weekday()) == t.Weekday && c.After(after) { + return c, true + } + c = time.Date(c.Year(), c.Month(), c.Day()+1, t.Hour, t.Minute, 0, 0, loc) + } + return c, true // unreachable in practice; loop always finds a match within 7 days + + default: + return time.Time{}, false + } +} + +// SlotKey is a stable dedup key for a fire instant: the local calendar minute. +// Two fires at the same wall-clock minute (e.g. a DST fall-back repeat) share a +// key, so LastFiredSlot can suppress the duplicate. +func SlotKey(t time.Time) string { + return t.Format("2006-01-02T15:04") +} + +// HumanSchedule renders a trigger for display, e.g. "Daily at 09:00", +// "Weekly on Mon at 14:30", "Hourly at :05", "Manual". 
+func HumanSchedule(t Trigger) string { + if t.Type == TriggerManual { + return "Manual" + } + switch t.Cadence { + case CadenceHourly: + return fmt.Sprintf("Hourly at :%02d", t.Minute) + case CadenceDaily: + return fmt.Sprintf("Daily at %02d:%02d", t.Hour, t.Minute) + case CadenceWeekly: + return fmt.Sprintf("Weekly on %s at %02d:%02d", weekdayName(t.Weekday), t.Hour, t.Minute) + default: + return string(t.Cadence) + } +} + +// Badge renders a short cadence label for cards: "Daily" / "Weekly" / "Hourly" +// / "Manual". +func Badge(t Trigger) string { + if t.Type == TriggerManual { + return "Manual" + } + switch t.Cadence { + case CadenceHourly: + return "Hourly" + case CadenceDaily: + return "Daily" + case CadenceWeekly: + return "Weekly" + default: + return string(t.Cadence) + } +} + +func weekdayName(w int) string { + names := []string{"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"} + if w < 0 || w > 6 { + return "?" + } + return names[w] +} diff --git a/internal/automation/schedule_test.go b/internal/automation/schedule_test.go new file mode 100644 index 0000000..60d439f --- /dev/null +++ b/internal/automation/schedule_test.go @@ -0,0 +1,111 @@ +package automation + +import ( + "testing" + "time" +) + +func mustLoad(t *testing.T, name string) *time.Location { + t.Helper() + loc, err := time.LoadLocation(name) + if err != nil { + t.Skipf("tz %s unavailable: %v", name, err) + } + return loc +} + +func TestComputeNextRun_Hourly(t *testing.T) { + after := time.Date(2026, 6, 23, 10, 12, 0, 0, time.UTC) + got, ok := ComputeNextRun(after, Trigger{Type: TriggerSchedule, Cadence: CadenceHourly, Minute: 5}) + if !ok { + t.Fatal("expected ok") + } + want := time.Date(2026, 6, 23, 11, 5, 0, 0, time.UTC) + if !got.Equal(want) { + t.Fatalf("hourly: got %v want %v", got, want) + } + // When the minute is still ahead this hour, it stays in the same hour. 
+ got2, _ := ComputeNextRun(time.Date(2026, 6, 23, 10, 2, 0, 0, time.UTC), + Trigger{Type: TriggerSchedule, Cadence: CadenceHourly, Minute: 5}) + if want2 := time.Date(2026, 6, 23, 10, 5, 0, 0, time.UTC); !got2.Equal(want2) { + t.Fatalf("hourly same-hour: got %v want %v", got2, want2) + } +} + +func TestComputeNextRun_Daily_StrictlyAfter(t *testing.T) { + // Exactly at the slot → must roll to tomorrow (strictly after). + after := time.Date(2026, 6, 23, 9, 0, 0, 0, time.UTC) + got, _ := ComputeNextRun(after, Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9, Minute: 0}) + want := time.Date(2026, 6, 24, 9, 0, 0, 0, time.UTC) + if !got.Equal(want) { + t.Fatalf("daily strict: got %v want %v", got, want) + } +} + +func TestComputeNextRun_Weekly(t *testing.T) { + // 2026-06-23 is a Tuesday(2). Want Friday(5) 17:00. + after := time.Date(2026, 6, 23, 12, 0, 0, 0, time.UTC) + got, _ := ComputeNextRun(after, Trigger{Type: TriggerSchedule, Cadence: CadenceWeekly, Weekday: 5, Hour: 17, Minute: 0}) + if got.Weekday() != time.Friday { + t.Fatalf("weekly: got weekday %v", got.Weekday()) + } + if want := time.Date(2026, 6, 26, 17, 0, 0, 0, time.UTC); !got.Equal(want) { + t.Fatalf("weekly: got %v want %v", got, want) + } +} + +func TestComputeNextRun_Manual(t *testing.T) { + if _, ok := ComputeNextRun(time.Now(), Trigger{Type: TriggerManual}); ok { + t.Fatal("manual trigger must not produce a next run") + } +} + +func TestComputeNextRun_DST_SpringForward(t *testing.T) { + loc := mustLoad(t, "America/New_York") + // 2026-03-08: clocks jump 02:00 -> 03:00. A daily 02:30 does not exist that + // day; ComputeNextRun must still return a real future instant (normalized). 
+ after := time.Date(2026, 3, 8, 0, 0, 0, 0, loc) + got, ok := ComputeNextRun(after, Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 2, Minute: 30}) + if !ok { + t.Fatal("expected ok") + } + if !got.After(after) { + t.Fatalf("spring-forward: result %v not after %v", got, after) + } + // Round-trips to a valid instant (no zero / no panic) and is the same day. + if got.Day() != 8 { + t.Fatalf("spring-forward: expected same calendar day, got %v", got) + } +} + +func TestComputeNextRun_DST_FallBack_SlotDedup(t *testing.T) { + loc := mustLoad(t, "America/New_York") + // During fall-back the 01:30 wall-clock occurs twice. SlotKey must be equal + // for both so the scheduler's LastFiredSlot guard suppresses the second. + first := time.Date(2026, 11, 1, 1, 30, 0, 0, loc) + second := first.Add(time.Hour) // same wall clock, different offset + if SlotKey(first) != SlotKey(second) { + t.Fatalf("fall-back slot keys differ: %q vs %q", SlotKey(first), SlotKey(second)) + } +} + +func TestHumanScheduleAndBadge(t *testing.T) { + cases := []struct { + tr Trigger + human string + badge string + }{ + {Trigger{Type: TriggerManual}, "Manual", "Manual"}, + {Trigger{Type: TriggerSchedule, Cadence: CadenceHourly, Minute: 5}, "Hourly at :05", "Hourly"}, + {Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9, Minute: 0}, "Daily at 09:00", "Daily"}, + {Trigger{Type: TriggerSchedule, Cadence: CadenceWeekly, Weekday: 1, Hour: 14, Minute: 30}, "Weekly on Mon at 14:30", "Weekly"}, + } + for _, c := range cases { + if got := HumanSchedule(c.tr); got != c.human { + t.Errorf("HumanSchedule(%v)=%q want %q", c.tr, got, c.human) + } + if got := Badge(c.tr); got != c.badge { + t.Errorf("Badge(%v)=%q want %q", c.tr, got, c.badge) + } + } +} diff --git a/internal/automation/scheduler.go b/internal/automation/scheduler.go new file mode 100644 index 0000000..6e07bc0 --- /dev/null +++ b/internal/automation/scheduler.go @@ -0,0 +1,312 @@ +package automation + +import ( + "context" + 
"fmt" + "os" + "path/filepath" + "sync" + "time" + + "github.com/cnjack/jcode/internal/config" +) + +// schedulerLockFile is held by the single process that owns periodic firing. It +// is intentionally SEPARATE from the storage write lock (automation.lock): the +// election lock is held for the whole process lifetime, while storage writes +// take their lock briefly — conflating them would let the long-held election +// lock starve short writes. +const schedulerLockFile = "automation-scheduler.lock" + +// ScheduledRunCeiling bounds a scheduled (headless) run's wall-clock time. This +// is a liveness bound, not a safety guardrail: with ask_user/automation_create +// excluded from headless runs, the remaining hang vector is an agent loop, and +// an unbounded run would hold an engine forever. Manual runs are not capped. +const ScheduledRunCeiling = 30 * time.Minute + +// manualRunStaleWindow bounds how long a manual run may sit in "running" before +// reconciliation treats it as a zombie. Manual runs are uncapped and bypass the +// scheduler election, so on restart we can't prove one is dead the way we can a +// scheduled run; we only reset clearly-stale ones (older than this window). +const manualRunStaleWindow = 2 * time.Hour + +// Runner executes one automation run to completion. Implementations (internal/web) +// reuse the Engine: build a headless engine for the automation's project + mode, +// inject the prompt, and block until the agent is done. The returned sessionID +// identifies the recorded session. +type Runner interface { + StartRun(ctx context.Context, a *Automation, kind string) (sessionID string, err error) +} + +// SkipNotifier is called when the scheduler skips a fire without running (e.g. +// the bound project is gone). Optional. +type SkipNotifier func(a *Automation, reason string) + +// Scheduler owns periodic firing for the process that wins the election lock. 
+// Non-owner processes can still manage definitions and trigger manual runs +// (which bypass the scheduler entirely). +type Scheduler struct { + store *Store + runner Runner + interval time.Duration + onSkip SkipNotifier + + mu sync.Mutex + inflight map[string]bool +} + +// NewScheduler builds a scheduler with a default 30s tick (see SetInterval). +func NewScheduler(store *Store, runner Runner) *Scheduler { + return &Scheduler{ + store: store, + runner: runner, + interval: 30 * time.Second, + inflight: map[string]bool{}, + } +} + +// SetInterval overrides the tick interval (used in tests). +func (s *Scheduler) SetInterval(d time.Duration) { + if d > 0 { + s.interval = d + } +} + +// SetSkipNotifier registers a callback for skipped fires. +func (s *Scheduler) SetSkipNotifier(fn SkipNotifier) { s.onSkip = fn } + +// Run blocks until ctx is cancelled. It first contends for the election lock; if +// another process owns it, Run returns immediately (this process won't fire +// scheduled runs, but manual runs still work). The flock is released by the OS +// on process exit, so a crashed owner never deadlocks the election. +func (s *Scheduler) Run(ctx context.Context) { + lockPath := filepath.Join(s.store.dir, schedulerLockFile) + lock, ok, err := tryAcquireLock(lockPath) + if err != nil { + config.Logger().Printf("[automation] scheduler lock error, not scheduling: %v", err) + return + } + if !ok { + config.Logger().Printf("[automation] another process owns scheduling; periodic runs disabled here") + return + } + defer func() { _ = lock.release() }() + config.Logger().Printf("[automation] scheduler started (owner)") + + // Acquiring the election lock means any prior owner is gone: reconcile runs + // it left marked "running" (zombies) so skip-if-running and the UI recover. 
+ s.reconcileStale() + + t := time.NewTicker(s.interval) + defer t.Stop() + s.tick(ctx) + for { + select { + case <-ctx.Done(): + return + case <-t.C: + s.tick(ctx) + } + } +} + +// reconcileStale resets run-state left in "running" by a crashed owner so +// skip-if-running and the UI recover. Scheduled runs are reset unconditionally: +// winning the election lock proves the prior SCHEDULER owner is gone. Manual +// runs need a time heuristic instead — one may be executing right now in a +// DIFFERENT process (manual runs bypass the election), so resetting a fresh one +// would briefly show a bogus "interrupted" for a live cross-process run; only +// runs older than manualRunStaleWindow are treated as zombies. +func (s *Scheduler) reconcileStale() { + now := nowFunc() + for _, a := range s.store.List() { + st := s.store.State(a.ID) + if st.LastStatus != StatusRunning { + continue + } + if a.Trigger.Type != TriggerSchedule { + // Manual (or non-scheduled) run: a live one in another process has a + // recent, valid LastRunAt (ExecuteRun stamps it atomically when it + // claims the run). Leave those alone; reset only runs older than the + // window — and also an empty/garbled LastRunAt, which can't be a live + // run and would otherwise stay stuck at "running" forever. + if t, err := time.Parse(time.RFC3339, st.LastRunAt); err == nil && now.Sub(t) < manualRunStaleWindow { + continue + } + } + _ = s.store.UpdateState(a.ID, func(st *RunState) { + st.LastStatus = StatusInterrupted + st.LastError = "previous run interrupted (process restart)" + }) + } +} + +func (s *Scheduler) tick(ctx context.Context) { + now := nowFunc() + for _, a := range s.store.List() { + if !a.Enabled || a.Trigger.Type != TriggerSchedule { + continue + } + st := s.store.State(a.ID) + + next, err := time.Parse(time.RFC3339, st.NextRunAt) + if st.NextRunAt == "" || err != nil { + // First time we see it (or unparseable): seed NextRunAt, don't fire. 
+ _ = s.store.UpdateState(a.ID, func(rs *RunState) { + rs.NextRunAt = nextRunString(now, a.Trigger) + }) + continue + } + if now.Before(next) { + continue // not due yet + } + + slot := SlotKey(next) + if slot == st.LastFiredSlot { + // Already fired this wall-clock minute (DST fall-back guard); advance. + _ = s.store.UpdateState(a.ID, func(rs *RunState) { + rs.NextRunAt = nextRunString(now, a.Trigger) + }) + continue + } + + s.mu.Lock() + busy := s.inflight[a.ID] + s.mu.Unlock() + // Skip if a scheduled run is still in flight (s.inflight) OR a manual run + // is currently executing (LastStatus), so a scheduled fire can't overlap a + // manual "Run Now" that races it. A crashed-run zombie left at "running" is + // cleared by reconcileStale on the next owner election. + if busy || st.LastStatus == StatusRunning { + continue + } + + // Fire-time precheck: the bound project must still exist locally. + if !projectUsable(a.ProjectPath) { + s.skipAndMaybeDisable(a, "project path is missing or not local: "+a.ProjectPath) + _ = s.store.UpdateState(a.ID, func(rs *RunState) { + rs.NextRunAt = nextRunString(now, a.Trigger) + }) + continue + } + + s.fire(ctx, a, slot, now) + } +} + +func (s *Scheduler) fire(ctx context.Context, a *Automation, slot string, now time.Time) { + s.mu.Lock() + s.inflight[a.ID] = true + s.mu.Unlock() + + _ = s.store.UpdateState(a.ID, func(rs *RunState) { + rs.LastFiredSlot = slot + rs.NextRunAt = nextRunString(now, a.Trigger) + }) + + go func() { + defer func() { + s.mu.Lock() + delete(s.inflight, a.ID) + s.mu.Unlock() + }() + runCtx, cancel := context.WithTimeout(ctx, ScheduledRunCeiling) + defer cancel() + _, _ = ExecuteRun(runCtx, s.store, s.runner, a, KindScheduled) + }() +} + +func (s *Scheduler) skipAndMaybeDisable(a *Automation, reason string) { + // State mutation and the conditional auto-disable happen in one lock scope so + // a concurrent successful run can't reset ConsecutiveFails between them. 
+ _, _ = s.store.UpdateStateAndMaybeDisable(a.ID, func(rs *RunState) { + rs.LastStatus = StatusSkipped + rs.LastError = reason + rs.LastRunAt = nowFunc().Format(time.RFC3339) + rs.ConsecutiveFails++ + }) + if s.onSkip != nil { + s.onSkip(a, reason) + } +} + +// ExecuteRun runs an automation through the runner and records terminal state. +// It blocks until completion. Shared by the scheduler (scheduled fires) and the +// manual ▶ path so state bookkeeping is identical. For scheduled runs, repeated +// errors increment ConsecutiveFails and auto-disable past the threshold. +func ExecuteRun(ctx context.Context, store *Store, runner Runner, a *Automation, kind string) (string, error) { + // Atomically claim the run. If a run for this automation is already in + // progress (a scheduled fire racing a manual "Run Now", or another process), + // refuse rather than start a second agent session against the same project. + // Returning before writing any terminal state preserves the live run's + // status. + claimed, _ := store.TryMarkRunning(a.ID) + if !claimed { + return "", fmt.Errorf("a run is already in progress for automation %q", a.ID) + } + + sessionID, err := safeStartRun(ctx, runner, a, kind) + + if err != nil && kind == KindScheduled { + // Scheduled failure: increment ConsecutiveFails and auto-disable past the + // threshold atomically (single lock scope) so a concurrent success can't + // race the disable. + _, _ = store.UpdateStateAndMaybeDisable(a.ID, func(rs *RunState) { + rs.LastSessionID = sessionID + rs.LastStatus = StatusError + rs.LastError = truncate(err.Error(), 300) + rs.ConsecutiveFails++ + }) + return sessionID, err + } + + _ = store.UpdateState(a.ID, func(rs *RunState) { + rs.LastSessionID = sessionID + if err != nil { + // Manual failure: record the error but never auto-disable (manual runs + // don't increment the failure counter). 
+ rs.LastStatus = StatusError + rs.LastError = truncate(err.Error(), 300) + } else { + rs.LastStatus = StatusSuccess + rs.LastError = "" + rs.ConsecutiveFails = 0 + } + }) + return sessionID, err +} + +// safeStartRun runs the runner with a recover guard so a panic in one +// automation run (the agent/engine stack is large and concurrent) becomes a +// recorded StatusError instead of crashing the whole host process — which would +// take down the web UI / TUI / scheduler with it. +func safeStartRun(ctx context.Context, runner Runner, a *Automation, kind string) (sessionID string, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("automation run panicked: %v", r) + } + }() + return runner.StartRun(ctx, a, kind) +} + +func nextRunString(after time.Time, t Trigger) string { + if n, ok := ComputeNextRun(after, t); ok { + return n.Format(time.RFC3339) + } + return "" +} + +func projectUsable(p string) bool { + if !IsLocalPath(p) { + return false + } + info, err := os.Stat(p) + return err == nil && info.IsDir() +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "…" +} diff --git a/internal/automation/scheduler_test.go b/internal/automation/scheduler_test.go new file mode 100644 index 0000000..73e68ab --- /dev/null +++ b/internal/automation/scheduler_test.go @@ -0,0 +1,238 @@ +package automation + +import ( + "context" + "errors" + "sync" + "testing" + "time" +) + +type fakeRunner struct { + mu sync.Mutex + calls int + sid string + err error +} + +func (f *fakeRunner) StartRun(_ context.Context, _ *Automation, _ string) (string, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.calls++ + return f.sid, f.err +} + +func (f *fakeRunner) count() int { f.mu.Lock(); defer f.mu.Unlock(); return f.calls } + +func waitFor(t *testing.T, cond func() bool, d time.Duration) { + t.Helper() + deadline := time.Now().Add(d) + for time.Now().Before(deadline) { + if cond() { + return + } + time.Sleep(5 * 
time.Millisecond) + } + t.Fatal("condition not met within timeout") +} + +func TestExecuteRun_SuccessThenError_AutoDisable(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + + okRunner := &fakeRunner{sid: "sess1"} + if _, err := ExecuteRun(context.Background(), s, okRunner, a, KindScheduled); err != nil { + t.Fatal(err) + } + st := s.State(a.ID) + if st.LastStatus != StatusSuccess || st.LastSessionID != "sess1" || st.ConsecutiveFails != 0 { + t.Fatalf("success state wrong: %+v", st) + } + + failRunner := &fakeRunner{err: errors.New("boom")} + for i := 0; i < AutoDisableThreshold; i++ { + _, _ = ExecuteRun(context.Background(), s, failRunner, a, KindScheduled) + } + st = s.State(a.ID) + if st.LastStatus != StatusError || st.ConsecutiveFails < AutoDisableThreshold { + t.Fatalf("error state wrong: %+v", st) + } + if s.Get(a.ID).Enabled { + t.Fatal("expected auto-disable after repeated failures") + } +} + +// ExecuteRun must refuse to start a second run while one is already in progress +// (atomic claim), so a scheduled fire racing a manual "Run Now" can't launch two +// agent sessions against the same project. +func TestExecuteRun_RefusesConcurrent(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + + // Simulate a run already in flight. 
+ if ok, _ := s.TryMarkRunning(a.ID); !ok { + t.Fatal("setup claim failed") + } + + r := &fakeRunner{sid: "sess"} + if _, err := ExecuteRun(context.Background(), s, r, a, KindScheduled); err == nil { + t.Fatal("expected ExecuteRun to refuse a concurrent run") + } + if r.count() != 0 { + t.Fatal("runner must not be invoked when a run is already in progress") + } + // The live run's status must be untouched (not clobbered to error). + if s.State(a.ID).LastStatus != StatusRunning { + t.Fatalf("refused run clobbered the live status: %s", s.State(a.ID).LastStatus) + } +} + +type panicRunner struct{} + +func (panicRunner) StartRun(_ context.Context, _ *Automation, _ string) (string, error) { + panic("boom") +} + +func TestExecuteRun_RecoversPanic(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerManual}}) + // A panicking run must not crash the process — it becomes a recorded error. + _, err := ExecuteRun(context.Background(), s, panicRunner{}, a, KindManual) + if err == nil { + t.Fatal("expected error from recovered panic") + } + if s.State(a.ID).LastStatus != StatusError { + t.Fatalf("expected error status, got %q", s.State(a.ID).LastStatus) + } +} + +func TestSchedulerTick_SeedsThenFires(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + r := &fakeRunner{sid: "sess1"} + sch := NewScheduler(s, r) + + // First sight: seeds NextRunAt, does NOT fire. + sch.tick(context.Background()) + if r.count() != 0 { + t.Fatal("should not fire on first sight") + } + if s.State(a.ID).NextRunAt == "" { + t.Fatal("NextRunAt not seeded") + } + + // Make it due, then tick again → fires. 
+ if err := s.UpdateState(a.ID, func(rs *RunState) { + rs.NextRunAt = time.Now().Add(-time.Minute).Format(time.RFC3339) + rs.LastFiredSlot = "" + }); err != nil { + t.Fatal(err) + } + sch.tick(context.Background()) + waitFor(t, func() bool { return r.count() == 1 }, 2*time.Second) + waitFor(t, func() bool { return s.State(a.ID).LastStatus == StatusSuccess }, 2*time.Second) +} + +func TestSchedulerTick_SlotDedup(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + r := &fakeRunner{} + sch := NewScheduler(s, r) + + due := time.Now().Add(-time.Minute) + _ = s.UpdateState(a.ID, func(rs *RunState) { + rs.NextRunAt = due.Format(time.RFC3339) + rs.LastFiredSlot = SlotKey(due) // already fired this slot + }) + sch.tick(context.Background()) + time.Sleep(50 * time.Millisecond) + if r.count() != 0 { + t.Fatal("slot dedup failed: fired an already-fired slot") + } +} + +func TestSchedulerTick_SkipMissingProject(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: "/no/such/dir/really", + Trigger: Trigger{Type: TriggerManual}}) + // Force it into a schedule that's due (bypass Create's manual trigger). 
+ _, _ = s.Update(a.ID, func(x *Automation) { + x.Trigger = Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9} + x.Enabled = true + }) + r := &fakeRunner{} + sch := NewScheduler(s, r) + for i := 0; i < AutoDisableThreshold; i++ { + _ = s.UpdateState(a.ID, func(rs *RunState) { + rs.NextRunAt = time.Now().Add(-time.Minute).Format(time.RFC3339) + rs.LastFiredSlot = "" + }) + sch.tick(context.Background()) + } + if r.count() != 0 { + t.Fatal("should not run with a missing project") + } + if s.Get(a.ID).Enabled { + t.Fatal("missing-project automation should auto-disable") + } +} + +func TestSchedulerTick_SkipWhenInflight(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + r := &fakeRunner{} + sch := NewScheduler(s, r) + sch.inflight[a.ID] = true // pretend a prior run is still going + + _ = s.UpdateState(a.ID, func(rs *RunState) { + rs.NextRunAt = time.Now().Add(-time.Minute).Format(time.RFC3339) + }) + sch.tick(context.Background()) + time.Sleep(50 * time.Millisecond) + if r.count() != 0 { + t.Fatal("overlap guard failed: fired while a run was in flight") + } +} + +// reconcileStale must reset scheduled zombies unconditionally, reset only +// clearly-stale manual zombies (older than manualRunStaleWindow), and leave a +// fresh manual run — which may be live in another process — untouched. 
+func TestReconcileStale_ManualHeuristic(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + sch := NewScheduler(s, &fakeRunner{}) + + sched, _ := s.Create(Automation{Name: "sched", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + staleManual, _ := s.Create(Automation{Name: "stale", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerManual}}) + freshManual, _ := s.Create(Automation{Name: "fresh", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerManual}}) + + _ = s.UpdateState(sched.ID, func(rs *RunState) { rs.LastStatus = StatusRunning }) + _ = s.UpdateState(staleManual.ID, func(rs *RunState) { + rs.LastStatus = StatusRunning + rs.LastRunAt = time.Now().Add(-3 * time.Hour).Format(time.RFC3339) + }) + _ = s.UpdateState(freshManual.ID, func(rs *RunState) { + rs.LastStatus = StatusRunning + rs.LastRunAt = time.Now().Format(time.RFC3339) + }) + + sch.reconcileStale() + + if got := s.State(sched.ID).LastStatus; got != StatusInterrupted { + t.Fatalf("scheduled zombie not reset: %s", got) + } + if got := s.State(staleManual.ID).LastStatus; got != StatusInterrupted { + t.Fatalf("stale manual zombie not reset: %s", got) + } + if got := s.State(freshManual.ID).LastStatus; got != StatusRunning { + t.Fatalf("fresh manual run was reset (may be live in another process): %s", got) + } +} diff --git a/internal/automation/store.go b/internal/automation/store.go new file mode 100644 index 0000000..6e83c22 --- /dev/null +++ b/internal/automation/store.go @@ -0,0 +1,407 @@ +package automation + +import ( + "crypto/rand" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "sync" + "time" + + "github.com/cnjack/jcode/internal/config" +) + +const storeVersion = 1 + +// ErrNotFound is returned (wrapped) when an operation targets an automation id +// that does not exist, so HTTP handlers can map it to 404 rather than 
400. +var ErrNotFound = errors.New("automation not found") + +// defsFile is the user-edited definitions; stateFile is the volatile scheduler +// bookkeeping; lockFile is the cross-process advisory write lock guarding both. +const ( + defsFile = "automations.json" + stateFile = "automation-state.json" + lockFile = "automation.lock" +) + +type defsDoc struct { + Version int `json:"version"` + Automations []*Automation `json:"automations"` +} + +type stateDoc struct { + Version int `json:"version"` + State map[string]*RunState `json:"state"` +} + +// Store persists automations across processes. Writes take an OS file lock and +// re-read from disk before mutating, so concurrent jcode processes (web, TUI, +// CLI) never lose updates. Definitions and volatile run-state live in separate +// files so the scheduler's frequent state writes don't collide with human edits. +type Store struct { + dir string + defsPath string + statePath string + lockPath string + + mu sync.RWMutex + defs map[string]*Automation + state map[string]*RunState +} + +// NewStore opens (and lazily creates) the automation store under ~/.jcode. +func NewStore() (*Store, error) { + return NewStoreDir(config.ConfigDir()) +} + +// NewStoreDir opens a store rooted at an explicit directory (used by tests). +func NewStoreDir(dir string) (*Store, error) { + if err := os.MkdirAll(dir, 0o755); err != nil { + return nil, fmt.Errorf("create automation dir: %w", err) + } + s := &Store{ + dir: dir, + defsPath: filepath.Join(dir, defsFile), + statePath: filepath.Join(dir, stateFile), + lockPath: filepath.Join(dir, lockFile), + defs: map[string]*Automation{}, + state: map[string]*RunState{}, + } + s.mu.Lock() + defer s.mu.Unlock() + if err := s.loadLocked(); err != nil { + return nil, err + } + return s, nil +} + +// loadLocked reads both files from disk into memory. A missing file is empty, +// not an error; an unparseable file is logged and treated as empty so a corrupt +// state file can never block startup. 
+func (s *Store) loadLocked() error { + s.defs = map[string]*Automation{} + s.state = map[string]*RunState{} + + if b, err := os.ReadFile(s.defsPath); err == nil { + var doc defsDoc + if jerr := json.Unmarshal(b, &doc); jerr != nil { + config.Logger().Printf("[automation] corrupt %s, ignoring: %v", defsFile, jerr) + } else { + for _, a := range doc.Automations { + if a != nil && a.ID != "" { + s.defs[a.ID] = a + } + } + } + } else if !os.IsNotExist(err) { + return err + } + + if b, err := os.ReadFile(s.statePath); err == nil { + var doc stateDoc + if jerr := json.Unmarshal(b, &doc); jerr != nil { + config.Logger().Printf("[automation] corrupt %s, rebuilding: %v", stateFile, jerr) + } else { + for id, st := range doc.State { + if st != nil { + s.state[id] = st + } + } + } + } else if !os.IsNotExist(err) { + return err + } + return nil +} + +func (s *Store) persistDefsLocked() error { + doc := defsDoc{Version: storeVersion, Automations: s.listLocked()} + return writeJSONAtomic(s.defsPath, doc) +} + +func (s *Store) persistStateLocked() error { + doc := stateDoc{Version: storeVersion, State: s.state} + return writeJSONAtomic(s.statePath, doc) +} + +// withLock runs fn while holding the cross-process write lock with a fresh +// disk-synced view. fn mutates s.defs/s.state in memory; the requested files are +// then persisted atomically. 
+func (s *Store) withLock(persistDefs, persistState bool, fn func() error) error { + lock, err := acquireLock(s.lockPath) + if err != nil { + return fmt.Errorf("lock automation store: %w", err) + } + defer func() { _ = lock.release() }() + + s.mu.Lock() + defer s.mu.Unlock() + if err := s.loadLocked(); err != nil { + return err + } + if err := fn(); err != nil { + return err + } + if persistDefs { + if err := s.persistDefsLocked(); err != nil { + return err + } + } + if persistState { + if err := s.persistStateLocked(); err != nil { + return err + } + } + return nil +} + +func (s *Store) listLocked() []*Automation { + out := make([]*Automation, 0, len(s.defs)) + for _, a := range s.defs { + out = append(out, a) + } + sort.Slice(out, func(i, j int) bool { + if out[i].CreatedAt != out[j].CreatedAt { + return out[i].CreatedAt < out[j].CreatedAt + } + return out[i].ID < out[j].ID + }) + return out +} + +// List returns all automations sorted by creation time. +func (s *Store) List() []*Automation { + s.mu.RLock() + defer s.mu.RUnlock() + out := s.listLocked() + cp := make([]*Automation, len(out)) + for i, a := range out { + c := *a + cp[i] = &c + } + return cp +} + +// Get returns a copy of the automation, or nil if not found. +func (s *Store) Get(id string) *Automation { + s.mu.RLock() + defer s.mu.RUnlock() + if a, ok := s.defs[id]; ok { + c := *a + return &c + } + return nil +} + +// State returns a copy of the run-state for an automation (zero value if none). +func (s *Store) State(id string) RunState { + s.mu.RLock() + defer s.mu.RUnlock() + if st, ok := s.state[id]; ok { + return *st + } + return RunState{} +} + +// Create validates, assigns id/timestamps/defaults, and persists a new +// automation. The input is copied; the stored automation is returned. 
+func (s *Store) Create(a Automation) (*Automation, error) { + if a.Mode == "" { + a.Mode = "full_access" + } + if err := ValidateAutomation(&a); err != nil { + return nil, err + } + now := nowFunc().Format(time.RFC3339) + a.ID = newID() + a.CreatedAt = now + a.UpdatedAt = now + if a.Source == "" { + a.Source = SourceManual + } + a.RunInCloud = false // reserved; never honored in v1 + + stored := a + err := s.withLock(true, false, func() error { + s.defs[stored.ID] = &stored + return nil + }) + if err != nil { + return nil, err + } + c := stored + return &c, nil +} + +// Update applies a mutation to an existing automation under lock, re-validating +// the result. The mutate callback receives a pointer it may modify in place. +// +// Re-enabling (Enabled false -> true) also clears ConsecutiveFails so a +// recovered automation isn't immediately re-disabled by the next single +// failure. Centralizing the reset here means EVERY enable path gets it — the +// web UI's partial-patch PUT (handleUpdateAutomation), the CLI's SetEnabled, and +// any future caller — not just SetEnabled. 
+func (s *Store) Update(id string, mutate func(*Automation)) (*Automation, error) { + var out *Automation + err := s.withLock(true, true, func() error { + a, ok := s.defs[id] + if !ok { + return fmt.Errorf("automation %q: %w", id, ErrNotFound) + } + wasEnabled := a.Enabled + cp := *a + mutate(&cp) + cp.ID = id // immutable + cp.UpdatedAt = nowFunc().Format(time.RFC3339) + if err := ValidateAutomation(&cp); err != nil { + return err + } + s.defs[id] = &cp + out = &cp + if !wasEnabled && cp.Enabled { + st := s.state[id] + if st == nil { + st = &RunState{} + } else { + c := *st + st = &c + } + st.ConsecutiveFails = 0 + s.state[id] = st + } + return nil + }) + if err != nil { + return nil, err + } + c := *out + return &c, nil +} + +// SetEnabled flips the enabled flag (re-enabling clears ConsecutiveFails via +// Update's shared reset, so a recovered automation isn't immediately +// re-disabled by the next single failure). +func (s *Store) SetEnabled(id string, enabled bool) (*Automation, error) { + return s.Update(id, func(a *Automation) { a.Enabled = enabled }) +} + +// Delete removes an automation and its run-state. +func (s *Store) Delete(id string) error { + return s.withLock(true, true, func() error { + if _, ok := s.defs[id]; !ok { + return fmt.Errorf("automation %q: %w", id, ErrNotFound) + } + delete(s.defs, id) + delete(s.state, id) + return nil + }) +} + +// UpdateState mutates only the volatile run-state file (never the definitions), +// so scheduler/run-completion writes can never clobber a concurrent human edit. 
+func (s *Store) UpdateState(id string, mutate func(*RunState)) error { + return s.withLock(false, true, func() error { + st := s.state[id] + if st == nil { + st = &RunState{} + } else { + cp := *st + st = &cp + } + mutate(st) + s.state[id] = st + return nil + }) +} + +// TryMarkRunning atomically claims a run for id: it sets LastStatus=running +// (clearing LastError, stamping LastRunAt) only if a run is not ALREADY in +// progress, returning whether the claim succeeded. This is the single +// authoritative guard against overlapping runs across the scheduler, manual +// "Run Now", and other processes — the local in-flight maps are only fast-path +// hints that can't see each other or another process. A crashed run left at +// "running" is cleared by the scheduler's reconcileStale on the next election. +func (s *Store) TryMarkRunning(id string) (bool, error) { + claimed := false + err := s.withLock(false, true, func() error { + st := s.state[id] + if st != nil && st.LastStatus == StatusRunning { + return nil // already running; do not clobber the live run's state + } + if st == nil { + st = &RunState{} + } else { + cp := *st + st = &cp + } + st.LastStatus = StatusRunning + st.LastError = "" + st.LastRunAt = nowFunc().Format(time.RFC3339) + s.state[id] = st + claimed = true + return nil + }) + return claimed, err +} + +// UpdateStateAndMaybeDisable mutates the run-state (e.g. recording a failure and +// bumping ConsecutiveFails) and, in the SAME lock scope, disables the definition +// when ConsecutiveFails has reached AutoDisableThreshold. Folding the disable +// into the run-state mutation closes the TOCTOU window that exists when the +// disable is a separate SetEnabled(false) call: a concurrent successful run can +// no longer reset ConsecutiveFails between the threshold check and the disable. +// Returns whether the definition was disabled by this call. 
+func (s *Store) UpdateStateAndMaybeDisable(id string, mutate func(*RunState)) (bool, error) { + disabled := false + err := s.withLock(true, true, func() error { + st := s.state[id] + if st == nil { + st = &RunState{} + } else { + cp := *st + st = &cp + } + mutate(st) + s.state[id] = st + if st.ConsecutiveFails >= AutoDisableThreshold { + if a, ok := s.defs[id]; ok && a.Enabled { + cp := *a + cp.Enabled = false + cp.UpdatedAt = nowFunc().Format(time.RFC3339) + s.defs[id] = &cp + disabled = true + } + } + return nil + }) + return disabled, err +} + +// ---- helpers ---- + +func writeJSONAtomic(path string, v any) error { + b, err := json.MarshalIndent(v, "", " ") + if err != nil { + return err + } + tmp := path + ".tmp" + if err := os.WriteFile(tmp, b, 0o600); err != nil { + return err + } + return os.Rename(tmp, path) +} + +func newID() string { + var b [6]byte + if _, err := rand.Read(b[:]); err != nil { + // extremely unlikely; fall back to a time-derived id + return strings.ReplaceAll(nowFunc().Format("150405.000000"), ".", "") + } + return hex.EncodeToString(b[:]) +} diff --git a/internal/automation/store_test.go b/internal/automation/store_test.go new file mode 100644 index 0000000..ff983c7 --- /dev/null +++ b/internal/automation/store_test.go @@ -0,0 +1,222 @@ +package automation + +import ( + "errors" + "os" + "path/filepath" + "testing" +) + +func TestStoreCRUDRoundTrip(t *testing.T) { + dir := t.TempDir() + s, err := NewStoreDir(dir) + if err != nil { + t.Fatal(err) + } + a, err := s.Create(Automation{ + Name: "Nightly", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true, + }) + if err != nil { + t.Fatal(err) + } + if a.ID == "" || a.Mode != "full_access" || a.Source != SourceManual { + t.Fatalf("defaults not applied: %+v", a) + } + if got := s.List(); len(got) != 1 { + t.Fatalf("want 1, got %d", len(got)) + } + if s.Get(a.ID) == nil { + t.Fatal("Get returned nil") + } + + upd, 
err := s.Update(a.ID, func(x *Automation) { x.Name = "Renamed" }) + if err != nil || upd.Name != "Renamed" { + t.Fatalf("update failed: %v %+v", err, upd) + } + + // Persistence across reopen. + s2, err := NewStoreDir(dir) + if err != nil { + t.Fatal(err) + } + if got := s2.Get(a.ID); got == nil || got.Name != "Renamed" { + t.Fatalf("not persisted: %+v", got) + } + + if err := s.Delete(a.ID); err != nil { + t.Fatal(err) + } + if len(s.List()) != 0 { + t.Fatal("delete did not remove") + } +} + +func TestStoreTwoFileSeparation(t *testing.T) { + dir := t.TempDir() + s, _ := NewStoreDir(dir) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerManual}}) + if err := s.UpdateState(a.ID, func(rs *RunState) { rs.LastStatus = StatusSuccess }); err != nil { + t.Fatal(err) + } + if _, err := os.Stat(filepath.Join(dir, defsFile)); err != nil { + t.Fatalf("defs file missing: %v", err) + } + if _, err := os.Stat(filepath.Join(dir, stateFile)); err != nil { + t.Fatalf("state file missing: %v", err) + } + // State survives reopen and lives apart from defs. 
+ s2, _ := NewStoreDir(dir) + if s2.State(a.ID).LastStatus != StatusSuccess { + t.Fatal("state not persisted separately") + } +} + +func TestStoreCorruptStateIgnored(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, stateFile), []byte("{not json"), 0o600); err != nil { + t.Fatal(err) + } + s, err := NewStoreDir(dir) // must not fail on corrupt state + if err != nil { + t.Fatalf("corrupt state should not block open: %v", err) + } + if len(s.List()) != 0 { + t.Fatal("expected empty store") + } +} + +func TestStoreCreateValidates(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + if _, err := s.Create(Automation{Name: "", Prompt: "p", ProjectPath: "/x"}); err == nil { + t.Fatal("expected validation error for empty name") + } +} + +// Re-enabling a recovered automation must clear ConsecutiveFails so a single +// later failure doesn't immediately re-disable it (regression: permanent +// re-disable loop). +func TestSetEnabledResetsConsecutiveFails(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + + // Drive it to the auto-disable threshold. + disabled, err := s.UpdateStateAndMaybeDisable(a.ID, func(rs *RunState) { + rs.ConsecutiveFails = AutoDisableThreshold + }) + if err != nil || !disabled { + t.Fatalf("expected auto-disable: disabled=%v err=%v", disabled, err) + } + if s.Get(a.ID).Enabled { + t.Fatal("expected definition disabled at threshold") + } + if got := s.State(a.ID).ConsecutiveFails; got < AutoDisableThreshold { + t.Fatalf("ConsecutiveFails = %d, want >= %d", got, AutoDisableThreshold) + } + + // User re-enables: the counter must reset so the next single failure doesn't + // immediately re-disable. 
+ if _, err := s.SetEnabled(a.ID, true); err != nil { + t.Fatal(err) + } + if got := s.State(a.ID).ConsecutiveFails; got != 0 { + t.Fatalf("ConsecutiveFails after re-enable = %d, want 0", got) + } +} + +// UpdateStateAndMaybeDisable must not disable below the threshold, and reports +// whether it disabled. +func TestUpdateStateAndMaybeDisableThreshold(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + + for i := 1; i < AutoDisableThreshold; i++ { + disabled, _ := s.UpdateStateAndMaybeDisable(a.ID, func(rs *RunState) { rs.ConsecutiveFails++ }) + if disabled { + t.Fatalf("disabled early at fail %d", i) + } + if !s.Get(a.ID).Enabled { + t.Fatalf("definition disabled early at fail %d", i) + } + } + disabled, _ := s.UpdateStateAndMaybeDisable(a.ID, func(rs *RunState) { rs.ConsecutiveFails++ }) + if !disabled || s.Get(a.ID).Enabled { + t.Fatal("expected disable at threshold") + } +} + +// The web UI re-enables an auto-disabled automation through the partial-patch +// PUT, which routes via Store.Update (not SetEnabled). Update must therefore +// also clear ConsecutiveFails on the disabled->enabled transition, and must NOT +// clear it on an unrelated edit. +func TestUpdateReEnableResetsConsecutiveFails(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9}, Enabled: true}) + + if _, err := s.UpdateStateAndMaybeDisable(a.ID, func(rs *RunState) { + rs.ConsecutiveFails = AutoDisableThreshold + }); err != nil { + t.Fatal(err) + } + if s.Get(a.ID).Enabled { + t.Fatal("expected auto-disable") + } + + // An edit that does NOT re-enable must leave the counter intact. 
+ if _, err := s.Update(a.ID, func(x *Automation) { x.Name = "renamed" }); err != nil { + t.Fatal(err) + } + if got := s.State(a.ID).ConsecutiveFails; got != AutoDisableThreshold { + t.Fatalf("unrelated edit reset the counter: %d", got) + } + + // Re-enabling via Update (the web PUT path) must reset the counter. + if _, err := s.Update(a.ID, func(x *Automation) { x.Enabled = true }); err != nil { + t.Fatal(err) + } + if got := s.State(a.ID).ConsecutiveFails; got != 0 { + t.Fatalf("ConsecutiveFails after web re-enable = %d, want 0", got) + } +} + +// TryMarkRunning is the atomic overlap guard: it claims only when not already +// running, and a terminal status frees the next claim. +func TestTryMarkRunning(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + a, _ := s.Create(Automation{Name: "n", Prompt: "p", ProjectPath: t.TempDir(), + Trigger: Trigger{Type: TriggerManual}}) + + if ok, _ := s.TryMarkRunning(a.ID); !ok { + t.Fatal("first claim should succeed") + } + if s.State(a.ID).LastStatus != StatusRunning { + t.Fatal("claim did not mark running") + } + if ok, _ := s.TryMarkRunning(a.ID); ok { + t.Fatal("second claim must fail while running") + } + // Terminal status frees the slot. + _ = s.UpdateState(a.ID, func(rs *RunState) { rs.LastStatus = StatusSuccess }) + if ok, _ := s.TryMarkRunning(a.ID); !ok { + t.Fatal("claim should succeed after a terminal status") + } +} + +// Mutating a missing automation must report ErrNotFound so HTTP handlers can map +// it to 404 rather than 400. 
+func TestStoreNotFoundSentinel(t *testing.T) { + s, _ := NewStoreDir(t.TempDir()) + if _, err := s.Update("nope", func(*Automation) {}); !errors.Is(err, ErrNotFound) { + t.Fatalf("Update: want ErrNotFound, got %v", err) + } + if _, err := s.SetEnabled("nope", true); !errors.Is(err, ErrNotFound) { + t.Fatalf("SetEnabled: want ErrNotFound, got %v", err) + } + if err := s.Delete("nope"); !errors.Is(err, ErrNotFound) { + t.Fatalf("Delete: want ErrNotFound, got %v", err) + } +} diff --git a/internal/automation/templates.go b/internal/automation/templates.go new file mode 100644 index 0000000..9c327f3 --- /dev/null +++ b/internal/automation/templates.go @@ -0,0 +1,73 @@ +package automation + +// Template is a built-in starting point shown on the Templates page. Selecting +// one pre-fills the editor; the user picks a project and confirms. +type Template struct { + ID string `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Badge string `json:"badge"` // Daily|Weekly|Manual (display) + Prompt string `json:"prompt"` + Trigger Trigger `json:"trigger"` + SuggestMode string `json:"suggest_mode"` +} + +// BuiltinTemplates returns the curated templates (aligned with the reference UI). +func BuiltinTemplates() []Template { + return []Template{ + { + ID: "issue-triage", + Name: "Issue triage", + Description: "Review the latest GitHub issues and propose priorities and owners.", + Badge: "Daily", + Prompt: "Review the newest open issues in this repository. 
For each, propose a priority (P0–P3) and a likely owner, and summarize the triage as a short report.", + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9, Minute: 0}, + SuggestMode: "full_access", + }, + { + ID: "changelog-draft", + Name: "Changelog draft", + Description: "Summarize key merged PRs this week into a release-note draft.", + Badge: "Weekly", + Prompt: "Summarize the PRs merged into the main branch over the last 7 days into a concise, user-facing release-note draft grouped by Features / Fixes / Chores.", + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceWeekly, Weekday: 5, Hour: 17, Minute: 0}, + SuggestMode: "full_access", + }, + { + ID: "repo-audit", + Name: "Repo audit", + Description: "Audit open PRs and identify blockers or risky changes.", + Badge: "Manual", + Prompt: "Audit the currently open pull requests. Identify any that are blocked, stale, or contain risky changes, and produce a prioritized action list.", + Trigger: Trigger{Type: TriggerManual}, + SuggestMode: "full_access", + }, + { + ID: "perf-improvements", + Name: "Performance improvements", + Description: "Identify high-impact performance improvements and summarize them.", + Badge: "Weekly", + Prompt: "Identify up to 10 concrete performance improvements in this codebase, ranked by impact and effort. 
Summarize each with the file and the rationale.", + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceWeekly, Weekday: 1, Hour: 9, Minute: 0}, + SuggestMode: "full_access", + }, + { + ID: "a11y-audit", + Name: "Accessibility audit", + Description: "Review recent changes and summarize any accessibility issues.", + Badge: "Daily", + Prompt: "Review the changes merged in the last day for accessibility issues (labels, contrast, keyboard nav, ARIA) and summarize findings with suggested fixes.", + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 10, Minute: 0}, + SuggestMode: "full_access", + }, + { + ID: "cost-tips", + Name: "Cost tips", + Description: "Get personalized tips to reduce token usage and cost.", + Badge: "Weekly", + Prompt: "Analyze recent agent usage in this project and suggest concrete ways to reduce token usage and cost without losing capability.", + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceWeekly, Weekday: 1, Hour: 8, Minute: 0}, + SuggestMode: "full_access", + }, + } +} diff --git a/internal/automation/types.go b/internal/automation/types.go new file mode 100644 index 0000000..b97685a --- /dev/null +++ b/internal/automation/types.go @@ -0,0 +1,101 @@ +// Package automation implements jcode Automations: scheduled and manual agent +// runs. The package is a leaf domain layer — it owns the data model, validation, +// scheduling math, persistence (two-file + flock), built-in templates, and the +// single-owner scheduler loop. It does NOT depend on web/tui/runner; callers +// inject a Runner to actually execute a run. See docs/automations-prd.md. +package automation + +import "time" + +// TriggerType is how an automation fires. +type TriggerType string + +const ( + // TriggerSchedule fires on a recurring wall-clock cadence. + TriggerSchedule TriggerType = "schedule" + // TriggerManual never fires automatically; only via an explicit run. 
+ TriggerManual TriggerType = "manual" +) + +// Cadence is the recurrence granularity for a scheduled trigger. +type Cadence string + +const ( + CadenceHourly Cadence = "hourly" + CadenceDaily Cadence = "daily" + CadenceWeekly Cadence = "weekly" +) + +// Trigger describes when an automation fires. Times are interpreted in the +// host's local timezone (see ComputeNextRun). +type Trigger struct { + Type TriggerType `json:"type"` + Cadence Cadence `json:"cadence,omitempty"` + Hour int `json:"hour,omitempty"` // 0-23, used by daily/weekly + Minute int `json:"minute,omitempty"` // 0-59, used by all cadences + Weekday int `json:"weekday,omitempty"` // 0=Sun..6=Sat, used by weekly +} + +// Run terminal-status values (mirrored onto session.SessionMeta.TerminalStatus). +const ( + StatusRunning = "running" + StatusSuccess = "success" + StatusError = "error" + StatusInterrupted = "interrupted" + StatusSkipped = "skipped" +) + +// Trigger-kind values stamped onto a run's session. +const ( + KindScheduled = "scheduled" + KindManual = "manual" +) + +// Source values record how an automation was created. +const ( + SourceManual = "manual" + SourceAgent = "agent" + // Template/skill sources are recorded as "template:" / "skill:". +) + +// Automation is the user-edited definition. It persists in +// ~/.jcode/automations.json (low-frequency, human writes). Volatile scheduler +// bookkeeping lives separately in RunState (automation-state.json) so the +// scheduler's frequent writes never collide with human edits. 
+type Automation struct { + ID string `json:"id"` + Name string `json:"name"` + Prompt string `json:"prompt"` + Trigger Trigger `json:"trigger"` + ProjectPath string `json:"project_path"` // required, must be a local path + Mode string `json:"mode"` // approval|plan|full_access + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + RunInCloud bool `json:"run_in_cloud"` // reserved; always false in v1 + Enabled bool `json:"enabled"` + Source string `json:"source"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +// RunState is the volatile per-automation scheduler bookkeeping. It persists in +// ~/.jcode/automation-state.json, written frequently by the scheduler and the +// run-completion callback — kept apart from Automation so the two write paths +// don't clobber each other. +type RunState struct { + LastRunAt string `json:"last_run_at,omitempty"` + LastStatus string `json:"last_status,omitempty"` // running|success|error|interrupted|skipped + LastError string `json:"last_error,omitempty"` + LastSessionID string `json:"last_session_id,omitempty"` + NextRunAt string `json:"next_run_at,omitempty"` + LastFiredSlot string `json:"last_fired_slot,omitempty"` // SlotKey dedup guard (DST fall-back) + ConsecutiveFails int `json:"consecutive_fails,omitempty"` +} + +// AutoDisableThreshold is the number of consecutive failures (missing project, +// provider gone, etc.) after which a scheduled automation auto-disables so it +// stops re-failing — and re-notifying — every night. (PRD §11 open item N.) +const AutoDisableThreshold = 5 + +// nowFunc is overridable in tests; production uses time.Now. 
+var nowFunc = time.Now diff --git a/internal/automation/validate.go b/internal/automation/validate.go new file mode 100644 index 0000000..457e11c --- /dev/null +++ b/internal/automation/validate.go @@ -0,0 +1,104 @@ +package automation + +import ( + "fmt" + "path/filepath" + "strings" +) + +const ( + maxNameLen = 200 + maxPromptLen = 8000 +) + +// validModes mirrors internal/mode wire ids (approval|plan|full_access). Kept as +// a local set to avoid importing mode just for validation. +var validModes = map[string]bool{"approval": true, "plan": true, "full_access": true} + +// ValidateAutomation is the single validation rule shared by every creation +// path (HTTP API, agent tool, CLI) — mirroring tools.ValidateGoalObjective. It +// mutates nothing; callers assign ID/timestamps/defaults around it. +// +// Key invariants (PRD §7.5, §10.4): a local non-empty ProjectPath is required +// (no-project headless runs are unsafe and unsupported); remote (ssh://docker://) +// targets are rejected for v1. 
+func ValidateAutomation(a *Automation) error { + if a == nil { + return fmt.Errorf("automation is nil") + } + if strings.TrimSpace(a.Name) == "" { + return fmt.Errorf("name is required") + } + if len(a.Name) > maxNameLen { + return fmt.Errorf("name too long (max %d)", maxNameLen) + } + if strings.TrimSpace(a.Prompt) == "" { + return fmt.Errorf("prompt is required") + } + if len(a.Prompt) > maxPromptLen { + return fmt.Errorf("prompt too long (max %d)", maxPromptLen) + } + if a.Mode != "" && !validModes[a.Mode] { + return fmt.Errorf("invalid mode %q (want approval|plan|full_access)", a.Mode) + } + if err := validateProjectPath(a.ProjectPath); err != nil { + return err + } + return validateTrigger(a.Trigger) +} + +func validateProjectPath(p string) error { + p = strings.TrimSpace(p) + if p == "" { + return fmt.Errorf("project is required (no-project automations cannot run unattended)") + } + if strings.Contains(p, "://") { + return fmt.Errorf("remote workspaces (ssh:// / docker://) are not supported for automations yet") + } + // A relative path would fire against whatever cwd the scheduling process + // happens to have, not the user's intended project. Require an absolute path. 
+ if !filepath.IsAbs(p) { + return fmt.Errorf("project must be an absolute path (got %q)", p) + } + return nil +} + +func validateTrigger(t Trigger) error { + switch t.Type { + case TriggerManual: + return nil + case TriggerSchedule: + // fallthrough to cadence checks + default: + return fmt.Errorf("invalid trigger type %q (want schedule|manual)", t.Type) + } + if t.Minute < 0 || t.Minute > 59 { + return fmt.Errorf("minute out of range 0-59") + } + switch t.Cadence { + case CadenceHourly: + // minute only + case CadenceDaily: + if t.Hour < 0 || t.Hour > 23 { + return fmt.Errorf("hour out of range 0-23") + } + case CadenceWeekly: + if t.Hour < 0 || t.Hour > 23 { + return fmt.Errorf("hour out of range 0-23") + } + if t.Weekday < 0 || t.Weekday > 6 { + return fmt.Errorf("weekday out of range 0-6") + } + default: + return fmt.Errorf("invalid cadence %q (want hourly|daily|weekly)", t.Cadence) + } + return nil +} + +// IsLocalPath reports whether p is a usable local project path (non-empty, not a +// remote scheme). Used at fire time to skip+disable an automation pointed at a +// vanished or remote target. +func IsLocalPath(p string) bool { + p = strings.TrimSpace(p) + return p != "" && !strings.Contains(p, "://") +} diff --git a/internal/automation/validate_test.go b/internal/automation/validate_test.go new file mode 100644 index 0000000..c6930ed --- /dev/null +++ b/internal/automation/validate_test.go @@ -0,0 +1,65 @@ +package automation + +import "testing" + +func validAutomation() Automation { + return Automation{ + Name: "Nightly", + Prompt: "do the thing", + ProjectPath: "/tmp/proj", + Mode: "full_access", + Trigger: Trigger{Type: TriggerSchedule, Cadence: CadenceDaily, Hour: 9, Minute: 0}, + } +} + +func TestValidateAutomation_OK(t *testing.T) { + a := validAutomation() + if err := ValidateAutomation(&a); err != nil { + t.Fatalf("expected valid, got %v", err) + } + // Manual trigger needs no cadence. 
+ m := validAutomation() + m.Trigger = Trigger{Type: TriggerManual} + if err := ValidateAutomation(&m); err != nil { + t.Fatalf("manual should be valid: %v", err) + } +} + +func TestValidateAutomation_Rejections(t *testing.T) { + cases := []struct { + name string + mutate func(*Automation) + }{ + {"empty name", func(a *Automation) { a.Name = " " }}, + {"empty prompt", func(a *Automation) { a.Prompt = "" }}, + {"empty project", func(a *Automation) { a.ProjectPath = "" }}, + {"remote ssh project", func(a *Automation) { a.ProjectPath = "ssh://user@host/path" }}, + {"remote docker project", func(a *Automation) { a.ProjectPath = "docker://c/path" }}, + {"relative project", func(a *Automation) { a.ProjectPath = "relative/dir" }}, + {"dot project", func(a *Automation) { a.ProjectPath = "." }}, + {"bad mode", func(a *Automation) { a.Mode = "yolo" }}, + {"bad trigger type", func(a *Automation) { a.Trigger.Type = "weird" }}, + {"bad cadence", func(a *Automation) { a.Trigger.Cadence = "fortnightly" }}, + {"bad minute", func(a *Automation) { a.Trigger.Minute = 99 }}, + {"bad hour", func(a *Automation) { a.Trigger.Hour = 30 }}, + {"bad weekday", func(a *Automation) { + a.Trigger = Trigger{Type: TriggerSchedule, Cadence: CadenceWeekly, Weekday: 9, Hour: 1, Minute: 0} + }}, + } + for _, c := range cases { + a := validAutomation() + c.mutate(&a) + if err := ValidateAutomation(&a); err == nil { + t.Errorf("%s: expected validation error", c.name) + } + } +} + +func TestIsLocalPath(t *testing.T) { + if !IsLocalPath("/home/x/proj") { + t.Error("local abs path should be usable") + } + if IsLocalPath("") || IsLocalPath("ssh://h/p") || IsLocalPath("docker://c/p") { + t.Error("empty/remote paths should be rejected") + } +} diff --git a/internal/command/acp.go b/internal/command/acp.go index 1f9937c..bc06a28 100644 --- a/internal/command/acp.go +++ b/internal/command/acp.go @@ -358,6 +358,7 @@ func (a *acpAgent) buildAgentSession( env.NewExecuteTool(bgManager), env.NewGrepTool(), 
env.NewTodoWriteTool(), env.NewTodoReadTool(), env.NewGoalSetTool(), env.NewGoalGetTool(), env.NewGoalUpdateTool(), + env.NewAutomationCreateTool(), env.NewSwitchEnvTool(), env.NewCheckBackgroundTool(bgManager), } diff --git a/internal/command/automation.go b/internal/command/automation.go new file mode 100644 index 0000000..6ca2321 --- /dev/null +++ b/internal/command/automation.go @@ -0,0 +1,166 @@ +package command + +import ( + "fmt" + "os" + "text/tabwriter" + + "github.com/spf13/cobra" + + "github.com/cnjack/jcode/internal/automation" +) + +// NewAutomationCmd builds the `jcode automation` command tree for managing +// automations from the terminal. Definition management works standalone (it +// only touches ~/.jcode/automations.json); periodic firing is owned by a running +// `jcode web` process. +func NewAutomationCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "automation", + Aliases: []string{"automations", "auto"}, + Short: "Manage automations (scheduled and manual agent tasks)", + } + cmd.AddCommand( + newAutomationListCmd(), + newAutomationShowCmd(), + newAutomationTemplatesCmd(), + newAutomationEnableCmd(true), + newAutomationEnableCmd(false), + newAutomationDeleteCmd(), + ) + return cmd +} + +func openAutomationStore() (*automation.Store, error) { + return automation.NewStore() +} + +func newAutomationListCmd() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all automations", + RunE: func(cmd *cobra.Command, args []string) error { + st, err := openAutomationStore() + if err != nil { + return err + } + list := st.List() + if len(list) == 0 { + fmt.Println("No automations yet.") + return nil + } + w := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0) + _, _ = fmt.Fprintln(w, "ID\tNAME\tSCHEDULE\tENABLED\tLAST\tPROJECT") + for _, a := range list { + state := st.State(a.ID) + last := state.LastStatus + if last == "" { + last = "-" + } + _, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%v\t%s\t%s\n", + a.ID, a.Name, 
automation.HumanSchedule(a.Trigger), a.Enabled, last, a.ProjectPath) + } + return w.Flush() + }, + } +} + +func newAutomationShowCmd() *cobra.Command { + return &cobra.Command{ + Use: "show ", + Short: "Show an automation's definition and last run state", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + st, err := openAutomationStore() + if err != nil { + return err + } + a := st.Get(args[0]) + if a == nil { + return fmt.Errorf("automation %q not found", args[0]) + } + state := st.State(a.ID) + fmt.Printf("ID: %s\nName: %s\nSchedule: %s\nEnabled: %v\nProject: %s\nMode: %s\nSource: %s\n", + a.ID, a.Name, automation.HumanSchedule(a.Trigger), a.Enabled, a.ProjectPath, a.Mode, a.Source) + fmt.Printf("Last run: %s (%s)\nNext run: %s\n", nz(state.LastRunAt), nz(state.LastStatus), nz(state.NextRunAt)) + if state.LastError != "" { + fmt.Printf("Last error: %s\n", state.LastError) + } + fmt.Printf("\nPrompt:\n%s\n", a.Prompt) + return nil + }, + } +} + +func newAutomationTemplatesCmd() *cobra.Command { + return &cobra.Command{ + Use: "templates", + Short: "List built-in automation templates", + RunE: func(cmd *cobra.Command, args []string) error { + w := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0) + _, _ = fmt.Fprintln(w, "ID\tNAME\tBADGE\tDESCRIPTION") + for _, t := range automation.BuiltinTemplates() { + _, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", t.ID, t.Name, t.Badge, t.Description) + } + return w.Flush() + }, + } +} + +func newAutomationEnableCmd(enable bool) *cobra.Command { + use, short := "enable ", "Enable an automation" + if !enable { + use, short = "disable ", "Disable an automation" + } + return &cobra.Command{ + Use: use, + Short: short, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + st, err := openAutomationStore() + if err != nil { + return err + } + a, err := st.SetEnabled(args[0], enable) + if err != nil { + return err + } + fmt.Printf("%s is now %s\n", a.Name, 
enabledWord(a.Enabled)) + return nil + }, + } +} + +func newAutomationDeleteCmd() *cobra.Command { + return &cobra.Command{ + Use: "delete ", + Aliases: []string{"rm"}, + Short: "Delete an automation", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + st, err := openAutomationStore() + if err != nil { + return err + } + if err := st.Delete(args[0]); err != nil { + return err + } + fmt.Printf("Deleted automation %s\n", args[0]) + return nil + }, + } +} + +func enabledWord(b bool) string { + if b { + return "enabled" + } + return "disabled" +} + +func nz(s string) string { + if s == "" { + return "-" + } + return s +} diff --git a/internal/command/interactive.go b/internal/command/interactive.go index 88ddea4..4c12e68 100644 --- a/internal/command/interactive.go +++ b/internal/command/interactive.go @@ -85,6 +85,7 @@ func (s *interactiveState) buildAllTools() []tool.BaseTool { s.env.NewExecuteTool(s.bgManager), s.env.NewGrepTool(), s.env.NewTodoWriteTool(), s.env.NewTodoReadTool(), s.env.NewGoalSetTool(), s.env.NewGoalGetTool(), s.env.NewGoalUpdateTool(), + s.env.NewAutomationCreateTool(), s.env.NewCheckBackgroundTool(s.bgManager), s.env.NewSubagentTool(&tools.SubagentDeps{ ChatModel: s.chatModel, diff --git a/internal/command/web.go b/internal/command/web.go index 7b6e262..c4a23b7 100644 --- a/internal/command/web.go +++ b/internal/command/web.go @@ -20,6 +20,7 @@ import ( "github.com/spf13/cobra" "github.com/cnjack/jcode/internal/agent" + "github.com/cnjack/jcode/internal/automation" "github.com/cnjack/jcode/internal/channel" "github.com/cnjack/jcode/internal/channel/ble" "github.com/cnjack/jcode/internal/config" @@ -74,6 +75,28 @@ func NewWebCmd() *cobra.Command { return cmd } +// interactiveToolNames are the tools that require a live human to answer — they +// cannot run unattended. 
Automation runs (scheduled, and manual runs that may be +// headless) drop them via dropInteractiveTools so an agent calling ask_user in a +// run with no watching client can't block on the WS channel forever, stalling +// the run until the liveness ceiling cancels it. +var interactiveToolNames = map[string]struct{}{"ask_user": {}} + +// dropInteractiveTools returns tools minus any whose name is in +// interactiveToolNames. Tools whose Info() can't be read are kept (best-effort). +func dropInteractiveTools(tools []tool.BaseTool) []tool.BaseTool { + out := make([]tool.BaseTool, 0, len(tools)) + for _, t := range tools { + if info, err := t.Info(context.Background()); err == nil { + if _, drop := interactiveToolNames[info.Name]; drop { + continue + } + } + out = append(out, t) + } + return out +} + func runWebServer(port int, host string, openBrowser bool) error { // Check if we need setup (no providers configured). needsSetup := config.NeedsSetup() @@ -203,12 +226,30 @@ func runWebServer(port int, host string, openBrowser bool) error { return cm, ctxLimit, nil } + // Automation store (definitions + scheduler state). Skipped in setup mode. + // Created before buildWebTask so every per-task Env shares this one live + // store — the automation_create tool must write through it (not a throwaway) + // so created automations are visible to the REST API and scheduler. + var autoStore *automation.Store + if !needsSetup { + var aerr error + if autoStore, aerr = automation.NewStore(); aerr != nil { + config.Logger().Printf("[automation] store unavailable: %v", aerr) + autoStore = nil + } + } + // buildWebTask is the per-task engine factory. It produces a fully ISOLATED // set of run state — its own env, background manager, recorder, token tracker, // approval state, plan store, and event handler — so concurrent tasks never // share mutable execution state. exec != nil binds the task to a remote SSH // target instead of a local pwd. taskID != "" resumes an existing session. 
- buildWebTask := func(taskID, taskPwd, modeStr string, exec tools.RemoteExecutor) (*web.EngineConfig, error) { + // excludeInteractive drops the tools that require a live human to answer + // (see dropInteractiveTools). Automation runs (scheduled, and manual runs + // that may be headless) set it: an agent in an automation run that calls + // ask_user would otherwise block on the WS channel forever (no client resolves + // it) and stall the run until the liveness ceiling cancels it. + buildWebTask := func(taskID, taskPwd, modeStr string, exec tools.RemoteExecutor, excludeInteractive bool) (*web.EngineConfig, error) { startMode := startupMode if modeStr != "" { startMode = mode.Parse(modeStr) @@ -216,6 +257,7 @@ func runWebServer(port int, host string, openBrowser bool) error { // Fresh execution environment for this task only. tenv := tools.NewEnv(taskPwd, platform) + tenv.AutomationStore = autoStore promptPlatform := platform envLabel := "local" projectKey := taskPwd @@ -289,6 +331,7 @@ func runWebServer(port int, host string, openBrowser bool) error { tenv.NewExecuteTool(tbg), tenv.NewGrepTool(), tenv.NewTodoWriteTool(), tenv.NewTodoReadTool(), tenv.NewGoalSetTool(), tenv.NewGoalGetTool(), tenv.NewGoalUpdateTool(), + tenv.NewAutomationCreateTool(), tenv.NewSwitchEnvTool(), tenv.NewCheckBackgroundTool(tbg), tenv.NewSubagentTool(&tools.SubagentDeps{ @@ -309,6 +352,11 @@ func runWebServer(port int, host string, openBrowser bool) error { if mt := mcpToolsPtr.Load(); mt != nil { all = append(all, (*mt)...) } + // Automation runs are unattended — drop interactive tools that would + // otherwise block on a human who isn't there (see dropInteractiveTools). + if excludeInteractive { + all = dropInteractiveTools(all) + } return all } @@ -490,7 +538,7 @@ func runWebServer(port int, host string, openBrowser bool) error { } // Bootstrap engine for the initial task. 
- bootEC, err := buildWebTask("", pwd, startupMode.String(), nil) + bootEC, err := buildWebTask("", pwd, startupMode.String(), nil, false) if err != nil { return err } @@ -506,10 +554,16 @@ func runWebServer(port int, host string, openBrowser bool) error { CreateAgent: bootEC.CreateAgent, RebuildForMode: bootEC.RebuildForMode, NewEngine: func(taskID, taskPwd, modeStr string) (*web.EngineConfig, error) { - return buildWebTask(taskID, taskPwd, modeStr, nil) + return buildWebTask(taskID, taskPwd, modeStr, nil, false) }, NewRemoteEngine: func(taskID string, exec tools.RemoteExecutor, remotePwd, modeStr string) (*web.EngineConfig, error) { - return buildWebTask(taskID, remotePwd, modeStr, exec) + return buildWebTask(taskID, remotePwd, modeStr, exec, false) + }, + // NewAutomationEngine builds a headless task engine for automation runs. + // Same as NewEngine but drops interactive tools (ask_user) so an unattended + // run can't stall waiting for a human to answer a question no one is watching. + NewAutomationEngine: func(taskID, taskPwd, modeStr string) (*web.EngineConfig, error) { + return buildWebTask(taskID, taskPwd, modeStr, nil, true) }, InitialMode: startupMode.String(), TodoStore: bootEC.TodoStore, @@ -530,8 +584,18 @@ func runWebServer(port int, host string, openBrowser bool) error { NeedsSetup: needsSetup, TokenUsage: bootEC.TokenUsage, ContextBreakdownFn: bootEC.BreakdownFn, + Automations: autoStore, }) + // Start the periodic automation scheduler. A single process owns periodic + // firing (elected via flock); others return immediately. Manual runs work in + // any process regardless of ownership. The flock is OS-released on exit, so a + // crashed owner never deadlocks the election. + if autoStore != nil { + sched := automation.NewScheduler(autoStore, srv.AutomationRunner()) + go sched.Run(ctx) + } + // Set up inbound WeChat message handler now that srv exists. Always register // regardless of WebEnabled — the user can enable via the UI. 
Inbound messages // target the active task (no task_id channel). diff --git a/internal/command/web_tools_test.go b/internal/command/web_tools_test.go new file mode 100644 index 0000000..a4084e2 --- /dev/null +++ b/internal/command/web_tools_test.go @@ -0,0 +1,61 @@ +package command + +import ( + "context" + "testing" + + "github.com/cloudwego/eino/components/tool" + "github.com/cloudwego/eino/schema" +) + +// stubTool is a minimal tool.BaseTool (Info only) for testing tool-set filtering +// without standing up a real Env / model. +type stubTool struct { + name string +} + +func (s stubTool) Info(_ context.Context) (*schema.ToolInfo, error) { + return &schema.ToolInfo{Name: s.name}, nil +} + +// Tools implementing BaseTool compile as tool.BaseTool. +var _ tool.BaseTool = stubTool{} + +// TestDropInteractiveTools verifies that automation runs drop tools that require a +// live human (ask_user) while keeping every other tool. This is the guard against +// an unattended run stalling on a question nobody is watching. +func TestDropInteractiveTools(t *testing.T) { + all := []tool.BaseTool{ + stubTool{name: "read"}, + stubTool{name: "ask_user"}, + stubTool{name: "edit"}, + stubTool{name: "execute"}, + } + + got := dropInteractiveTools(all) + if len(got) != 3 { + t.Fatalf("want 3 tools after dropping ask_user, got %d", len(got)) + } + for _, tl := range got { + info, err := tl.Info(context.Background()) + if err != nil { + t.Fatalf("Info: %v", err) + } + if info.Name == "ask_user" { + t.Fatalf("ask_user was not dropped from automation tool set") + } + } +} + +// TestDropInteractiveToolsKeepsAllWhenNoInteractive confirms the filter is a no-op +// for a tool set with nothing to drop (so normal task tool lists are unaffected). 
+func TestDropInteractiveToolsKeepsAllWhenNoInteractive(t *testing.T) { + all := []tool.BaseTool{ + stubTool{name: "read"}, + stubTool{name: "edit"}, + } + got := dropInteractiveTools(all) + if len(got) != len(all) { + t.Fatalf("want %d tools (nothing to drop), got %d", len(all), len(got)) + } +} diff --git a/internal/session/session.go b/internal/session/session.go index 4167338..fe0232a 100644 --- a/internal/session/session.go +++ b/internal/session/session.go @@ -114,6 +114,17 @@ type SessionMeta struct { Unread bool `json:"unread,omitempty"` Status string `json:"status,omitempty"` // idle/running/done/error (set by the web layer) UpdatedAt string `json:"updated_at,omitempty"` // RFC3339 + // Automation metadata. A run launched by an automation is a normal session + // tagged here: AutomationID is the correlation key for the "Recent runs" + // list, and the main task list excludes any session with AutomationID set so + // nightly runs don't pollute the sidebar. TerminalStatus/EndTime/ErrorReason + // are the run-outcome audit fields (success|error|interrupted) that back the + // Status filter — Status alone is only idle/running. + AutomationID string `json:"automation_id,omitempty"` + TriggerKind string `json:"trigger_kind,omitempty"` // scheduled|manual + TerminalStatus string `json:"terminal_status,omitempty"` + EndTime string `json:"end_time,omitempty"` + ErrorReason string `json:"error_reason,omitempty"` } // sessionIndex is the on-disk structure of session.json. 
diff --git a/internal/tools/automation_tool.go b/internal/tools/automation_tool.go new file mode 100644 index 0000000..675764f --- /dev/null +++ b/internal/tools/automation_tool.go @@ -0,0 +1,113 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/cloudwego/eino/components/tool" + "github.com/cloudwego/eino/schema" + + "github.com/cnjack/jcode/internal/automation" +) + +type automationCreateInput struct { + Name string `json:"name"` + Prompt string `json:"prompt"` + Cadence string `json:"cadence"` // hourly|daily|weekly|manual + Hour int `json:"hour"` // 0-23 (daily/weekly) + Minute int `json:"minute"` // 0-59 + Weekday int `json:"weekday"` // 0=Sun..6=Sat (weekly) + ProjectPath string `json:"project_path"` // defaults to the current working directory +} + +// NewAutomationCreateTool creates the automation_create tool. The agent can +// PROPOSE an automation from natural language, but the automation is always +// created DISABLED with source="agent": only the user can arm it (enable it) on +// the Automations page. This human-in-the-loop gate means a prompt-injected +// agent can never silently stand up a recurring, unattended, auto-approving run. +func (e *Env) NewAutomationCreateTool() tool.InvokableTool { + info := &schema.ToolInfo{ + Name: "automation_create", + Desc: `Propose a new automation (a scheduled or manual agent task) for the user. + +The automation is created DISABLED and will NOT run until the user reviews it and enables it on the Automations page — you cannot arm it yourself. Use this when the user asks to run something on a recurring schedule (e.g. "every morning summarize new issues") or to save a reusable manual task. + +cadence must be one of: "hourly" (uses minute), "daily" (uses hour+minute), "weekly" (uses weekday+hour+minute), or "manual" (no schedule). 
It runs in the current project by default.`, + ParamsOneOf: schema.NewParamsOneOfByParams(map[string]*schema.ParameterInfo{ + "name": {Type: schema.String, Desc: "Short human-readable name for the automation.", Required: true}, + "prompt": {Type: schema.String, Desc: "The instruction the agent will run each time the automation fires.", Required: true}, + "cadence": {Type: schema.String, Desc: `One of "hourly", "daily", "weekly", "manual".`, Required: true}, + "hour": {Type: schema.Integer, Desc: "Hour of day 0-23 (daily/weekly)."}, + "minute": {Type: schema.Integer, Desc: "Minute of hour 0-59."}, + "weekday": {Type: schema.Integer, Desc: "Day of week 0=Sunday..6=Saturday (weekly)."}, + "project_path": {Type: schema.String, Desc: "Absolute local project path; defaults to the current working directory."}, + }), + } + return &automationCreateTool{env: e, info: info} +} + +type automationCreateTool struct { + env *Env + info *schema.ToolInfo +} + +func (t *automationCreateTool) Info(_ context.Context) (*schema.ToolInfo, error) { return t.info, nil } + +func (t *automationCreateTool) InvokableRun(_ context.Context, argumentsInJSON string, _ ...tool.Option) (string, error) { + var in automationCreateInput + if err := json.Unmarshal([]byte(argumentsInJSON), &in); err != nil { + return "", fmt.Errorf("failed to parse automation_create input: %w", err) + } + + trigger, err := triggerFromCadence(in) + if err != nil { + return "", err + } + project := in.ProjectPath + if project == "" { + project = t.env.Pwd() + } + + // Write through the server's live store so the new automation is immediately + // visible to the REST API and scheduler (a throwaway store would only touch + // disk, leaving the server's in-memory cache stale). Fall back to a fresh + // store in contexts with no live server (CLI/ACP). 
+ store := t.env.AutomationStore + if store == nil { + var err error + if store, err = automation.NewStore(); err != nil { + return "", fmt.Errorf("automation store unavailable: %w", err) + } + } + created, err := store.Create(automation.Automation{ + Name: in.Name, + Prompt: in.Prompt, + Trigger: trigger, + ProjectPath: project, + Mode: "full_access", + Source: automation.SourceAgent, + Enabled: false, // human-in-the-loop: the user must enable it + }) + if err != nil { + return "", err + } + return fmt.Sprintf( + "Proposed automation %q (%s) — created DISABLED. Ask the user to review and enable it on the Automations page; it will not run until they do. (id: %s)", + created.Name, automation.HumanSchedule(created.Trigger), created.ID), nil +} + +func triggerFromCadence(in automationCreateInput) (automation.Trigger, error) { + switch in.Cadence { + case "manual", "": + return automation.Trigger{Type: automation.TriggerManual}, nil + case "hourly": + return automation.Trigger{Type: automation.TriggerSchedule, Cadence: automation.CadenceHourly, Minute: in.Minute}, nil + case "daily": + return automation.Trigger{Type: automation.TriggerSchedule, Cadence: automation.CadenceDaily, Hour: in.Hour, Minute: in.Minute}, nil + case "weekly": + return automation.Trigger{Type: automation.TriggerSchedule, Cadence: automation.CadenceWeekly, Weekday: in.Weekday, Hour: in.Hour, Minute: in.Minute}, nil + default: + return automation.Trigger{}, fmt.Errorf("invalid cadence %q (want hourly|daily|weekly|manual)", in.Cadence) + } +} diff --git a/internal/tools/automation_tool_test.go b/internal/tools/automation_tool_test.go new file mode 100644 index 0000000..2236275 --- /dev/null +++ b/internal/tools/automation_tool_test.go @@ -0,0 +1,40 @@ +package tools + +import ( + "context" + "testing" + + "github.com/cnjack/jcode/internal/automation" +) + +// The automation_create tool must write through the Env's live store so the +// created automation is immediately visible to the server's 
in-memory cache, +// REST API, and scheduler — not just persisted to disk via a throwaway store. +func TestAutomationCreateTool_UsesEnvStore(t *testing.T) { + store, err := automation.NewStoreDir(t.TempDir()) + if err != nil { + t.Fatal(err) + } + env := NewEnv(t.TempDir(), "darwin/arm64") + env.AutomationStore = store + + tl := env.NewAutomationCreateTool() + out, err := tl.InvokableRun(context.Background(), + `{"name":"Nightly","prompt":"do the thing","cadence":"daily","hour":9,"minute":0,"project_path":"`+t.TempDir()+`"}`) + if err != nil { + t.Fatalf("InvokableRun: %v (%s)", err, out) + } + + // Visible in the SAME store instance the server would serve from. + list := store.List() + if len(list) != 1 { + t.Fatalf("want 1 automation in the live store, got %d", len(list)) + } + got := list[0] + if got.Enabled { + t.Fatal("agent-created automation must be DISABLED (human-in-the-loop)") + } + if got.Source != automation.SourceAgent { + t.Fatalf("source = %q, want %q", got.Source, automation.SourceAgent) + } +} diff --git a/internal/tools/env.go b/internal/tools/env.go index 43b85c6..c7dd54c 100644 --- a/internal/tools/env.go +++ b/internal/tools/env.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "github.com/cnjack/jcode/internal/automation" appconfig "github.com/cnjack/jcode/internal/config" "golang.org/x/crypto/ssh" ) @@ -29,6 +30,13 @@ type Env struct { OnEnvChange func(envLabel string, isLocal bool, err error) Depth int // subagent nesting depth, 0 for top-level + // AutomationStore is the process-wide automation store shared with the web + // server and its scheduler. The automation_create tool MUST write through it + // (not a throwaway automation.NewStore()) so a created automation is visible + // to the server's in-memory cache, its REST API, and the scheduler. nil falls + // back to opening a fresh store (CLI/ACP contexts with no live server). 
+ AutomationStore *automation.Store + // origExec and origPwd remember the initial executor state so that // ResetToLocal can restore the correct local executor after SSH. origExec Executor diff --git a/internal/web/automation_api.go b/internal/web/automation_api.go new file mode 100644 index 0000000..b9b5ca3 --- /dev/null +++ b/internal/web/automation_api.go @@ -0,0 +1,289 @@ +package web + +import ( + "encoding/json" + "errors" + "net/http" + "sort" + "strconv" + + "github.com/cnjack/jcode/internal/automation" + "github.com/cnjack/jcode/internal/session" +) + +// defaultRunsLimit bounds how many automation runs handleListAutomationRuns +// returns when the client does not pass an explicit ?limit. +const defaultRunsLimit = 100 + +// automationItem is an automation definition plus derived display fields and its +// volatile run-state, as returned to the web UI. +type automationItem struct { + automation.Automation + HumanSchedule string `json:"human_schedule"` + Badge string `json:"badge"` + State automation.RunState `json:"state"` +} + +func (s *Server) autoStore(w http.ResponseWriter) (*automation.Store, bool) { + if s.automations == nil { + writeJSON(w, http.StatusServiceUnavailable, map[string]string{"error": "automations unavailable (setup mode)"}) + return nil, false + } + return s.automations, true +} + +func toItem(st *automation.Store, a *automation.Automation) automationItem { + return automationItem{ + Automation: *a, + HumanSchedule: automation.HumanSchedule(a.Trigger), + Badge: automation.Badge(a.Trigger), + State: st.State(a.ID), + } +} + +func (s *Server) handleListAutomations(w http.ResponseWriter, r *http.Request) { + st, ok := s.autoStore(w) + if !ok { + return + } + list := st.List() + items := make([]automationItem, 0, len(list)) + for _, a := range list { + items = append(items, toItem(st, a)) + } + writeJSON(w, http.StatusOK, items) +} + +func (s *Server) handleGetAutomation(w http.ResponseWriter, r *http.Request) { + st, ok := s.autoStore(w) 
+ if !ok { + return + } + a := st.Get(r.PathValue("id")) + if a == nil { + writeJSON(w, http.StatusNotFound, map[string]string{"error": "automation not found"}) + return + } + writeJSON(w, http.StatusOK, toItem(st, a)) +} + +func (s *Server) handleCreateAutomation(w http.ResponseWriter, r *http.Request) { + st, ok := s.autoStore(w) + if !ok { + return + } + var req struct { + automation.Automation + RunNow bool `json:"run_now"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"}) + return + } + if req.Source == "" { + req.Source = automation.SourceManual + } + created, err := st.Create(req.Automation) + if err != nil { + writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()}) + return + } + if req.RunNow { + // A freshly-created automation has a brand-new id, so the claim always + // succeeds; ignore the result. + s.runAutomationAsync(created) + } + writeJSON(w, http.StatusOK, toItem(st, created)) +} + +func (s *Server) handleUpdateAutomation(w http.ResponseWriter, r *http.Request) { + st, ok := s.autoStore(w) + if !ok { + return + } + // PUT is a partial patch (the TS client sends a partial Automation object): a field + // is only overwritten when present in the body. Pointer fields distinguish + // "omitted" from "zero value", so editing an automation that carries a + // provider/model override — or is paused — never silently clears it. 
+ var req struct { + Name *string `json:"name"` + Prompt *string `json:"prompt"` + Trigger *automation.Trigger `json:"trigger"` + ProjectPath *string `json:"project_path"` + Mode *string `json:"mode"` + Provider *string `json:"provider"` + Model *string `json:"model"` + Enabled *bool `json:"enabled"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"}) + return + } + updated, err := st.Update(r.PathValue("id"), func(a *automation.Automation) { + if req.Name != nil { + a.Name = *req.Name + } + if req.Prompt != nil { + a.Prompt = *req.Prompt + } + if req.Trigger != nil { + a.Trigger = *req.Trigger + } + if req.ProjectPath != nil { + a.ProjectPath = *req.ProjectPath + } + if req.Mode != nil { + a.Mode = *req.Mode + } + if req.Provider != nil { + a.Provider = *req.Provider + } + if req.Model != nil { + a.Model = *req.Model + } + if req.Enabled != nil { + a.Enabled = *req.Enabled + } + }) + if err != nil { + status := http.StatusBadRequest + if errors.Is(err, automation.ErrNotFound) { + status = http.StatusNotFound + } + writeJSON(w, status, map[string]string{"error": err.Error()}) + return + } + writeJSON(w, http.StatusOK, toItem(st, updated)) +} + +func (s *Server) handleDeleteAutomation(w http.ResponseWriter, r *http.Request) { + st, ok := s.autoStore(w) + if !ok { + return + } + if err := st.Delete(r.PathValue("id")); err != nil { + writeJSON(w, http.StatusNotFound, map[string]string{"error": err.Error()}) + return + } + writeJSON(w, http.StatusOK, map[string]string{"status": "deleted"}) +} + +func (s *Server) handleRunAutomation(w http.ResponseWriter, r *http.Request) { + st, ok := s.autoStore(w) + if !ok { + return + } + a := st.Get(r.PathValue("id")) + if a == nil { + writeJSON(w, http.StatusNotFound, map[string]string{"error": "automation not found"}) + return + } + // Reject a manual run if one is already in flight (this server's manual guard) + // or a 
scheduled run is currently executing (shared run-state), so a + // double-click or a run-now racing a scheduled fire can't spawn parallel + // sessions mutating the same project. + if st.State(a.ID).LastStatus == automation.StatusRunning || !s.runAutomationAsync(a) { + writeJSON(w, http.StatusConflict, map[string]string{"error": "a run is already in progress for this automation"}) + return + } + writeJSON(w, http.StatusAccepted, map[string]string{"status": "started"}) +} + +// runAutomationAsync fires a manual run in the background, recording terminal +// state via the shared ExecuteRun bookkeeping. It claims a per-automation +// in-flight slot first and returns false without starting if one is already +// held (concurrency guard); the slot is released when the run completes. +func (s *Server) runAutomationAsync(a *automation.Automation) bool { + s.autoRunMu.Lock() + if s.autoRunInflight == nil { + s.autoRunInflight = make(map[string]bool) + } + if s.autoRunInflight[a.ID] { + s.autoRunMu.Unlock() + return false + } + s.autoRunInflight[a.ID] = true + s.autoRunMu.Unlock() + + go func() { + defer func() { + s.autoRunMu.Lock() + delete(s.autoRunInflight, a.ID) + s.autoRunMu.Unlock() + }() + ctx := s.rootCtx() + if ctx == nil { + return + } + _, _ = automation.ExecuteRun(ctx, s.automations, s.AutomationRunner(), a, automation.KindManual) + }() + return true +} + +func (s *Server) handleAutomationTemplates(w http.ResponseWriter, r *http.Request) { + writeJSON(w, http.StatusOK, automation.BuiltinTemplates()) +} + +// automationRun is one entry in "Recent runs". 
+type automationRun struct { + SessionID string `json:"session_id"` + AutomationID string `json:"automation_id"` + Title string `json:"title"` + Project string `json:"project"` + TriggerKind string `json:"trigger_kind"` + StartTime string `json:"start_time"` + EndTime string `json:"end_time,omitempty"` + TerminalStatus string `json:"terminal_status,omitempty"` + Status string `json:"status,omitempty"` + ErrorReason string `json:"error_reason,omitempty"` +} + +func (s *Server) handleListAutomationRuns(w http.ResponseWriter, r *http.Request) { + q := r.URL.Query() + filter := q.Get("automation_id") + before := q.Get("before") // RFC3339 cursor: only runs that started strictly before + limit := defaultRunsLimit + if v := q.Get("limit"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + limit = n + } + } + all, err := session.ListAllSessions() + if err != nil { + writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) + return + } + runs := make([]automationRun, 0) + for project, metas := range all { + for _, m := range metas { + if m.AutomationID == "" { + continue + } + if filter != "" && m.AutomationID != filter { + continue + } + if before != "" && m.StartTime >= before { + continue + } + runs = append(runs, automationRun{ + SessionID: m.UUID, + AutomationID: m.AutomationID, + Title: m.Title, + Project: project, + TriggerKind: m.TriggerKind, + StartTime: m.StartTime, + EndTime: m.EndTime, + TerminalStatus: m.TerminalStatus, + Status: m.Status, + ErrorReason: m.ErrorReason, + }) + } + } + sort.Slice(runs, func(i, j int) bool { return runs[i].StartTime > runs[j].StartTime }) + // Bound the response (newest first). The underlying scan is still O(total + // sessions); a dedicated automation-runs index is a documented follow-up. 
+	if len(runs) > limit {
+		runs = runs[:limit]
+	}
+	writeJSON(w, http.StatusOK, runs)
+}
diff --git a/internal/web/automation_api_test.go b/internal/web/automation_api_test.go
new file mode 100644
index 0000000..f9484f8
--- /dev/null
+++ b/internal/web/automation_api_test.go
@@ -0,0 +1,202 @@
+package web
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/cnjack/jcode/internal/automation"
+)
+
+// newAutomationTestServer returns a Server whose only populated field is an
+// automation store rooted in a per-test temporary directory. Any store
+// construction error fails the test immediately.
+func newAutomationTestServer(t *testing.T) *Server {
+	t.Helper()
+	store, err := automation.NewStoreDir(t.TempDir())
+	if err != nil {
+		t.Fatal(err)
+	}
+	return &Server{automations: store}
+}
+
+// TestAutomationAPI_CRUD drives create, list, update and delete through the
+// HTTP handlers directly (no mux), checking the response status of each
+// mutating call and the store contents after update and delete.
+func TestAutomationAPI_CRUD(t *testing.T) {
+	s := newAutomationTestServer(t)
+	proj := t.TempDir()
+
+	// Create
+	body := `{"name":"Nightly","prompt":"do the thing","project_path":"` + proj +
+		`","trigger":{"type":"schedule","cadence":"daily","hour":9,"minute":0},"enabled":true}`
+	rec := httptest.NewRecorder()
+	s.handleCreateAutomation(rec, httptest.NewRequest(http.MethodPost, "/api/automations", strings.NewReader(body)))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("create: status %d body %s", rec.Code, rec.Body.String())
+	}
+	// Only the response fields asserted below are decoded; HumanSchedule is
+	// decoded but not checked.
+	var created struct {
+		ID            string `json:"id"`
+		Name          string `json:"name"`
+		HumanSchedule string `json:"human_schedule"`
+		Badge         string `json:"badge"`
+	}
+	if err := json.Unmarshal(rec.Body.Bytes(), &created); err != nil {
+		t.Fatal(err)
+	}
+	if created.ID == "" || created.Name != "Nightly" || created.Badge != "Daily" {
+		t.Fatalf("unexpected created: %+v", created)
+	}
+
+	// List
+	rec = httptest.NewRecorder()
+	s.handleListAutomations(rec, httptest.NewRequest(http.MethodGet, "/api/automations", nil))
+	var list []map[string]any
+	// Check the decode error: a non-JSON or non-array body would otherwise be
+	// reported below as the misleading "want 1 got 0".
+	if err := json.Unmarshal(rec.Body.Bytes(), &list); err != nil {
+		t.Fatalf("list: decode: %v (body %s)", err, rec.Body.String())
+	}
+	if len(list) != 1 {
+		t.Fatalf("list: want 1 got %d", len(list))
+	}
+
+	// Update (rename)
+	upd := `{"name":"Renamed","prompt":"do the thing","project_path":"` + proj +
+		`","trigger":{"type":"schedule","cadence":"daily","hour":10,"minute":30},"enabled":false}`
+	rec = httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPut, "/api/automations/"+created.ID, strings.NewReader(upd))
+	// The handler is called without a mux, so the {id} path value is set by hand.
+	req.SetPathValue("id", created.ID)
+	s.handleUpdateAutomation(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("update: status %d body %s", rec.Code, rec.Body.String())
+	}
+	if s.automations.Get(created.ID).Name != "Renamed" {
+		t.Fatal("rename not applied")
+	}
+
+	// Delete
+	rec = httptest.NewRecorder()
+	req = httptest.NewRequest(http.MethodDelete, "/api/automations/"+created.ID, nil)
+	req.SetPathValue("id", created.ID)
+	s.handleDeleteAutomation(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("delete: status %d", rec.Code)
+	}
+	if len(s.automations.List()) != 0 {
+		t.Fatal("delete did not remove")
+	}
+}
+
+// TestAutomationAPI_UpdateIsPartialPatch checks that a PUT body which omits
+// provider, model and enabled leaves those stored values untouched while the
+// fields it does carry (name, trigger hour) are applied.
+func TestAutomationAPI_UpdateIsPartialPatch(t *testing.T) {
+	s := newAutomationTestServer(t)
+	proj := t.TempDir()
+	// Create with a provider/model override, enabled.
+	a, err := s.automations.Create(automation.Automation{
+		Name: "N", Prompt: "p", ProjectPath: proj, Provider: "openai", Model: "gpt-x",
+		Enabled: true, Trigger: automation.Trigger{Type: automation.TriggerSchedule, Cadence: automation.CadenceDaily, Hour: 9},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	// PUT a body that OMITS provider/model/enabled (what the editor sends when
+	// pausing/editing). They must be preserved, not wiped.
+	body := `{"name":"N2","prompt":"p2","project_path":"` + proj +
+		`","trigger":{"type":"schedule","cadence":"daily","hour":10,"minute":0}}`
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPut, "/api/automations/"+a.ID, strings.NewReader(body))
+	req.SetPathValue("id", a.ID)
+	s.handleUpdateAutomation(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("update: status %d body %s", rec.Code, rec.Body.String())
+	}
+	got := s.automations.Get(a.ID)
+	if got.Name != "N2" || got.Trigger.Hour != 10 {
+		t.Fatalf("patched fields not applied: %+v", got)
+	}
+	if got.Provider != "openai" || got.Model != "gpt-x" {
+		t.Fatalf("provider/model override was wiped: %+v", got)
+	}
+	if !got.Enabled {
+		t.Fatal("enabled was flipped by an omitted field")
+	}
+}
+
+// TestAutomationAPI_CreateValidationError checks that creating an automation
+// with an empty project_path is answered with 400.
+func TestAutomationAPI_CreateValidationError(t *testing.T) {
+	s := newAutomationTestServer(t)
+	// Empty project must be rejected (no-project automations can't run unattended).
+	body := `{"name":"X","prompt":"p","project_path":"","trigger":{"type":"manual"}}`
+	rec := httptest.NewRecorder()
+	s.handleCreateAutomation(rec, httptest.NewRequest(http.MethodPost, "/api/automations", strings.NewReader(body)))
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("want 400, got %d (%s)", rec.Code, rec.Body.String())
+	}
+}
+
+// Updating a non-existent automation must return 404, not 400, so clients can
+// distinguish a missing resource from a validation error.
+func TestAutomationAPI_UpdateNotFound(t *testing.T) {
+	s := newAutomationTestServer(t)
+	body := `{"name":"x"}`
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPut, "/api/automations/nope", strings.NewReader(body))
+	// The handler is invoked without a mux, so the {id} path value is set by hand.
+	req.SetPathValue("id", "nope")
+	s.handleUpdateAutomation(rec, req)
+	if rec.Code != http.StatusNotFound {
+		t.Fatalf("want 404, got %d (%s)", rec.Code, rec.Body.String())
+	}
+}
+
+// A manual "Run Now" must be rejected with 409 while a run for the same
+// automation is already in flight (scheduled run recorded as running, or a
+// manual run already claimed), so a double-click can't spawn parallel sessions.
+func TestAutomationAPI_RunNowConflict(t *testing.T) {
+	s := newAutomationTestServer(t)
+	proj := t.TempDir()
+	a, err := s.automations.Create(automation.Automation{
+		Name: "n", Prompt: "p", ProjectPath: proj, Trigger: automation.Trigger{Type: automation.TriggerManual},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Case 1: a scheduled (or prior) run is recorded as running.
+	// NOTE(review): the UpdateState result is discarded here and in case 2; if
+	// it can fail, the assertion below would be testing unmodified state — confirm.
+	_ = s.automations.UpdateState(a.ID, func(rs *automation.RunState) { rs.LastStatus = automation.StatusRunning })
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/api/automations/"+a.ID+"/run", nil)
+	req.SetPathValue("id", a.ID)
+	s.handleRunAutomation(rec, req)
+	if rec.Code != http.StatusConflict {
+		t.Fatalf("want 409 (run in progress), got %d (%s)", rec.Code, rec.Body.String())
+	}
+
+	// Case 2: state clear, but a manual run already holds the in-flight slot.
+	_ = s.automations.UpdateState(a.ID, func(rs *automation.RunState) { rs.LastStatus = "" })
+	s.autoRunMu.Lock()
+	// newAutomationTestServer builds the Server with a struct literal that does
+	// not set autoRunInflight, so the map is allocated here before the write.
+	if s.autoRunInflight == nil {
+		s.autoRunInflight = map[string]bool{}
+	}
+	s.autoRunInflight[a.ID] = true
+	s.autoRunMu.Unlock()
+	rec = httptest.NewRecorder()
+	req = httptest.NewRequest(http.MethodPost, "/api/automations/"+a.ID+"/run", nil)
+	req.SetPathValue("id", a.ID)
+	s.handleRunAutomation(rec, req)
+	if rec.Code != http.StatusConflict {
+		t.Fatalf("want 409 (already claimed), got %d (%s)", rec.Code, rec.Body.String())
+	}
+}
+
+// TestAutomationAPI_Templates checks that the templates endpoint responds with
+// a JSON array of at least six entries. The response status is not asserted.
+func TestAutomationAPI_Templates(t *testing.T) {
+	s := newAutomationTestServer(t)
+	rec := httptest.NewRecorder()
+	s.handleAutomationTemplates(rec, httptest.NewRequest(http.MethodGet, "/api/automation-templates", nil))
+	var tpls []map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &tpls); err != nil {
+		t.Fatal(err)
+	}
+	if len(tpls) < 6 {
+		t.Fatalf("want >=6 templates, got %d", len(tpls))
+	}
+}
+
+// TestAutomationAPI_SetupModeUnavailable checks that listing automations on a
+// Server with a nil automation store is answered with 503.
+func TestAutomationAPI_SetupModeUnavailable(t *testing.T) {
+	s := &Server{} // no automations store (setup mode)
+	rec := httptest.NewRecorder()
+	s.handleListAutomations(rec, httptest.NewRequest(http.MethodGet, "/api/automations", nil))
+	if rec.Code != http.StatusServiceUnavailable {
+		t.Fatalf("want 503 in setup mode, got %d", rec.Code)
+	}
+}
diff --git a/internal/web/automation_run.go b/internal/web/automation_run.go
new file mode 100644
index 0000000..6e89e9b
--- /dev/null
+++ b/internal/web/automation_run.go
@@ -0,0 +1,177 @@
+package web
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/cnjack/jcode/internal/automation"
+	"github.com/cnjack/jcode/internal/handler"
+	"github.com/cnjack/jcode/internal/session"
+)
+
+// AutomationRunner adapts the web Server to automation.Runner so the scheduler
+// (and the CLI, via the web server) can execute a run by reusing the Engine.
+func (s *Server) AutomationRunner() automation.Runner {
+	return automationRunner{s: s}
+}
+
+// automationRunner is the automation.Runner implementation returned by
+// Server.AutomationRunner; it only holds the Server it delegates to.
+type automationRunner struct{ s *Server }
+
+// StartRun forwards to Server.runAutomation and returns its session id and
+// error unchanged.
+func (r automationRunner) StartRun(ctx context.Context, a *automation.Automation, kind string) (string, error) {
+	return r.s.runAutomation(ctx, a, kind)
+}
+
+// doneCapture wraps an event handler to capture the run's terminal error while
+// preserving the wrapped handler's behavior (it still drives WS broadcast and
+// WeChat/BLE completion notifications). The terminal error is delivered exactly
+// once on the buffered channel.
+type doneCapture struct {
+	handler.AgentEventHandler
+	done chan error
+}
+
+// OnAgentDone calls the wrapped handler first, then offers err on done with a
+// non-blocking send: if the channel's buffer is already full the value is
+// dropped rather than blocking the caller.
+func (d *doneCapture) OnAgentDone(err error) {
+	d.AgentEventHandler.OnAgentDone(err)
+	select {
+	case d.done <- err:
+	default:
+	}
+}
+
+// runAutomation executes one automation run to completion by building a fresh,
+// throwaway headless Engine, injecting the prompt, and blocking until the agent
+// is done. The run is recorded as a normal session tagged with the automation id
+// and trigger kind. Because there is no idle-evict, the engine is torn down on
+// completion. Scheduled runs are forced to full_access (headless approvals would
+// hang); ctx carries the liveness ceiling for scheduled fires.
+//
+// It returns the engine's task id as the session id. The id is empty when the
+// run fails before an engine exists (setup mode, bad project path, engine build
+// error); once the engine is built the id is returned even alongside an error.
+func (s *Server) runAutomation(ctx context.Context, a *automation.Automation, kind string) (string, error) {
+	if s.newEngine == nil {
+		return "", fmt.Errorf("automation runs are unavailable (setup mode)")
+	}
+	if info, err := os.Stat(a.ProjectPath); err != nil || !info.IsDir() {
+		return "", fmt.Errorf("project path is missing or not a directory: %s", a.ProjectPath)
+	}
+
+	// NOTE(review): the override keys on the automation's trigger type, not on
+	// the kind argument, so a manually started run of a schedule-type automation
+	// is forced to full_access as well — confirm this is intended.
+	mode := a.Mode
+	if a.Trigger.Type == automation.TriggerSchedule || mode == "" {
+		mode = "full_access" // headless: Ask/Plan would block forever on approvals
+	}
+
+	// Automation runs are unattended, so they must use a headless engine that
+	// drops interactive tools (ask_user). An agent calling ask_user in a run with
+	// no watching client would otherwise block on the WS channel forever, stalling
+	// the run until the liveness ceiling cancels it. Falls back to the regular
+	// local-engine factory when the dedicated headless one isn't wired (setup mode).
+	eng, err := s.buildLocalEngineWith("", a.ProjectPath, mode,
+		func(taskID, pwd, modeStr string) (*EngineConfig, error) {
+			factory := s.newAutomationEngine
+			if factory == nil {
+				factory = s.newEngine
+			}
+			return factory(taskID, pwd, modeStr)
+		})
+	if err != nil {
+		return "", err
+	}
+	sid := eng.taskID
+
+	// Provider/model override (otherwise inherits the foreground/startup model).
+	// An empty a.Model keeps the engine's current model name. If createAgent
+	// returns an error the switch is skipped silently and the run proceeds on
+	// the inherited model.
+	if a.Provider != "" && eng.createAgent != nil {
+		_, curMdl, _ := eng.modelSnapshot()
+		mdl := a.Model
+		if mdl == "" {
+			mdl = curMdl
+		}
+		if ag, agErr := eng.createAgent(a.Provider, mdl); agErr == nil {
+			eng.applyModelSwitch(ag, a.Provider, mdl)
+		}
+	}
+
+	// Wrap the event handler to capture the run's terminal error without
+	// disturbing the existing notifier chain.
+	done := make(chan error, 1)
+	eng.emu.Lock()
+	eng.eventHandler = &doneCapture{AgentEventHandler: eng.eventHandler, done: done}
+	eng.emu.Unlock()
+
+	// Claim the engine's running flag; if it is already set, release the engine
+	// and report busy.
+	if !eng.running.CompareAndSwap(false, true) {
+		s.deleteEngine(sid)
+		return sid, fmt.Errorf("engine busy")
+	}
+	// NOTE(review): submitMessage's result is discarded. If it can fail without
+	// OnAgentDone ever being called, the select below waits until ctx is done
+	// and the drain goroutine at the end never returns — confirm.
+	_ = s.submitMessage(eng, a.Prompt, mode, "automation", sid, nil)
+	s.stampAutomationMeta(sid, a, kind)
+
+	// completed records whether a terminal error was actually received on done,
+	// as opposed to giving up after the post-cancel grace period.
+	var runErr error
+	completed := false
+	select {
+	case runErr = <-done:
+		completed = true
+	case <-ctx.Done():
+		// Liveness ceiling hit or server shutting down: cancel the in-flight run,
+		// then give it a moment to flush a terminal record.
+		eng.emu.Lock()
+		cancel := eng.runCancel
+		eng.emu.Unlock()
+		if cancel != nil {
+			cancel()
+		}
+		select {
+		case runErr = <-done:
+			completed = true
+		case <-time.After(3 * time.Second):
+			// No terminal record within the grace period: report ctx's error.
+			runErr = ctx.Err()
+		}
+	}
+
+	s.finalizeAutomationMeta(sid, runErr)
+	if completed {
+		s.deleteEngine(sid)
+	} else {
+		// The run goroutine is still live after the cancel; tearing down now would
+		// Close the recorder under a live writer and truncate the session. Drain in
+		// the background and reclaim the engine only once the run actually finishes.
+		// This goroutine has no timeout of its own: it ends only when a value
+		// arrives on done.
+		go func() {
+			<-done
+			s.deleteEngine(sid)
+		}()
+	}
+	return sid, runErr
+}
+
+// stampAutomationMeta tags a run's session with its automation id, trigger kind
+// and a title, so it surfaces in "Recent runs" and is excluded from the main
+// task list.
+//
+// The title is only set when the session has none yet. Both results of
+// UpdateSessionMeta are discarded, so a failed update is not reported.
+func (s *Server) stampAutomationMeta(sessionID string, a *automation.Automation, kind string) {
+	_, _ = session.UpdateSessionMeta(sessionID, func(m *session.SessionMeta) {
+		m.AutomationID = a.ID
+		m.TriggerKind = kind
+		if m.Title == "" {
+			m.Title = a.Name
+		}
+		m.UpdatedAt = time.Now().Format(time.RFC3339)
+	})
+}
+
+// finalizeAutomationMeta records the run-outcome audit fields used by the Status
+// (Success/Failed) filter.
+func (s *Server) finalizeAutomationMeta(sessionID string, runErr error) {
+	// Best-effort: both results of UpdateSessionMeta are discarded, so a failed
+	// update is not reported to the caller.
+	_, _ = session.UpdateSessionMeta(sessionID, func(m *session.SessionMeta) {
+		// EndTime and UpdatedAt share one RFC 3339 timestamp.
+		m.EndTime = time.Now().Format(time.RFC3339)
+		m.UpdatedAt = m.EndTime
+		if runErr != nil {
+			m.TerminalStatus = automation.StatusError
+			// Cap the stored reason at 300 bytes (plus the ellipsis).
+			m.ErrorReason = truncateReason(runErr.Error(), 300)
+		} else {
+			m.TerminalStatus = automation.StatusSuccess
+		}
+	})
+}
+
+// truncateReason returns s unchanged when it is at most n bytes long; otherwise
+// it returns a prefix of at most n bytes followed by "…".
+//
+// The cut never lands inside a multi-byte UTF-8 sequence: when the byte at the
+// cut point is a continuation byte the cut moves back (by at most three bytes)
+// to the start of that rune, so valid UTF-8 input yields valid UTF-8 output.
+// A negative n is treated as 0 instead of panicking on the slice expression.
+func truncateReason(s string, n int) string {
+	if n < 0 {
+		n = 0
+	}
+	if len(s) <= n {
+		return s
+	}
+	// len(s) > n here, so s[cut] is always in range. A UTF-8 continuation byte
+	// has the bit pattern 10xxxxxx; a rune has at most three of them, hence
+	// the bound on how far the cut may retreat (which also keeps malformed
+	// input from being trimmed away entirely).
+	cut := n
+	for cut > 0 && n-cut < 3 && s[cut]&0xC0 == 0x80 {
+		cut--
+	}
+	return s[:cut] + "…"
+}
diff --git a/internal/web/automation_run_test.go b/internal/web/automation_run_test.go
new file mode 100644
index 0000000..83600f4
--- /dev/null
+++ b/internal/web/automation_run_test.go
@@ -0,0 +1,40 @@
+package web
+
+import "testing"
+
+// runAutomation keys the engine on eng.taskID (the single registration done by
+// buildLocalEngine) and reclaims it with deleteEngine(eng.taskID). This guards
+// the Finding-1 contract: the engine must live under exactly ONE tasks-map key
+// so a run can't leak an entry (the earlier code registered a second time under
+// a different id, leaking one entry per run and exhausting the pool after
+// maxLiveEngines runs).
+func TestAutomationEngineRegisteredOnceAndReclaimed(t *testing.T) {
+	s := stubFactoryServer(t)
+
+	// snapshot reads, under the tasks read lock, how many engines are live and
+	// whether id is one of them.
+	snapshot := func(id string) (live int, registered bool) {
+		s.tasksMu.RLock()
+		defer s.tasksMu.RUnlock()
+		live = len(s.tasks)
+		_, registered = s.tasks[id]
+		return live, registered
+	}
+
+	// Go past the pool size so a per-run leak would surface as a build error.
+	rounds := maxLiveEngines + 8
+	for run := 0; run < rounds; run++ {
+		eng, err := s.buildLocalEngine("", "/proj/auto", "full_access")
+		if err != nil {
+			t.Fatalf("run %d: buildLocalEngine: %v (engine pool leaked?)", run, err)
+		}
+		sid := eng.taskID // exactly what runAutomation uses as the session id
+
+		live, registered := snapshot(sid)
+		switch {
+		case !registered:
+			t.Fatalf("run %d: engine not registered under its taskID", run)
+		case live != 1:
+			t.Fatalf("run %d: want exactly 1 live engine, got %d (double registration leaks)", run, live)
+		}
+
+		s.deleteEngine(sid) // run completion
+		if live, _ = snapshot(sid); live != 0 {
+			t.Fatalf("run %d: engine not reclaimed, %d still live", run, live)
+		}
+	}
+}
diff --git a/internal/web/concurrency_test.go b/internal/web/concurrency_test.go
index 33f344a..c9566e3 100644
--- a/internal/web/concurrency_test.go
+++ b/internal/web/concurrency_test.go
@@ -99,7 +99,9 @@ func TestBrokerDeliversByTaskID(t *testing.T) {
 // TestEnginePumpStampsTaskID is the end-to-end pump test: an engine's handler
 // event reaches a subscribed client tagged with that engine's task id.
func TestEnginePumpStampsTaskID(t *testing.T) { - s := &Server{Engine: &Engine{}, tasks: make(map[string]*Engine), wsBroker: NewWSBroker(), ctx: context.Background()} + s := &Server{Engine: &Engine{}, tasks: make(map[string]*Engine), wsBroker: NewWSBroker()} + bg := context.Background() + s.ctxPtr.Store(&bg) h := handler.NewWebHandler() eng := &Engine{taskID: "task-1", handler: h} @@ -136,7 +138,9 @@ func TestEnginePumpStampsTaskID(t *testing.T) { func stubFactoryServer(t *testing.T) *Server { t.Helper() t.Setenv("HOME", t.TempDir()) - s := &Server{Engine: &Engine{}, tasks: make(map[string]*Engine), wsBroker: NewWSBroker(), ctx: context.Background()} + s := &Server{Engine: &Engine{}, tasks: make(map[string]*Engine), wsBroker: NewWSBroker()} + bg := context.Background() + s.ctxPtr.Store(&bg) s.newEngine = func(taskID, pwd, modeStr string) (*EngineConfig, error) { rec, _ := session.NewRecorder(pwd, "prov", "model") if taskID != "" && rec != nil { diff --git a/internal/web/engine.go b/internal/web/engine.go index 6194663..495d050 100644 --- a/internal/web/engine.go +++ b/internal/web/engine.go @@ -280,7 +280,7 @@ func (s *Server) registerEngine(eng *Engine) error { } var pumpCtx context.Context if !existed { - base := s.ctx + base := s.rootCtx() if base == nil { base = context.Background() } @@ -322,7 +322,15 @@ func (s *Server) buildLocalEngine(taskID, pwd, modeStr string) (*Engine, error) if s.newEngine == nil { return nil, fmt.Errorf("task creation is not supported") } - ec, err := s.newEngine(taskID, pwd, modeStr) + return s.buildLocalEngineWith(taskID, pwd, modeStr, s.newEngine) +} + +// buildLocalEngineWith assembles, model-inherits, and registers a local task +// engine using the supplied factory. The factory is a parameter so automation +// runs can pass the headless factory (which drops interactive tools) while +// sharing all the registration/model-inheritance plumbing with normal tasks. 
+func (s *Server) buildLocalEngineWith(taskID, pwd, modeStr string, factory func(taskID, pwd, mode string) (*EngineConfig, error)) (*Engine, error) { + ec, err := factory(taskID, pwd, modeStr) if err != nil { return nil, err } diff --git a/internal/web/git_test.go b/internal/web/git_test.go index dd15146..c5f3abf 100644 --- a/internal/web/git_test.go +++ b/internal/web/git_test.go @@ -49,7 +49,9 @@ func TestGitCheckoutRejectsDashBranch(t *testing.T) { t.Fatal(err) } - s := &Server{Engine: &Engine{pwd: repo}, ctx: context.Background()} + s := &Server{Engine: &Engine{pwd: repo}} + bg := context.Background() + s.ctxPtr.Store(&bg) rec := httptest.NewRecorder() req := httptest.NewRequest(http.MethodPost, "/api/git/checkout", strings.NewReader(`{"branch":"-f"}`)) s.handleGitCheckout(rec, req) diff --git a/internal/web/server.go b/internal/web/server.go index aaee9ff..c8fc106 100644 --- a/internal/web/server.go +++ b/internal/web/server.go @@ -17,12 +17,14 @@ import ( "sort" "strings" "sync" + "sync/atomic" "time" "github.com/cloudwego/eino/adk" "github.com/cloudwego/eino/schema" "github.com/gorilla/websocket" + "github.com/cnjack/jcode/internal/automation" "github.com/cnjack/jcode/internal/channel" "github.com/cnjack/jcode/internal/config" "github.com/cnjack/jcode/internal/handler" @@ -62,8 +64,11 @@ type Server struct { // once tasks truly run in parallel). mu sync.RWMutex - // Server-level context (from Start), used for background agent work. - ctx context.Context + // ctxPtr holds the server-level context (set in Start), used for background + // agent work. Stored atomically because the automation scheduler/manual-run + // goroutines (launched by command.runWebServer before Start) may read it + // concurrently with Start's write. Read via rootCtx. + ctxPtr atomic.Pointer[context.Context] // Dependencies set during initialization. tracer *telemetry.LangfuseTracer @@ -82,6 +87,11 @@ type Server struct { // to a remote executor (SSH or Docker) instead of a local pwd. 
newRemoteEngine func(taskID string, executor tools.RemoteExecutor, remotePwd, mode string) (*EngineConfig, error) + // newAutomationEngine builds a headless task engine for automation runs: like + // newEngine but drops interactive tools (ask_user) so an unattended run can't + // stall waiting on a human. Falls back to newEngine when unset (back-compat). + newAutomationEngine func(taskID, pwd, mode string) (*EngineConfig, error) + // remoteConns holds SSH connections established by the remote-connect wizard // that have not yet been bound to the live env (keyed by connection id). remoteConns *remoteConnRegistry @@ -115,39 +125,53 @@ type Server struct { // usageStore backs the global usage-statistics endpoint. nil falls back to // usage.Default(); tests inject a temp-dir store. usageStore *usage.Store + + // automations is the automation definition/run store (nil in setup mode). + // Run execution reuses the Engine via automationRunner; the periodic + // scheduler is owned by command.runWebServer. + automations *automation.Store + + // autoRunMu guards autoRunInflight, the set of automation ids with a manual + // run currently in flight on this server. It is the manual-run analogue of the + // scheduler's own inflight guard: without it a double-click (or two clients) + // would launch parallel agent sessions mutating the same project directory. + autoRunMu sync.Mutex + autoRunInflight map[string]bool } // ServerConfig holds the configuration for creating a new Server. 
type ServerConfig struct { - Port int - Host string - OpenBrowser bool - Pwd string - Version string - Agent *adk.ChatModelAgent - CreateAgent func(providerName, modelName string) (*adk.ChatModelAgent, error) - RebuildForMode func(planMode bool) (*adk.ChatModelAgent, error) - NewEngine func(taskID, pwd, mode string) (*EngineConfig, error) // factory for new concurrent task engines (local) - NewRemoteEngine func(taskID string, executor tools.RemoteExecutor, remotePwd, mode string) (*EngineConfig, error) // remote sibling of NewEngine (SSH or Docker) - InitialMode string // unified startup mode string ("approval"/"plan"/"full_access") - TodoStore *tools.TodoStore - Recorder *session.Recorder - Tracer *telemetry.LangfuseTracer - Env *tools.Env - ProviderName string - ModelName string - Config *config.Config - Registry *model.ModelRegistry - ApprovalState *runner.ApprovalState - SkillLoader *skills.Loader - ReloadMCP func(servers map[string]*config.MCPServer) ([]tools.MCPStatus, error) // optional: hot-reload MCP tools - InitialMCPStatuses []tools.MCPStatus // statuses from the startup MCP load - WechatClient channel.Channel // optional WeChat channel - WebHandler *handler.WebHandler // optional: pre-created handler for sharing with tools - EventHandler handler.AgentEventHandler // optional: handler for runner (e.g. 
NotifyingHandler) - NeedsSetup bool // true when no providers are configured (setup mode) - TokenUsage *model.TokenUsage // optional: shared token tracker (created when nil) - ContextBreakdownFn func() usage.ContextBreakdown // optional: live per-task context breakdown + Port int + Host string + OpenBrowser bool + Pwd string + Version string + Agent *adk.ChatModelAgent + CreateAgent func(providerName, modelName string) (*adk.ChatModelAgent, error) + RebuildForMode func(planMode bool) (*adk.ChatModelAgent, error) + NewEngine func(taskID, pwd, mode string) (*EngineConfig, error) // factory for new concurrent task engines (local) + NewRemoteEngine func(taskID string, executor tools.RemoteExecutor, remotePwd, mode string) (*EngineConfig, error) // remote sibling of NewEngine (SSH or Docker) + NewAutomationEngine func(taskID, pwd, mode string) (*EngineConfig, error) // headless sibling of NewEngine for automation runs (drops interactive tools) + InitialMode string // unified startup mode string ("approval"/"plan"/"full_access") + TodoStore *tools.TodoStore + Recorder *session.Recorder + Tracer *telemetry.LangfuseTracer + Env *tools.Env + ProviderName string + ModelName string + Config *config.Config + Registry *model.ModelRegistry + ApprovalState *runner.ApprovalState + SkillLoader *skills.Loader + ReloadMCP func(servers map[string]*config.MCPServer) ([]tools.MCPStatus, error) // optional: hot-reload MCP tools + InitialMCPStatuses []tools.MCPStatus // statuses from the startup MCP load + WechatClient channel.Channel // optional WeChat channel + WebHandler *handler.WebHandler // optional: pre-created handler for sharing with tools + EventHandler handler.AgentEventHandler // optional: handler for runner (e.g. 
NotifyingHandler) + NeedsSetup bool // true when no providers are configured (setup mode) + TokenUsage *model.TokenUsage // optional: shared token tracker (created when nil) + ContextBreakdownFn func() usage.ContextBreakdown // optional: live per-task context breakdown + Automations *automation.Store // optional: automation store (nil in setup mode) } // NewServer creates a new web server. @@ -187,29 +211,32 @@ func NewServer(cfg *ServerConfig) *Server { boot.taskID = boot.recorder.UUID() } s := &Server{ - Engine: boot, - tasks: make(map[string]*Engine), - port: cfg.Port, - host: cfg.Host, - openBrowser: cfg.OpenBrowser, - version: cfg.Version, - wsBroker: NewWSBroker(), - newEngine: cfg.NewEngine, - newRemoteEngine: cfg.NewRemoteEngine, - remoteConns: newRemoteConnRegistry(), - tracer: cfg.Tracer, - cfg: cfg.Config, - registry: cfg.Registry, - ptyMgr: newPTYManager(), - skillLoader: cfg.SkillLoader, - reloadMCP: cfg.ReloadMCP, - mcpStatuses: make(map[string]tools.MCPStatus), - mcpLogins: make(map[string]*mcpLoginState), - wechatClient: cfg.WechatClient, - needsSetup: cfg.NeedsSetup, + Engine: boot, + tasks: make(map[string]*Engine), + port: cfg.Port, + host: cfg.Host, + openBrowser: cfg.OpenBrowser, + version: cfg.Version, + wsBroker: NewWSBroker(), + newEngine: cfg.NewEngine, + newRemoteEngine: cfg.NewRemoteEngine, + newAutomationEngine: cfg.NewAutomationEngine, + remoteConns: newRemoteConnRegistry(), + tracer: cfg.Tracer, + cfg: cfg.Config, + registry: cfg.Registry, + ptyMgr: newPTYManager(), + skillLoader: cfg.SkillLoader, + reloadMCP: cfg.ReloadMCP, + mcpStatuses: make(map[string]tools.MCPStatus), + mcpLogins: make(map[string]*mcpLoginState), + wechatClient: cfg.WechatClient, + needsSetup: cfg.NeedsSetup, + automations: cfg.Automations, + autoRunInflight: make(map[string]bool), } // The bootstrap engine is registered (and its pump started) in Start, once - // s.ctx exists. + // the root context exists. 
for _, st := range cfg.InitialMCPStatuses { s.mcpStatuses[st.Name] = st } @@ -226,9 +253,19 @@ func (s *Server) Handler() *handler.WebHandler { return s.activeHandler() } +// rootCtx returns the server-level context set by Start, or nil before Start +// has run. Background goroutines (the automation scheduler/manual runs) must +// tolerate a nil result, which means the server has not started serving yet. +func (s *Server) rootCtx() context.Context { + if p := s.ctxPtr.Load(); p != nil { + return *p + } + return nil +} + // Start starts the web server. Blocks until context is cancelled. func (s *Server) Start(ctx context.Context) error { - s.ctx = ctx + s.ctxPtr.Store(&ctx) mux := http.NewServeMux() // API routes @@ -279,6 +316,14 @@ func (s *Server) Start(ctx context.Context) error { mux.HandleFunc("POST /api/remote/save-alias", s.handleRemoteSaveAlias) mux.HandleFunc("GET /api/docker/containers", s.handleListContainers) mux.HandleFunc("POST /api/remote/save-docker-alias", s.handleRemoteSaveDockerAlias) + mux.HandleFunc("GET /api/automations", s.handleListAutomations) + mux.HandleFunc("POST /api/automations", s.handleCreateAutomation) + mux.HandleFunc("GET /api/automations/runs", s.handleListAutomationRuns) + mux.HandleFunc("GET /api/automations/{id}", s.handleGetAutomation) + mux.HandleFunc("PUT /api/automations/{id}", s.handleUpdateAutomation) + mux.HandleFunc("DELETE /api/automations/{id}", s.handleDeleteAutomation) + mux.HandleFunc("POST /api/automations/{id}/run", s.handleRunAutomation) + mux.HandleFunc("GET /api/automation-templates", s.handleAutomationTemplates) mux.HandleFunc("GET /api/skills", s.handleListSkills) mux.HandleFunc("POST /api/skills/{name}/toggle", s.handleToggleSkill) mux.HandleFunc("GET /api/slash-commands", s.handleSlashCommands) @@ -693,8 +738,13 @@ func (s *Server) submitMessage(eng *Engine, message, mode, source, sessionID str eng.emu.Unlock() // Stream response via WebSocket — run agent in background. 
Each task derives - // its own cancellable context so /stop cancels only that task. - runCtx, runCancel := context.WithCancel(s.ctx) + // its own cancellable context so /stop cancels only that task. Fall back to + // Background if a run is somehow submitted before Start set the root context. + base := s.rootCtx() + if base == nil { + base = context.Background() + } + runCtx, runCancel := context.WithCancel(base) eng.emu.Lock() eng.runGen++ gen := eng.runGen @@ -773,6 +823,12 @@ func (s *Server) handleListAllTasks(w http.ResponseWriter, r *http.Request) { items := make([]taskItem, 0) for project, metas := range all { for _, m := range metas { + // Automation runs are surfaced on the Automations page ("Recent + // runs"), not the main task list — exclude them here so a nightly + // automation doesn't bury the sidebar. + if m.AutomationID != "" { + continue + } items = append(items, taskItem{ UUID: m.UUID, Project: project, @@ -1544,7 +1600,7 @@ func (s *Server) handleExec(w http.ResponseWriter, r *http.Request) { return } - ctx, cancel := context.WithTimeout(s.ctx, 30*1e9) // 30 seconds + ctx, cancel := context.WithTimeout(s.rootCtx(), 30*1e9) // 30 seconds defer cancel() cmd := exec.CommandContext(ctx, "sh", "-c", req.Command) @@ -1595,7 +1651,7 @@ func (s *Server) handleDiff(w http.ResponseWriter, r *http.Request) { args = []string{"diff", "--no-color"} } - cmd := exec.CommandContext(s.ctx, "git", args...) + cmd := exec.CommandContext(s.rootCtx(), "git", args...) 
cmd.Dir = s.activePwd() output, _ := cmd.CombinedOutput() @@ -1612,12 +1668,12 @@ func (s *Server) handleDiff(w http.ResponseWriter, r *http.Request) { rawDiff := string(output) // Also get changed file list for status - statCmd := exec.CommandContext(s.ctx, "git", "diff", "--stat", "--no-color") + statCmd := exec.CommandContext(s.rootCtx(), "git", "diff", "--stat", "--no-color") switch mode { case "staged": - statCmd = exec.CommandContext(s.ctx, "git", "diff", "--cached", "--stat", "--no-color") + statCmd = exec.CommandContext(s.rootCtx(), "git", "diff", "--cached", "--stat", "--no-color") case "branch": - statCmd = exec.CommandContext(s.ctx, "git", "diff", "HEAD~1", "--stat", "--no-color") + statCmd = exec.CommandContext(s.rootCtx(), "git", "diff", "HEAD~1", "--stat", "--no-color") } statCmd.Dir = s.activePwd() _, _ = statCmd.CombinedOutput() @@ -1709,13 +1765,13 @@ func (s *Server) takeSessionSnapshot(eng *Engine) { } // Use "git stash create" to get a tree-ish of the current state without // actually stashing. If there are no changes, use HEAD. 
- cmd := exec.CommandContext(s.ctx, "git", "stash", "create") + cmd := exec.CommandContext(s.rootCtx(), "git", "stash", "create") cmd.Dir = eng.pwd out, err := cmd.Output() snapshot := strings.TrimSpace(string(out)) if err != nil || snapshot == "" { // No local changes — use HEAD as baseline - cmd2 := exec.CommandContext(s.ctx, "git", "rev-parse", "HEAD") + cmd2 := exec.CommandContext(s.rootCtx(), "git", "rev-parse", "HEAD") cmd2.Dir = eng.pwd out2, _ := cmd2.Output() snapshot = strings.TrimSpace(string(out2)) @@ -1758,7 +1814,7 @@ func (s *Server) handleSessionDiff(w http.ResponseWriter, _ *http.Request) { } // Diff from snapshot to current working tree - cmd := exec.CommandContext(s.ctx, "git", "diff", snapshot, "--no-color") + cmd := exec.CommandContext(s.rootCtx(), "git", "diff", snapshot, "--no-color") cmd.Dir = pwd output, _ := cmd.CombinedOutput() @@ -2137,7 +2193,7 @@ func (s *Server) setMCPLogin(name, status, msg string) { } func (s *Server) runMCPLogin(name string) { - ctx, cancel := context.WithTimeout(s.ctx, 5*time.Minute) + ctx, cancel := context.WithTimeout(s.rootCtx(), 5*time.Minute) defer cancel() s.mu.RLock() diff --git a/internal/web/tasks_test.go b/internal/web/tasks_test.go index 58bc18a..3a4fd38 100644 --- a/internal/web/tasks_test.go +++ b/internal/web/tasks_test.go @@ -35,7 +35,9 @@ func seedIndex(t *testing.T, sessions map[string][]session.SessionMeta) { // P0-1: GET /api/workspace on a non-git directory returns empty branch + not dirty. 
func TestWorkspaceNonGit(t *testing.T) { - s := &Server{Engine: &Engine{pwd: t.TempDir()}, ctx: context.Background()} + s := &Server{Engine: &Engine{pwd: t.TempDir()}} + bg := context.Background() + s.ctxPtr.Store(&bg) rec := httptest.NewRecorder() s.handleWorkspace(rec, httptest.NewRequest(http.MethodGet, "/api/workspace", nil)) if rec.Code != http.StatusOK { diff --git a/web/src/App.vue b/web/src/App.vue index 44640cb..6ad1ffa 100644 --- a/web/src/App.vue +++ b/web/src/App.vue @@ -23,10 +23,16 @@ import RightPanel from '@/components/RightPanel.vue' import SetupView from '@/components/SetupView.vue' import TopBar from '@/components/TopBar.vue' import CommandPalette from '@/components/CommandPalette.vue' +import AutomationsView from '@/components/AutomationsView.vue' +import AutomationRunView from '@/components/AutomationRunView.vue' +import ChannelsView from '@/components/ChannelsView.vue' import { useNotifications } from '@/composables/notifications' +import { useAutomationStore } from '@/stores/automation' +import type { AutomationRun } from '@/types/automation' const store = useChatStore() const projectStore = useProjectStore() +const automationStore = useAutomationStore() const { t } = useI18n() const { resolvedTheme, toggleTheme } = useTheme() const { refresh: refreshBranch } = useBranch() @@ -35,6 +41,16 @@ const messagesEl = ref(null) const settingsOpen = ref(false) const projectsOpen = ref(false) const paletteOpen = ref(false) +// View switch inside the shell: the main column shows either the chat canvas, +// the Automations page, the Channels page, or an automation run detail page. +// Unlike the dialog overlays (Settings/Projects), this is a real page change — +// each renders as a wrapped inset surface that shares
's geometry with +// the chat panel, not a full-bleed takeover. +const activeView = ref<'chat' | 'automations' | 'channels' | 'automation-run'>('chat') +// The run drilled into from the Automations page (clicking a run row, or Run +// again from a card). Resolved against the automation it belongs to so the +// detail header can show the name + schedule. +const activeRun = ref(null) // Remote-connect (SSH) wizard. `openRemoteConnect` is provided to descendants // (WorkspacePicker, ProjectSwitcher, Sidebar) so any of them can launch or @@ -56,6 +72,10 @@ provide('onWorkspaceSwitched', () => onProjectSwitched()) // reload, like onProjectSwitched) but land on a fresh welcome screen so the next // message starts a brand-new task there — instead of restoring its last session. provide('onNewTaskInProject', (path: string) => startNewTaskInProject(path)) +// Switch the shell back to the chat page. Provided to the sidebar so the +// "New task" button (and any other chat-bound nav) returns from the Automations +// page to a fresh chat instead of doing nothing behind the overlay. +provide('goToChat', () => { activeView.value = 'chat' }) // When the wizard is launched from Settings it stacks ON TOP of the Settings // overlay. headlessui treats a click inside the wizard as an "outside" click for @@ -81,6 +101,18 @@ function onPaletteAction(name: 'settings' | 'projects' | 'theme') { else if (name === 'theme') toggleTheme() } +// Drill into an automation run detail page from the Automations list. Resolves +// the parent automation (for the header name + schedule) from the store. +function openAutomationRun(run: AutomationRun) { + activeRun.value = run + activeView.value = 'automation-run' +} +// The automation backing the active run detail view (best-effort lookup by id). +function automationForRun(run: AutomationRun | null) { + if (!run) return null + return automationStore.items.find((a) => a.id === run.automation_id) ?? 
null +} + const bottomPanel = ref<'none' | 'terminal'>('none') const bottomPanelHeight = ref(260) const isResizingPanel = ref(false) @@ -187,7 +219,12 @@ function handleGlobalKeydown(e: KeyboardEvent) { paletteOpen.value = !paletteOpen.value return } - if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === 'N') { + // ⌘N — New task. (Used to be ⌘⇧N; ⌘N is the conventional "new" binding.) The + // other app shortcuts keep a Shift modifier, so ⌘N alone never collides with + // ⌘⇧K / ⌘⇧E etc. It is deliberately not skipped while the user is typing in a + // field: the only clash would be the browser's native "new window" binding, + // which the Tauri shell lacks and which preventDefault() overrides on web. + if ((e.ctrlKey || e.metaKey) && !e.shiftKey && (e.key === 'n' || e.key === 'N')) { e.preventDefault() store.newSession() return @@ -197,6 +234,9 @@ function handleGlobalKeydown(e: KeyboardEvent) { settingsOpen.value = !settingsOpen.value return } + // Automations / Channels are reached only via the nav buttons — they no longer + // have keyboard shortcuts (the ⌘⇧A / ⌘⇧C bindings were removed). Esc still + // steps back from these pages to chat. // Esc stops the agent only when no overlay is open — otherwise pressing Esc to // dismiss a dialog (Settings/Projects/Palette/Wizard) would also kill the run. if ( @@ -205,12 +245,30 @@ function handleGlobalKeydown(e: KeyboardEvent) { !settingsOpen.value && !projectsOpen.value && !paletteOpen.value && - !remoteWizardOpen.value + !remoteWizardOpen.value && + activeView.value === 'chat' ) { e.preventDefault() store.stopAgent() return } + // On a non-chat page (Automations/Channels/automation-run, no overlay open) + // Esc steps back one level: from a run detail → the Automations page, and + // from Automations/Channels → chat. It should not also kill a running agent, + // mirroring the overlay-dismissal guard above. 
+ if ( + e.key === 'Escape' && + !settingsOpen.value && + !projectsOpen.value && + !paletteOpen.value && + !remoteWizardOpen.value && + activeView.value !== 'chat' + ) { + e.preventDefault() + if (activeView.value === 'automation-run') activeView.value = 'automations' + else activeView.value = 'chat' + return + } if ((e.ctrlKey || e.metaKey) && e.key === '`') { e.preventDefault() togglePanel('terminal') @@ -415,8 +473,12 @@ function startResize(e: MouseEvent) {