diff --git a/.claude/launch.json b/.claude/launch.json new file mode 100644 index 00000000..8aaa746e --- /dev/null +++ b/.claude/launch.json @@ -0,0 +1,17 @@ +{ + "version": "0.0.1", + "configurations": [ + { + "name": "dsgui-site", + "runtimeExecutable": "npm", + "runtimeArgs": ["run", "dev", "--prefix", "/Users/zxy/codeproject/ds_project/dsgui-admin"], + "autoPort": true + }, + { + "name": "renderer-live", + "runtimeExecutable": "sleep", + "runtimeArgs": ["86400"], + "port": 5173 + } + ] +} diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index b62f8c88..08d9df01 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,5 +1,5 @@ name: Bug report -description: Report a reproducible bug or regression in DeepSeek GUI +description: Report a reproducible bug or regression in Kun title: "[Bug] " labels: - bug @@ -7,7 +7,7 @@ body: - type: markdown attributes: value: | - Thanks for helping us improve DeepSeek GUI. + Thanks for helping us improve Kun. Please include enough detail for someone else to reproduce the issue. - type: textarea id: summary @@ -45,7 +45,7 @@ body: - type: input id: version attributes: - label: DeepSeek GUI version + label: Kun version placeholder: e.g. v0.1.0 validations: required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 40d7af20..6dfc808e 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -4,5 +4,5 @@ contact_links: url: mailto:security@deepseek-gui.com about: Please report security issues privately instead of opening a public issue. - name: Contribution guide - url: https://github.com/XingYu-Zhong/DeepSeek-GUI/blob/master/docs/CONTRIBUTING.md + url: https://github.com/KunAgent/Kun/blob/master/docs/CONTRIBUTING.md about: Read the contribution workflow and validation expectations before opening a PR. 
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 66920eeb..683b80c0 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -20,6 +20,7 @@ ## Validation +- [ ] I agree that this contribution is submitted under the [Contributor License Agreement](https://github.com/KunAgent/Kun/blob/develop/CLA.md). - [ ] `npm run test` - [ ] `npm run typecheck` - [ ] `npm run build` diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 550a5637..134884c9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -54,6 +54,8 @@ jobs: runs-on: macos-latest needs: prepare env: + KUN_APP_VERSION: ${{ needs.prepare.outputs.version }} + KUN_UPDATE_CHANNEL: stable DEEPSEEK_GUI_APP_VERSION: ${{ needs.prepare.outputs.version }} DEEPSEEK_GUI_UPDATE_CHANNEL: stable RELEASE_CHANNEL: stable @@ -127,9 +129,9 @@ jobs: if-no-files-found: error retention-days: 7 path: | - dist/DeepSeek-GUI-*-mac-*.dmg - dist/DeepSeek-GUI-*-mac-*.zip - dist/DeepSeek-GUI-*-mac-*.blockmap + dist/Kun-*-mac-*.dmg + dist/Kun-*-mac-*.zip + dist/Kun-*-mac-*.blockmap dist/latest-mac.yml build-windows: @@ -137,6 +139,8 @@ jobs: runs-on: windows-latest needs: prepare env: + KUN_APP_VERSION: ${{ needs.prepare.outputs.version }} + KUN_UPDATE_CHANNEL: stable DEEPSEEK_GUI_APP_VERSION: ${{ needs.prepare.outputs.version }} DEEPSEEK_GUI_UPDATE_CHANNEL: stable RELEASE_CHANNEL: stable @@ -167,8 +171,8 @@ jobs: if-no-files-found: error retention-days: 7 path: | - dist/DeepSeek-GUI-*-win-x64.exe - dist/DeepSeek-GUI-*-win-x64.exe.blockmap + dist/Kun-*-win-x64.exe + dist/Kun-*-win-x64.exe.blockmap dist/latest.yml build-linux: @@ -176,6 +180,8 @@ jobs: runs-on: ubuntu-latest needs: prepare env: + KUN_APP_VERSION: ${{ needs.prepare.outputs.version }} + KUN_UPDATE_CHANNEL: stable DEEPSEEK_GUI_APP_VERSION: ${{ needs.prepare.outputs.version }} DEEPSEEK_GUI_UPDATE_CHANNEL: stable RELEASE_CHANNEL: stable @@ -211,8 +217,8 @@ 
jobs: if-no-files-found: error retention-days: 7 path: | - dist/DeepSeek-GUI-*-linux-x86_64.AppImage - dist/DeepSeek-GUI-*-linux-x86_64.AppImage.blockmap + dist/Kun-*-linux-x86_64.AppImage + dist/Kun-*-linux-x86_64.AppImage.blockmap dist/latest-linux.yml publish: @@ -230,6 +236,7 @@ jobs: RELEASE_NAME: ${{ needs.prepare.outputs.release_name }} PREVIOUS_TAG: ${{ needs.prepare.outputs.previous_tag }} RELEASE_CHANNEL: stable + KUN_UPDATE_CHANNEL: stable DEEPSEEK_GUI_UPDATE_CHANNEL: stable R2_BUCKET: ${{ secrets.R2_BUCKET }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} @@ -265,12 +272,12 @@ jobs: run: | set -euo pipefail required=( - "DeepSeek-GUI-*-mac-arm64.dmg" - "DeepSeek-GUI-*-mac-x64.dmg" - "DeepSeek-GUI-*-mac-arm64.zip" - "DeepSeek-GUI-*-mac-x64.zip" - "DeepSeek-GUI-*-win-x64.exe" - "DeepSeek-GUI-*-linux-x86_64.AppImage" + "Kun-*-mac-arm64.dmg" + "Kun-*-mac-x64.dmg" + "Kun-*-mac-arm64.zip" + "Kun-*-mac-x64.zip" + "Kun-*-win-x64.exe" + "Kun-*-linux-x86_64.AppImage" "latest-mac.yml" "latest.yml" "latest-linux.yml" @@ -348,7 +355,7 @@ jobs: set -euo pipefail mapfile -d '' assets < <( find release-artifacts -type f \ - \( -name 'DeepSeek-GUI-*' -o -name 'latest*.yml' \) \ + \( -name 'Kun-*' -o -name 'latest*.yml' \) \ -print0 ) diff --git a/CLA.md b/CLA.md new file mode 100644 index 00000000..6f23c904 --- /dev/null +++ b/CLA.md @@ -0,0 +1,85 @@ +# Contributor License Agreement + +Version 1.0 + +This Contributor License Agreement ("Agreement") applies to any contribution +submitted to Kun, including code, documentation, design assets, tests, examples, +configuration, issue text, pull request text, and any other material submitted +for inclusion in the project ("Contribution"). + +By submitting a Contribution, you agree to the terms below. + +## 1. Ownership + +You retain copyright and any other rights you have in your Contribution. +This Agreement does not transfer ownership of your Contribution to the project +owner. + +## 2. 
Copyright License + +You grant the project owner a perpetual, worldwide, non-exclusive, +irrevocable, royalty-free, sublicensable, transferable, and relicensable +copyright license to use, reproduce, modify, prepare derivative works of, +publicly display, publicly perform, distribute, sublicense, and otherwise +exploit your Contribution, in whole or in part, under any license terms. + +This license includes the right for the project owner to license, sublicense, +or relicense your Contribution as part of Kun or related works under +noncommercial, commercial, proprietary, source-available, open-source, or other +license terms, without needing additional permission from you. + +## 3. Patent License + +If your Contribution is covered by patent claims that you can license, you +grant the project owner a perpetual, worldwide, non-exclusive, irrevocable, +royalty-free, sublicensable, transferable, and relicensable patent license to +make, have made, use, sell, offer for sale, import, and otherwise transfer your +Contribution and derivative works of it. + +## 4. Moral Rights + +To the maximum extent allowed by law, you waive and agree not to assert any +moral rights, author's rights, or similar rights that would interfere with the +project owner's exercise of the licenses granted in this Agreement. + +## 5. Right To Submit + +You represent that: + +- you have the legal right to submit the Contribution and grant these licenses; +- the Contribution is your original work, or you have sufficient rights to + submit it under this Agreement; +- if your employer, client, school, or another party may have rights in the + Contribution, you have received any necessary permission before submitting it; +- the Contribution does not knowingly violate any third-party intellectual + property right, confidentiality obligation, or legal restriction. + +## 6. 
Project License + +The project owner may make the project available under the license stated in +the repository, currently the PolyForm Noncommercial License 1.0.0, and may +also offer separate commercial licenses or other license terms. + +You understand that commercial licensing decisions for Kun are controlled by +the project owner and do not require additional approval from contributors who +submitted Contributions under this Agreement. + +## 7. No Obligation + +The project owner is not required to accept, use, publish, maintain, or +distribute any Contribution. + +## 8. No Warranty + +You provide your Contribution "as is", without warranties or conditions of any +kind, express or implied, to the maximum extent allowed by law. + +## 9. Not A Contribution + +If you want to submit material that is not covered by this Agreement, clearly +mark it as "Not a Contribution" in writing before or at the time you submit it. + +## 10. Agreement Scope + +This Agreement applies to all Contributions you submit to Kun unless the +project owner agrees in writing to different terms. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 1c9bb069..ec0a27bc 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -2,7 +2,7 @@ ## Our Pledge -We as contributors and maintainers pledge to make participation in the DeepSeek GUI community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
+We as contributors and maintainers pledge to make participation in the Kun community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. diff --git a/CODE_OF_CONDUCT.zh-CN.md b/CODE_OF_CONDUCT.zh-CN.md index 1f7b8fc2..4b9f9b0e 100644 --- a/CODE_OF_CONDUCT.zh-CN.md +++ b/CODE_OF_CONDUCT.zh-CN.md @@ -2,7 +2,7 @@ ## 我们的承诺 -作为贡献者和维护者,我们承诺让每个人都能在 DeepSeek GUI 社区的参与中获得无骚扰的体验,无论其年龄、体型、明显或不可见的残疾、种族、性别特征、性别认同和表达、经验水平、教育程度、社会经济地位、国籍、个人外表、种族、宗教或性认同和取向。 +作为贡献者和维护者,我们承诺让每个人都能在 Kun 社区的参与中获得无骚扰的体验,无论其年龄、体型、明显或不可见的残疾、族裔、性别特征、性别认同和表达、经验水平、教育程度、社会经济地位、国籍、个人外表、种族、宗教或性认同和取向。 我们承诺以有助于建设开放、热情、多元化、包容和健康社区的方式行事和互动。 diff --git a/DESIGN.md b/DESIGN.md index 11924860..1ba1a4e9 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -5,7 +5,7 @@ # invented. Anything not in this block is editorial, not authoritative. schema_version: 1 -project: DeepSeek-GUI +project: Kun single_runtime: kun themes: [light, dark, system] @@ -224,8 +224,8 @@ window: app_region: drag # html/body/-webkit-app-region no_drag_class: ds-no-drag # add to anything clickable in the title bar macos_top_inset_px: 42 # safe area for traffic-light controls - app_icon: src/asset/img/deepseek.png - secondary_logos: [deepseek.svg] + app_icon: src/asset/img/kun.png + secondary_logos: [kun_mac.png, kun_tray.png] # ---------- 9. Iconography ---------- icons: @@ -286,7 +286,7 @@ i18n: # ---------- 13. Brand & voice ---------- brand: - product_name: "DeepSeek GUI" + product_name: "Kun" tagline: "把 Kun 的本地智能体能力带进桌面窗口" hero_kw: [Code, Write, Connect phone] pillars: @@ -323,7 +323,7 @@ dont: - "Use a border radius smaller than 4px on a clickable surface." 
--- -# DeepSeek GUI — DESIGN.md +# Kun — DESIGN.md > 单一权威设计文档。所有屏幕、所有组件、所有视觉决策,都从这里出。 @@ -353,8 +353,8 @@ the frontmatter wins, and the markdown needs an update. ## 1. Project at a glance -DeepSeek GUI is a local desktop workbench for the **Kun** -runtime. The desktop shell is Electron; the runtime is a TypeScript +Kun (formerly DeepSeek GUI) is a local desktop workbench built +around its namesake **Kun** runtime. The desktop shell is Electron; the runtime is a TypeScript package that speaks HTTP/SSE; the renderer is React 19 + Zustand 5; the visual system is TailwindCSS 3 with a hand-built token layer on top. @@ -588,7 +588,7 @@ containing many cells. Do not animate the composer. ### 3.9 Layout grammar -Every screen in DeepSeek GUI follows the same macro-grammar: +Every screen in Kun follows the same macro-grammar: - **Topbar**: a translucent strip with the back button, session title, mode switcher, and right-side action cluster. The topbar @@ -619,7 +619,8 @@ first. Write, and Connect phone"), second person for the user. No emoji. No marketing language. Error messages are full sentences ending in punctuation; never a raw stack trace. -- The product name is "DeepSeek GUI". The runtime is "Kun". +- The product name is "Kun" (formerly "DeepSeek GUI"). The bundled + runtime shares the name; say "Kun runtime" when the distinction matters. The main workbenches are "Code" and "Write"; the phone/IM surface is "Connect phone" in English and "连接手机" in zh copy. Internal code may still say `claw`, but production copy should not expose it as the product name. @@ -1118,7 +1119,8 @@ only which renderer and local workflow state the store pulls in. 
| GUI logs | OS app-data dir / `log/` | NDJSON | `logger.ts` | | Inline completion debug | OS app-data dir | NDJSON | `write-inline-completion-service.ts` | -Default OS app-data paths: +Default OS app-data paths (derived from the Electron `productName`, +which current builds still ship as `DeepSeek GUI`): - macOS: `~/Library/Application Support/DeepSeek GUI` - Windows: `%APPDATA%\DeepSeek GUI` diff --git a/DESIGN.zh-CN.md b/DESIGN.zh-CN.md index 34d8c0f6..8ccd3893 100644 --- a/DESIGN.zh-CN.md +++ b/DESIGN.zh-CN.md @@ -5,7 +5,7 @@ # invented. Anything not in this block is editorial, not authoritative. schema_version: 1 -project: DeepSeek-GUI +project: Kun single_runtime: kun themes: [light, dark, system] @@ -224,8 +224,8 @@ window: app_region: drag # html/body/-webkit-app-region no_drag_class: ds-no-drag # add to anything clickable in the title bar macos_top_inset_px: 42 # safe area for traffic-light controls - app_icon: src/asset/img/deepseek.png - secondary_logos: [deepseek.svg] + app_icon: src/asset/img/kun.png + secondary_logos: [kun_mac.png, kun_tray.png] # ---------- 9. Iconography ---------- icons: @@ -286,7 +286,7 @@ i18n: # ---------- 13. Brand & voice ---------- brand: - product_name: "DeepSeek GUI" + product_name: "Kun" tagline: "把 Kun 的本地智能体能力带进桌面窗口" hero_kw: [Code, Write, Connect phone] pillars: @@ -323,7 +323,7 @@ dont: - "Use a border radius smaller than 4px on a clickable surface." --- -# DeepSeek GUI — DESIGN.md +# Kun — DESIGN.md > 单一权威设计文档。所有屏幕、所有组件、所有视觉决策,都从这里出。 @@ -353,8 +353,8 @@ the frontmatter wins, and the markdown needs an update. ## 1. Project at a glance -DeepSeek GUI is a local desktop workbench for the **Kun** -runtime. The desktop shell is Electron; the runtime is a TypeScript +Kun (formerly DeepSeek GUI) is a local desktop workbench built +around its namesake **Kun** runtime. 
The desktop shell is Electron; the runtime is a TypeScript package that speaks HTTP/SSE; the renderer is React 19 + Zustand 5; the visual system is TailwindCSS 3 with a hand-built token layer on top. @@ -588,7 +588,7 @@ containing many cells. Do not animate the composer. ### 3.9 Layout grammar -Every screen in DeepSeek GUI follows the same macro-grammar: +Every screen in Kun follows the same macro-grammar: - **Topbar**: a translucent strip with the back button, session title, mode switcher, and right-side action cluster. The topbar @@ -619,7 +619,8 @@ first. Write, and Connect phone"), second person for the user. No emoji. No marketing language. Error messages are full sentences ending in punctuation; never a raw stack trace. -- The product name is "DeepSeek GUI". The runtime is "Kun". +- The product name is "Kun" (formerly "DeepSeek GUI"). The bundled + runtime shares the name; say "Kun runtime" when the distinction matters. The main workbenches are "Code" and "Write"; the phone/IM surface is "Connect phone" in English and "连接手机" in zh copy. Internal code may still say `claw`, but production copy should not expose it as the product name. @@ -1118,7 +1119,8 @@ only which renderer and local workflow state the store pulls in. 
| GUI logs | OS app-data dir / `log/` | NDJSON | `logger.ts` | | Inline completion debug | OS app-data dir | NDJSON | `write-inline-completion-service.ts` | -Default OS app-data paths: +Default OS app-data paths (derived from the Electron `productName`, +which current builds still ship as `DeepSeek GUI`): - macOS: `~/Library/Application Support/DeepSeek GUI` - Windows: `%APPDATA%\DeepSeek GUI` diff --git a/LICENSE b/LICENSE index d688a63d..52c91ecb 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,134 @@ -MIT License - -Copyright (c) 2026 xingyu - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +PolyForm Noncommercial License 1.0.0 + +Required Notice: Copyright (c) 2026 xingyu + +Kun is available for noncommercial use under the PolyForm Noncommercial +License 1.0.0. Commercial use, commercial distribution, SaaS or hosted +service use, resale, or integration into commercial products requires a +separate written commercial license from the copyright holder. 
+ +Educational institutions and public-interest educational organizations may use +Kun for noncommercial teaching, research, coursework, experiments, and +learning/reference purposes without requesting separate authorization. This +permission is not pass-through: it may not be extended to downstream customers, +partners, commercial projects, hosted services, redistribution, sublicensing, +or use as part of any commercial product or commercial service. + +https://polyformproject.org/licenses/noncommercial/1.0.0 + +## Acceptance + +In order to get any license under these terms, you must agree to them +as both strict obligations and conditions to all your licenses. + +## Copyright License + +The licensor grants you a copyright license for the software to do +everything you might do with the software that would otherwise infringe +the licensor's copyright in it for any permitted purpose. However, you +may only distribute the software according to Distribution License and +make changes or new works based on the software according to Changes and +New Works License. + +## Distribution License + +The licensor grants you an additional copyright license to distribute +copies of the software. Your license to distribute covers distributing +the software with changes and new works permitted by Changes and New +Works License. + +## Notices + +You must ensure that anyone who gets a copy of any part of the software +from you also gets a copy of these terms or the URL for them above, as +well as copies of any plain-text lines beginning with `Required Notice:` +that the licensor provided with the software. For example: + +> Required Notice: Copyright Yoyodyne, Inc. (http://example.com) + +## Changes and New Works License + +The licensor grants you an additional copyright license to make changes +and new works based on the software for any permitted purpose. 
+ +## Patent License + +The licensor grants you a patent license for the software that covers +patent claims the licensor can license, or becomes able to license, that +you would infringe by using the software. + +## Noncommercial Purposes + +Any noncommercial purpose is a permitted purpose. + +## Personal Uses + +Personal use for research, experiment, and testing for the benefit of +public knowledge, personal study, private entertainment, hobby projects, +amateur pursuits, or religious observance, without any anticipated +commercial application, is use for a permitted purpose. + +## Noncommercial Organizations + +Use by any charitable organization, educational institution, public +research organization, public safety or health organization, +environmental protection organization, or government institution is use +for a permitted purpose regardless of the source of funding or +obligations resulting from the funding. + +## Fair Use + +You may have "fair use" rights for the software under the law. These +terms do not limit them. + +## No Other Rights + +These terms do not allow you to sublicense or transfer any of your +licenses to anyone else, or prevent the licensor from granting licenses +to anyone else. These terms do not imply any other licenses. + +## Patent Defense + +If you make any written claim that the software infringes or contributes +to infringement of any patent, your patent license for the software +granted under these terms ends immediately. If your company makes such a +claim, your patent license ends immediately for work on behalf of your +company. + +## Violations + +The first time you are notified in writing that you have violated any of +these terms, or done anything with the software not covered by your +licenses, your licenses can nonetheless continue if you come into full +compliance with these terms, and take practical steps to correct past +violations, within 32 days of receiving notice. Otherwise, all your +licenses end immediately. 
+ +## No Liability + +As far as the law allows, the software comes as is, without any warranty +or condition, and the licensor will not be liable to you for any damages +arising out of these terms or the use or nature of the software, under +any kind of legal claim. + +## Definitions + +The licensor is the individual or entity offering these terms, and the +software is the software the licensor makes available under these terms. + +You refers to the individual or entity agreeing to these terms. + +Your company is any legal entity, sole proprietorship, or other kind of +organization that you work for, plus all organizations that have control +over, are under the control of, or are under common control with that +organization. Control means ownership of substantially all the assets of +an entity, or the power to direct its management and policies by vote, +contract, or otherwise. Control can be direct or indirect. + +Your licenses are all the licenses granted to you for the software under +these terms. + +Use means anything you do with the software requiring one of your +licenses. + +© PolyForm Project Inc. diff --git a/README.en.md b/README.en.md index effc68af..439e7955 100644 --- a/README.en.md +++ b/README.en.md @@ -1,442 +1,228 @@
-
+
+ An experiment in requirement-first coding for the next programming paradigm.
+ Use DeepSeek, Xiaomi MiMo, and MiniMax to connect requirement clarification, design drafts, plans, and agent coding into one loop.
+
+ 简体中文 + · + English + · + Download + · + Docs + · + Run from source +
-> Bring Kun's high-token-ROI local agent runtime into a desktop workbench: **Code** for project work, **Write** for documents, and **Connect phone** for IM automation and scheduled tasks. Every token is steered toward requirements, code, decisions, and results. + -[Website](https://deepseek-gui.com) | [Download](https://deepseek-gui.com) +Kun is a product experiment for the future of programming: instead of starting from “ask the agent to edit code,” it starts from requirement clarification and connects requirement documents, design drafts, interactive prototypes, implementation plans, todos, agent coding, and change review in one GUI workflow. -[](https://github.com/XingYu-Zhong/DeepSeek-GUI/releases) -[](./LICENSE) +Kun is for users who want to put AI agents into real everyday work. It is not just a chat client, and it is not only a CLI shell for programmers: you can hand it a local folder for code, requirements, plans, and change review, or use the dedicated Write workspace for long-form Markdown, editing, and document export. -DeepSeek GUI is a local desktop workbench for developers and frequent AI users. It uses Kun as the only runtime and turns the terminal agent experience into an easier, longer-lived app: choose a workspace, start a task, watch reasoning and tool calls stream in, review file changes, and approve sensitive actions when needed. +This is also why Kun treats DeepSeek, Xiaomi MiMo, and MiniMax as the default first-class model stack, not just ordinary optional providers. Requirement-first coding requires more rounds of clarification, research, structuring, planning, execution, and verification. If model cost is too high, that richer workflow cannot become an everyday habit. Kun chooses three cost-efficient Chinese model providers so the full loop is affordable to run, repeat, and refine. -The goal is not to ship another chat wrapper. The goal is to make DeepSeek feel like a reliable desktop partner for real project work. 
Kun's core advantage is high token ROI: the same context budget spends less on repeated prefixes, giant tool catalogs, and runaway output, and more on the information that actually moves the task forward. +Kun includes the local `kun serve` runtime for the desktop app. Preferences, sessions, logs, and runtime config stay on your machine; model calls use your own provider credentials. For workflows that can read/write files or run commands, Kun gives you tool approvals, filesystem permission modes, inline diffs, and a change-review panel. --- - -## More Demos - -
-
-
-
-
Feishu / Lark / WeChat connection demo.
- -
-
-
+
Requirement drafting and planning demo.
-
-
-
-
-
Web tools demo.
+## Requirement-First Coding -## Why Kun Delivers High Token ROI +Kun explores a next-generation programming workflow: **requirement -> design -> plan -> code -> verify**. It is not just a chat box attached to an IDE. -Kun makes token economy the default behavior of the agent loop, not a cleanup step after the fact. It does more than compress text: before each model call, it decides which information is worth entering context. - -| Kun advantage | Where the ROI comes from | +| Stage | Kun's approach | | --- | --- | -| **Cache-first agent loop** | Stable system prompts, tool schemas, and immutable prefixes make DeepSeek-native cache hits more likely, so long sessions do not keep paying for the same background. | -| **Tool context on demand** | When MCP catalogs are large, Kun can search for relevant tools first, then describe and call the target tool instead of sending every tool schema on every turn. | -| **Context hygiene** | Long tool results, long arguments, base64 payloads, repeated tool loops, and low-value history are bounded while code, paths, errors, decisions, and open tasks are preserved. | -| **Visible usage payback** | Runtime telemetry tracks cache hit/miss, token usage, and estimated savings; the GUI surfaces Token economy savings so cost return is observable over time. | - -The result: Kun is built for real project work with long tasks, long sessions, and many tools. It keeps the model's attention on high-value context, helping the same API budget produce more useful progress. - -## What We Built - -- A desktop app around the Kun local runtime, with default runtime auto-start and management. -- A full chat workbench with multiple sessions, streaming output, history, interruption, and resend flows. -- Local workspace integration so the agent can read, edit, and create files in real projects. -- Change review surfaces that make every file modification visible and inspectable. 
-- First-run onboarding, settings, language/theme/font controls, notifications, local logs, and update entry points. -- Graphical Skill and MCP management so users can extend the agent without hand-editing every config file. -- Connect phone automation with Feishu / Lark / WeChat integration, dedicated IM agents, local webhook / relay support, and scheduled tasks. -- A dedicated Write workbench with writing spaces, a Markdown file tree, live editing/preview, inline completion, and selection-based inline agent actions. -- New requirement drafts, plans, thread todos, long-running goals, and code review so tasks can move from idea to execution to review. -- Pre-built macOS, Windows, and Linux installers; source builds remain available. - -## Highlights - -- **Desktop chat workbench**: multi-session chat with streamed replies, reasoning, tool calls, approval requests, and file changes in one place. -- **Project workspaces**: choose a local directory for each task, organize sessions by workspace, preview files, open files in your editor, and pick Git branches. -- **New requirements**: draft background, goals, and acceptance criteria; ask Requirement AI to clarify missing questions or research options; then generate an implementation plan. -- **Plans and todos**: `/plan` and New requirement both create editable plan files, while the right-side Plan panel syncs thread todos for trackable execution. -- **Goals**: `/goal` sets a long-running objective for the current thread, with pause, resume, clear, and complete states so the agent can keep working toward the same outcome. -- **Code review**: `/review` can inspect current uncommitted changes, a base branch diff, a commit, or custom review instructions, with findings shown as review cards. -- **Side conversations and thread control**: `/btw` opens a context-inheriting side conversation; threads also support compact, fork, archive, and restore flows. 
-- **Change review**: inline diffs and a side review panel help you understand exactly what the agent changed. -- **Controlled permissions**: choose read-only, workspace-write, full-access, or external sandbox modes, and decide when tool calls require approval. -- **Managed runtime**: use the bundled Kun by default, or point the app at your own `kun` executable. -- **Skill and MCP support**: create Skills, edit MCP config, add common tools, and open the related folders from the UI. -- **Feature-flagged agent extensions**: Kun can enable MCP, web fetch/search, Skills, standalone CLI use, image attachments, cross-session memory, and delegated subagents by config; Settings shows the runtime-reported capability and diagnostics state. -- **Connect phone**: run a background agent alongside normal chat, with current support for Feishu / Lark / WeChat, IM webhook / relay flows, and scheduled tasks. -- **Scheduled tasks**: create one-time, daily, interval, or manual tasks with their own workspace, model, and reasoning effort so Kun can run while the computer is awake. -- **Write mode**: manage `~/.deepseekgui/write_workspace` and custom writing spaces, browse Markdown files, use live Markdown editing, preview relative images, get DeepSeek FIM short completion / inspiration completion with optional cross-document BM25 + keyword retrieval, export the current document as `HTML / PDF / DOC / DOCX`, and invoke the writing assistant directly from selected text. -- **High token ROI**: Kun keeps prompt prefixes stable, tracks DeepSeek-native cache hit/miss fields, compacts context and tool output, and uses MCP search to discover tools progressively so tokens stay focused on requirements, code, decisions, and results. -- **Friendly first launch**: choose language, add your DeepSeek API key, and optionally set a compatible Base URL. -- **Local-first**: preferences, sessions, logs, and runtime config stay on your machine; model calls use your own DeepSeek API key. 
-- **English and Chinese UI**: switch languages from Settings at any time. -- **Cross-platform use**: macOS `.dmg/.zip`, Windows `.exe`, and Linux `.AppImage`; source builds remain available. - -## Runtime: Kun - -The only active local agent runtime in DeepSeek-GUI today is -**Kun** (shipped under `kun/`), a self-contained -TypeScript package that boots a local HTTP/SSE server as the -single boundary between the GUI and the agent loop. - -The name Kun is inspired by the great fish in Zhuangzi's line, -"In the northern sea there is a fish; its name is Kun." The idea is -not a temporary chat shell, but a deeper local runtime that can carry -longer context, richer tools, and sustained project collaboration. - -Kun's operating principle is to raise the ROI of every token. The -user's context budget should go toward requirements, code, decisions, -and results, not repeated tool schemas, runaway tool output, invalid -history, or prefixes that could have been reused from cache. It is -optimized less for one-off questions and more for real workflows that -read and write projects, call tools repeatedly, and carry context over -long sessions. - -Kun fuses a design that has been battle-tested in the -wild: - -- **The cache-first agent loop borrowed from Reasonix**: immutable prompt prefix (with sha256 fingerprint), append-only session log, bounded TTL/LRU cache, inflight tracking with guaranteed cleanup, mid-turn steering queue, context compaction that preserves pinned constraints, and cache/usage telemetry. -- **Token economy and tool-context optimization**: Kun stabilizes system prompts and tool schemas, reads DeepSeek-native cache hit/miss fields, bounds long tool results, long arguments, base64 payloads, and repeated tool loops, and can use `mcp_search` / `mcp_describe` / `mcp_call` to discover MCP tools progressively when a tool catalog is too large to advertise all at once. 
- -> Thanks to the Reasonix team for sharing the runnable references -> that made this design pillar testable in the first place. Nearly -> every performance trait of Kun — cache hit rate, token replay, -> reconnect, and interruptable approvals — can be traced back to -> this project. The full design rationale -> and the borrow map live in -> [`docs/kun-architecture.md`](docs/kun-architecture.md). - -If you want the dedicated write-up for cache behavior, including -stable prefixes, tool schema canonicalization, DeepSeek native -hit/miss accounting, tool-pair healing, and validation strategy, see -[`docs/kun-cache-optimization.md`](docs/kun-cache-optimization.md). - -Kun's larger agent capabilities are controlled by feature flags: -`capabilities.mcp` connects third-party MCP servers, -`capabilities.web` exposes `web_fetch` / `web_search`, -`capabilities.skills` discovers `skill.json` and legacy `SKILL.md`, -`capabilities.attachments` enables image attachments with text-model fallback, `capabilities.memory` -enables cross-session recall, and `capabilities.subagents` allows -budgeted delegated child runs. `kun run`, `kun chat`, and `kun exec` -can run without the GUI. The GUI reads `/v1/runtime/info` and -`/v1/runtime/tools` in Settings to show what is actually available. -These capabilities are off by config or limited by model capability -until explicitly enabled; examples and troubleshooting live in -[`kun/README.md`](kun/README.md). - -Simplified architecture: - -```text -Renderer (React) - → KunRuntimeProvider - → preload: dsGui.runtimeRequest / startSse - → main: LocalHttpRuntimeAdapter - → kun serve (HTTP + SSE) - → cache-first AgentLoop -``` - -Settings live under **Settings → Agent runtime**: binary path, port, -auto-start, API key, base URL, runtime token, data dir, model, -approval policy, sandbox mode, and the insecure switch. If an older -provider was saved before, settings are migrated into -`agents.kun` on load; after saving, only Kun settings -remain. 
+| **Clarify** | Create requirement drafts in the GUI and ask Requirement AI to find missing questions, research options, and shape boundaries | +| **Document** | Save drafts as `.kunsdd/draft/.../requirement.md`, with structured requirement blocks, acceptance criteria, and requirement history | +| **Design** | Generate UI design drafts, infographics, or interactive HTML prototypes from requirement selections, so requirements become more than text | +| **Plan** | Use `/plan` and `create_plan` to produce GUI-owned `.kunsdd/plan/...` implementation plans linked back to requirements | +| **Code** | Move from plan into todos, file edits, command execution, and change review; when requirements change, Kun can prompt replanning of the affected steps so the plan and requirements stay in sync | +| **Verify** | Bring requirement blocks, acceptance criteria, plan state, and `/review` back together to answer whether the original requirement is done | -The full endpoint list, CLI flags, environment variables, data dir -layout, and SSE event schema are documented in -[`kun/README.md`](kun/README.md). +This is Kun's most important product direction: moving AI coding from instant Q&A into a requirement-driven software production workflow. Models, writing, planning, review, and automation all serve that workflow. -## Who It Is For +## Core Model Stack -- Developers who want DeepSeek to work on real codebases without living in a terminal. -- Teams that need to see what the agent did, which files changed, and which operations required approval. -- Users who maintain multiple projects or long-running conversations and want reusable Skill/MCP setup. -- Anyone who wants a local desktop workbench connected to the official DeepSeek API or a compatible endpoint. +Kun optimizes for **complete capability + extreme cost efficiency**.
A requirement-first workflow is longer than ordinary chat and depends on repeated model calls; first-run setup and provider settings are organized around three Chinese model providers so users can cover more agent scenarios with lower model cost. ---- +| Provider | Role in Kun | +| --- | --- | +| **DeepSeek** | Default text and reasoning provider with `deepseek-v4-pro` / `deepseek-v4-flash`, powering coding, planning, review, long-context sessions, and auto model routing | +| **Xiaomi MiMo** | Cost-efficient multimodal and speech entry point, covering long-context text models, vision input, ASR transcription, TTS generation, and Token Plan | +| **MiniMax** | Full media generation complement, covering Anthropic Messages text models, image generation, speech generation, music generation, video generation, and Token Plan | -## Workbench And Entry Points +This stack lets Kun route different jobs to the right capability: fast models for lightweight clarification, stronger models for complex coding and reasoning, speech for writing and IM flows, and image/music/video generation for design and creative work. You can still add OpenAI-compatible, self-hosted, or other custom providers, but Kun's default experience is built around these three cost-efficient model services. -DeepSeek GUI is centered on two main workbenches, **Code** and **Write**, -with additional entry points for **Connect phone**, **Scheduled tasks**, -and **Plugins / Skills / MCP**. They share the same Kun runtime and -settings, but keep sessions, workspaces, and layouts separate so you -can switch by task. 
+## Why Kun -### Code Mode +| You want | Kun provides | +| --- | --- | +| A next-generation coding workflow | Requirement clarification, requirement documents, design drafts, implementation plans, agent coding, and verification in one continuous workflow | +| Complete agent capability at extreme cost efficiency | DeepSeek, Xiaomi MiMo, and MiniMax as the core stack for text, reasoning, vision, speech, image, music, and video | +| AI that works on real projects | Bind a local workspace, read and edit files, search code, run commands, and inspect tool calls and results | +| Requirements that become executable plans | New requirements, `/plan`, todos, `/goal`, side conversations, thread compaction, forking, and archiving | +| Controlled changes | Tool approvals, filesystem permission modes, inline diffs, a change-review panel, and `/review` | +| Writing in the same app | Markdown file tree, Live / Source / Split / Preview, export formats, and selection-based inline agent actions | +| Remote or background triggers | Feishu / Lark / WeChat connection, local webhook / relay, and one-time or recurring scheduled tasks | +| More than one model vendor | Custom Base URLs, protocols, model lists, and capability extensions beyond the three core providers | + +## Core Features + +- **Requirement-first coding**: draft requirements, clarify and structure them with AI, generate design drafts or prototypes, then move into implementation plans, todos, agent coding, and verification. +- **Code workbench**: bind a local project folder, chat around real codebases, read and edit files, run commands, and inspect tool calls and file changes. +- **Planning and review**: new requirements, `/plan`, todos, `/goal`, `/review`, side conversations, thread compaction, forking, and archiving. +- **Controlled changes**: inline diffs, a change-review panel, tool approvals, and filesystem permission modes.
+- **Write mode**: dedicated Markdown workspaces with a file tree, Live / Source / Split / Preview modes, completion, selection-based inline agent actions, and `HTML / PDF / DOC / DOCX` export. +- **Connect phone**: Feishu / Lark / WeChat IM agents, local webhook / relay support, and one-time, daily, interval, or manual scheduled tasks. +- **Model-stack-first**: first-run setup, provider presets, and capability auto-wiring are designed around DeepSeek, Xiaomi MiMo, and MiniMax as a cost-efficient full agent stack. +- **Multimodal and media capabilities**: image attachments, vision input, speech transcription, image generation, speech generation, music generation, and video generation, enabled by provider configuration. +- **MCP and Skills**: Model Context Protocol servers and project/global Skills give Kun specialized tools and workflows for different tasks. +- **Local runtime**: `kun serve` provides the HTTP/SSE boundary with a cache-first agent loop, append-only event logs, usage tracking, and context compaction. -The development workbench for real codebases: bind a local project directory, read and edit files, run commands, and review changes. +## More Demos
-
+
+
+
PDF research and source organization demo
-
+
+
+
Requirement clarification, requirement documents, and planning demo
-
+
+
+
iKun UI plugin demo
-- Configure dedicated agents for Feishu / Lark / WeChat and other channels, each with its own profile, default model, and workspace. -- Every IM agent gets its own thread, so you can debug replies and tool calls directly in the GUI. -- Local webhook / relay support for team workflows and personal automation. -- Scheduled tasks can run once, daily, on an interval, or manually. Each task creates a dedicated Kun thread and sends its configured prompt. +## Quick Start ---- +### Path A: Download a Release -## Install +Download the latest build from [GitHub Releases](https://github.com/KunAgent/Kun/releases). -### Download a Pre-built Package +| Platform | Package | Architecture | +| --- | --- | --- | +| macOS | `.dmg` or `.zip` | Intel / Apple Silicon | +| Windows | `.exe`, NSIS installer | x64 | +| Linux | `.AppImage` | x64 | -Download the latest build from [GitHub Releases](https://github.com/XingYu-Zhong/DeepSeek-GUI/releases): +On first launch: -| Platform | Package | -| --- | --- | -| macOS | `.dmg` or `.zip`, Intel and Apple Silicon | -| Windows | `.exe`, NSIS installer, x64 | -| Linux | `.AppImage`, x64 | +1. Choose a UI language. +2. Choose a model provider and enter an API key or Token Plan key. +3. For compatible providers, edit the Base URL, protocol, and model list in Settings. +4. Open Code and bind a local project, or open Write and create a writing workspace. -On first launch, enter your [DeepSeek API key](https://platform.deepseek.com/api_keys). If you use a DeepSeek/OpenAI-compatible endpoint, you can set a custom Base URL in Settings. 
+### Path B: Run From Source -### Run from Source +Requirements: -For contributors and local development: +| Dependency | Version | +| --- | --- | +| Node.js | 20+ | +| npm | Ships with Node.js | +| Model credentials | At least one of DeepSeek / Xiaomi MiMo / MiniMax / custom provider | ```bash -git clone https://github.com/XingYu-Zhong/DeepSeek-GUI.git -cd DeepSeek-GUI +git clone https://github.com/KunAgent/Kun.git +cd Kun npm install npm run dev ``` -Requirements: - -- Node.js 20+ -- A DeepSeek API key -- Internet access during the first dependency install - For slower network access in mainland China, use an npm mirror: ```bash npm install --registry=https://registry.npmmirror.com ``` ---- - -## First Run - -1. Open DeepSeek GUI. -2. Choose your interface language in the onboarding guide. -3. Enter your DeepSeek API key; set a custom Base URL if needed. -4. Choose a default workspace, or use the default directory created by the app. -5. Start a new session and describe the task you want the agent to handle. +## Common Commands -Typical flow (**Code mode**): - -- Pick or switch a workspace from the sidebar. -- Describe the task in the composer. -- Watch reasoning, tool calls, command execution, and file changes as they happen. -- Allow or deny actions that require approval. -- Inspect changes in the review panel before deciding what to do next. - -See [Workbench And Entry Points](#workbench-and-entry-points) above for Connect phone and Write details. Quick start: - -- **Connect phone**: enable background automation in Settings → add a Feishu / Lark / WeChat connection → configure agent profile, model, and workspace → optionally enable webhook / relay or scheduled tasks. -- **Write**: switch to Write mode → use the default writing space or add a new one → write in the Live editor with completion, selection inline agent, and the right-side writing assistant. 
- -## Usage and Settings - -Settings manages: - -- DeepSeek API key, Base URL, runtime port, and runtime token. -- Auto-start for the local runtime, plus optional custom `deepseek` path. -- Tool approval policy and filesystem access mode. -- Default workspace, language, theme, font size, and completion notifications. -- GUI updates and local error logs. -- Skill creation, Skill folders, and MCP config editing. -- Connect phone automation, Feishu / Lark / WeChat connections, webhook / relay settings, and scheduled tasks. - -Keyboard shortcuts: - -| Key | Action | +| Command | Description | | --- | --- | -| `Enter` | Send message | -| `Shift+Enter` | Newline in composer | -| `Ctrl+Enter` | Send message | -| `Esc` | Close a panel or dismiss the current overlay | - -## Write Mode Design Notes - -Write mode extends DeepSeek GUI from a code/chat workbench into a long-form writing workspace. Its implementation borrows several ideas from the local `openhanako` reference project: - -- Markdown live editing: openhanako inspired the CodeMirror decorations approach where the active line stays editable as Markdown source while inactive lines render headings, tasks, images, dividers, and tables through widgets. -- Selection inline agent: openhanako inspired the selection-capture and floating-input interaction, so selected text can be sent with file path, line numbers, and bounded original text as structured context. -- AI session isolation: Write uses Kun threads, but the GUI keeps a local write thread registry per writing space so write conversations do not pollute Code / Connect phone sidebars. -- Text completion: writing completion bypasses the local Kun serve runtime (**Kun** is the bundled local HTTP/SSE agent runtime, the single boundary between the GUI and the agent loop — see the [Runtime: Kun](#runtime-kun) section above for details) and calls the DeepSeek FIM Completion API directly for low-latency ghost text. 
Short completion uses a short debounce, small token budget, and strict local filtering; inspiration completion uses a longer pause, larger token budget, and only runs at line ends or paragraph boundaries. Before completion, the app builds a short-TTL lightweight index over Markdown / text files in the writing space, retrieves cross-document snippets with BM25 + keyword matching, and injects them as a hidden Markdown comment so terminology, facts, and style stay consistent. - ---- - -## Uninstall - -### Windows - -- Open Settings -> Apps -> Installed apps, find `DeepSeek GUI`, and uninstall it. -- Or uninstall from Control Panel -> Programs and Features. -- Or run the uninstaller from the installation directory. - -The Windows installer creates Start Menu and desktop shortcuts by default. It does not force a taskbar pin; pin it manually from the Start Menu if you want one. - -### macOS +| `npm run dev` | Build the Kun runtime and start the Electron dev app | +| `npm run build` | Production build | +| `npm run typecheck` | TypeScript type checking | +| `npm run lint` | ESLint checks | +| `npm run test` | Vitest tests | +| `npm run dist:mac` | Build macOS `.dmg` and `.zip` | +| `npm run dist:win` | Build the Windows NSIS installer | +| `npm run dist:linux` | Build the Linux AppImage | -- Move `DeepSeek GUI.app` from Applications to Trash. -- If macOS blocks the app on first open, right-click it in Finder and choose Open. -- For local unsigned builds, you can remove the quarantine attribute first: +## Configuration and Data -```bash -npm run mac:unquarantine -- '/Applications/DeepSeek GUI.app' -``` - -### Linux +- Preferences, sessions, logs, runtime config, and local runtime data stay on your machine by default. +- Model calls use the provider credentials you configure; provider presets are editable starting points. +- Code / Write / Connect phone share the same `kun` runtime boundary for sessions, approvals, tools, and usage tracking.
+- File writes, command execution, MCP tools, and media generation are governed by permissions and configuration. -- If you built a Linux package from source, delete the related `.AppImage` or installed files. -- If you manually created a desktop entry or shortcut, delete that too. +## Documentation Map -### Remove Local Data - -By default, uninstalling removes the app but keeps local settings, sessions, and runtime config so reinstalling is smoother. For a full cleanup, remove these paths if needed: - -| Platform | App data path | +| Doc | Contents | | --- | --- | -| macOS | `~/Library/Application Support/DeepSeek GUI` | -| Windows | `%APPDATA%\DeepSeek GUI` | -| Linux | `~/.config/DeepSeek GUI` | - -Kun data lives under `~/.deepseekgui/kun` or the configured Kun data dir. Check it before deleting, because it may contain sessions, MCP, or Skill settings you still need. - ---- - -## Updates - -- For regular users: check GUI updates in Settings or download the latest installer from [GitHub Releases](https://github.com/XingYu-Zhong/DeepSeek-GUI/releases). +| [kun/README.md](kun/README.md) | Kun runtime, CLI, environment variables, HTTP API | +| [docs/kun-architecture.en.md](docs/kun-architecture.en.md) | Runtime architecture and GUI integration | +| [docs/kun-cache-optimization.en.md](docs/kun-cache-optimization.en.md) | Cache optimization and token economy | +| [docs/model-provider-presets.md](docs/model-provider-presets.md) | Model provider presets | +| [docs/CONTRIBUTING.en.md](docs/CONTRIBUTING.en.md) | Contribution guide | +| [docs/DEVELOPMENT.en.md](docs/DEVELOPMENT.en.md) | Local development workflow | +| [SECURITY.md](SECURITY.md) | Security disclosure policy | ## Contributing -Contributions are welcome for bug fixes, UI/UX improvements, documentation, localization, build/release workflows, and runtime integration. +Bug fixes, UI/UX improvements, documentation, localization, build/release work, and runtime integration contributions are welcome. 
Project conventions: -- Day-to-day collaboration and integration happens on `develop`; stable releases land on `master`. -- Start features and fixes from the latest `develop`, preferably on a short-lived feature branch. -- Open pull requests into `develop` by default; maintainers merge reviewed changes into `master` for release. -- Align on scope first for larger or riskier changes. -- Run `npm run typecheck`, `npm run build`, and `npm run test` before opening a PR. -- Include a video or GIF when the UI changes. -- Include unit tests when project logic changes. -- Update both `README.md` and `README.en.md` when usage changes. - -See [CONTRIBUTING.md](./docs/CONTRIBUTING.md) and [DEVELOPMENT.md](./docs/DEVELOPMENT.md) for details. - -## Local Build - -```bash -npm run build # production build -npm run dist:mac # macOS packages -npm run dist:win # Windows installer (run on Windows) -npm run dist:linux # Linux AppImage -npm run release:mac # manual fallback for macOS release assets -npm run release:win # manual fallback for Windows release assets -``` - -For the full development workflow, see [DEVELOPMENT.md](./docs/DEVELOPMENT.md). 
- -## Documentation - -| Doc | Contents | -| --- | --- | -| [docs/kun-architecture.en.md](docs/kun-architecture.en.md) | Single-Kun runtime plan, GUI removal scope, HTTP/SSE contract, and legacy agent retirement notes | -| [docs/kun-cache-optimization.en.md](docs/kun-cache-optimization.en.md) | Kun cache optimization, token economy, MCP search, tool-output compaction, and usage savings | -| [docs/kun-contributing.en.md](docs/kun-contributing.en.md) | Kun contribution guide: hexagonal architecture, design patterns (Ports & Adapters / Functional Core Imperative Shell / event sourcing / explicit DI / composition root), four typical PR scenarios | -| [kun/README.md](kun/README.md) | Kun package: CLI, env, data dir, HTTP API | -| [CONTRIBUTING.en.md](docs/CONTRIBUTING.en.md) | Contribution guide | -| [DEVELOPMENT.en.md](docs/DEVELOPMENT.en.md) | Local development workflow | -| [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) | Community code of conduct | -| [SECURITY.md](SECURITY.md) | Security disclosure policy | - ---- +- Day-to-day integration happens on `develop`; stable releases land on `master`. +- Open pull requests into `develop` by default. +- Before opening a PR, run `npm run typecheck`, `npm run build`, and `npm run test` when possible. +- External contributions require acceptance of the [Contributor License Agreement](./CLA.md). ## Thanks -Kun stands on the shoulders of prior projects: - -- **Reasonix** — the cache-first agent loop. `ImmutablePrefix` (with sha256 fingerprint) and its explicit mutation API, `AppendOnlySessionLog` (in-memory window + JSONL on disk), `LruCache` / `TtlLruCache`, `InflightTracker` with `finally`-block cleanup, `SteeringQueue` for mid-turn user guidance, `ContextCompactor` that preserves pinned constraints, and `UsageCounter` + `CacheTelemetry` are direct TypeScript ports and refinements of Reasonix's design prototypes. 
Reasonix's split between reasoning events and assistant text, the `tool_call` / `tool_result` pairing via `callId`, and the usage replay pattern also flow directly into the Kun event contract. - -We are also grateful to: +Thanks to [LobsterAI](https://github.com/netease-youdao/LobsterAI), DeepSeek, Xiaomi MiMo, MiniMax, and everyone who contributes issues, ideas, code, and documentation. -- **[LobsterAI](https://github.com/netease-youdao/LobsterAI)**: its IM management, QR binding, agent binding, and customizable agent-profile flows inspired the Connect phone integration in this project. -- **OpenHanako**: its Markdown live editing, writing-space, and selection inline-agent patterns heavily informed Write mode. -- **[DeepSeek](https://github.com/deepseek-ai)**: for the models and API. -- Everyone who contributes issues, ideas, code, and documentation to DeepSeek GUI. - - -
-
+
+ 探索需求先行的下一代 coding 范式。
+ 用 DeepSeek、Xiaomi MiMo、MiniMax 的高性价比组合,把需求澄清、设计稿、计划和 Agent 编码串成完整闭环。
+
+ English + · + 简体中文 + · + 下载 + · + 文档 + · + 源码运行 +
-> 把 Kun 的高 Token ROI 本地智能体能力带进桌面窗口:**Code** 处理项目、**写作**打磨文档、**连接手机**接入 IM 与定时任务——让每一个 token 尽量花在需求、代码、决策和结果上。 + -[官网](https://deepseek-gui.com) | [下载](https://deepseek-gui.com) +Kun 是一次面向未来编程方式的产品实验:不再从“给 Agent 一句话,让它直接改代码”开始,而是从需求澄清开始,把需求文档、设计稿、交互原型、实施计划、Todo、Agent 编码和变更审查放到一条连续的 GUI 工作流里。 -[](https://github.com/XingYu-Zhong/DeepSeek-GUI/releases) -[](./LICENSE) +Kun 面向希望把 AI Agent 真正放进日常工作的用户。它不是只聊天的客户端,也不是只给程序员的 CLI 外壳:你可以把本地目录交给它处理代码、需求、计划和变更审查,也可以在独立的 Write 工作区里写作、润色和导出文档。 -DeepSeek GUI 是一个面向开发者和高频 AI 工作者的本地桌面工作台。它以 Kun 为唯一运行时,把终端里的智能体体验整理成更容易上手、更适合长期使用的应用:选择工作目录,发起任务,实时查看推理、工具调用和文件改动,并在需要时审批或回退。 +这也是 Kun 为什么把 DeepSeek、Xiaomi MiMo、MiniMax 作为默认的一线模型组合,而不是把它们当成普通的“可选 Provider”。需求先行的 coding 范式会带来更多轮澄清、调研、结构化、规划、执行和验证,如果模型成本太高,这条流程很难成为日常工作方式。Kun 选择三家来自中国的高性价比模型供应商,正是为了让完整流程跑得起、用得久、试得多。 -这个项目的目标不是再造一个聊天壳,而是让 DeepSeek 变成一个可以稳定参与真实项目工作的桌面伙伴。Kun 的核心优势是高 Token ROI:同样的上下文预算,少浪费在重复前缀、庞大工具目录和失控输出上,多投入到真正推动任务完成的信息里。 +Kun 内置同名本地运行时,通过 `kun serve` 连接桌面端。会话、日志、偏好设置和运行时配置默认保存在本机;模型请求使用你自己的模型服务凭据。对会读写文件和执行命令的流程,Kun 提供工具审批、权限模式、内联 diff 和变更审查面板。 --- - -## 更多演示 - -
-
-
-
-
飞书 / Lark / 微信连接演示。
- -
-
-
+
新建需求与计划演示。
-
-
-
-
-
Web 工具演示。
+## 需求先行的 coding 范式 -## Kun 为什么 Token ROI 高 +Kun 想探索的是“需求 -> 设计 -> 计划 -> 编码 -> 验证”的下一代编程工作流,而不是把一个聊天框简单贴到 IDE 上。 -Kun 把“省 token”做成 agent loop 的默认行为,而不是事后补救。它不只是压缩文本,更是在每一轮调用前判断哪些信息值得进入上下文。 - -| Kun 优势 | Token ROI 来源 | +| 阶段 | Kun 的尝试 | | --- | --- | -| **Cache-first agent loop** | 稳定 system prompt、工具 schema 和不可变前缀,让 DeepSeek 原生缓存更容易命中,长会话不必反复为同一段背景付费。 | -| **按需工具上下文** | MCP 工具很多时,先用 `mcp_search` 找相关工具,再描述和调用目标工具,避免每轮把完整工具目录塞进 prompt。 | -| **上下文卫生** | 对超长工具结果、长参数、base64 payload、重复工具循环和低价值历史做边界压缩,保留代码、路径、错误、决策和未解决事项。 | -| **可见的用量收益** | 运行时跟踪 cache hit/miss、token 用量和节省估算,GUI 会把 Token economy 的收益显示出来,方便长期观察成本回报。 | - -结果是:Kun 更适合真实项目里的长任务、长会话和多工具协作。它把模型注意力留给高价值上下文,让用户用同样的 API 预算换到更多有效推进。 - -## 我们做了什么 - -- 把 Kun 本地运行时封装进桌面应用,默认可以自动启动和管理。 -- 做了一套完整的聊天工作台,支持多会话、实时流式输出、历史回看、中断和重新发送。 -- 打通本地工作目录,让智能体可以围绕真实项目读取、编辑和创建文件。 -- 做了文件变更审查视图,让每一次修改都能被看见、理解和确认。 -- 做了首次引导、设置页、语言/主题/字体大小、系统通知、错误日志和更新入口。 -- 做了 Skill 与 MCP 的图形化管理,让用户不用手写很多配置也能扩展智能体能力。 -- 做了连接手机能力,支持飞书 / Lark / 微信接入、独立 IM Agent、本地 webhook / relay 和定时任务。 -- 做了 Write 写作工作台,提供独立写作空间、Markdown 文件树、live 编辑/预览、文本补全和选中文本 inline agent。 -- 做了新建需求、计划面板、线程 Todo、目标追踪和代码审查,让任务可以从想法走到执行再走到复盘。 -- 提供 macOS、Windows、Linux 预构建安装包;也可以从源码自行构建。 - -## 功能亮点 - -- **桌面聊天工作台**:多会话、流式回复、推理过程、工具调用、审批请求和文件改动都在同一个界面中展示。 -- **项目级工作区**:为每个任务选择本地目录,按工作区管理会话,并支持文件预览、编辑器打开和 Git 分支选择。 -- **新建需求**:先写需求草稿(背景、目标、验收标准),让需求 AI 帮忙澄清问题和补齐调研,再一键生成实施计划。 -- **计划与 Todo**:`/plan` 或新建需求都会生成可编辑的计划文件,右侧计划面板会同步线程 Todo,方便把长任务拆成可跟踪步骤。 -- **目标模式**:`/goal` 可以给当前会话设置长期目标,支持暂停、继续、清除和完成状态,让 agent 持续围绕同一个结果推进。 -- **代码审查**:`/review` 可审查当前未提交改动,也可以指定 base branch、commit 或自定义审查范围,结果以 findings 卡片呈现。 -- **旁支对话与会话管理**:`/btw` 可开启继承当前上下文的旁支对话;会话还支持压缩、分叉、归档和恢复。 -- **变更审查**:内联 diff 和侧边审查面板会记录智能体产生的文件改动,便于在应用内完成 review。 -- **权限可控**:支持只读、工作区可写、完全访问等模式,并可配置工具调用前是否需要审批。 -- **运行时托管**:默认使用内置 Kun;也可以在设置中指定自己的 `kun` 可执行文件。 -- **Skill 与 MCP**:在图形界面中创建 Skill、保存 MCP 配置、添加常用工具,并打开对应目录继续管理。 -- **可开关的 agent 扩展能力**:Kun 通过配置开关逐步启用 MCP、Web fetch/search、Skills、独立 CLI、图片附件、跨会话 Memory 和子 agent 
委派;设置页会显示运行时实际上报的能力与诊断状态。 -- **连接手机**:可开启独立于普通聊天的后台 Agent,当前支持飞书 / Lark / 微信接入、IM webhook / relay,以及按计划自动执行任务。 -- **定时任务**:创建一次性、每日、间隔或手动任务,指定工作区、模型和推理强度,让 Kun 在电脑唤醒时自动执行。 -- **Write 写作模式**:独立管理 `~/.deepseekgui/write_workspace` 和自定义写作空间,读取 Markdown 文件树,支持 live Markdown 编辑、相对图片预览、DeepSeek FIM 短补全 / 灵感长补全(可用跨文本 BM25 + 关键词检索增强)、当前文档导出为 `HTML / PDF / DOC / DOCX`,以及选中文本后直接唤起 inline 写作助手。 -- **高 Token ROI**:Kun 会稳定 prompt 前缀、跟踪 DeepSeek 原生缓存命中、按需压缩上下文和工具输出,并用 MCP search 渐进发现工具,把 token 留给需求、代码、决策和结果。 -- **首次配置友好**:首次启动会引导你选择语言、填写 DeepSeek API Key,并按需配置兼容服务地址。 -- **本地优先**:设置、会话状态、日志和运行时配置保存在本机;模型调用使用你自己的 DeepSeek API Key。 -- **中英文界面**:应用和 README 均提供中文、英文版本,界面语言可随时切换。 -- **跨平台使用**:提供 macOS `.dmg/.zip`、Windows `.exe`、Linux `.AppImage`;也可以从源码构建。 - -## 运行时:Kun - -DeepSeek-GUI 当前唯一活跃的本地 Agent 运行时是仓库自带的 -**Kun**(位于 `kun/` 目录)。Kun 取意于《庄子·逍遥游》中的 -“北冥有鱼,其名为鲲”:它不是一个临时聊天壳,而是希望把模型能力沉到 -更深的本地运行时里,让它能承载更长的上下文、更复杂的工具调用和更持续的 -项目协作。技术上,Kun 是一个独立的 TypeScript 包,启动本地 HTTP/SSE -服务作为 GUI 与 agent loop 之间的唯一边界。 - -Kun 的核心理念是提高每一个 token 的 ROI。对用户来说,同样的上下文预算 -应该尽量花在需求、代码、决策和结果上,而不是重复的工具 schema、失控的 -工具输出、无效历史或已经可以被缓存复用的前缀上。它适合的不是一次性问答, -而是反复读写项目、持续调用工具、需要长期上下文的真实工作流。 - -Kun 集成了已被验证的设计: - -- **借鉴自 Reasonix 的 cache-first agent loop**:immutable prompt prefix(带 sha256 指纹)、append-only session log、bounded TTL/LRU cache、inflight tracking with guaranteed cleanup、mid-turn steering queue、context compaction(保留 pinned constraints)、cache / usage telemetry。 -- **Token economy 与工具上下文优化**:稳定系统前缀与工具 schema,按 DeepSeek 原生字段统计 cache hit/miss;对超长工具结果、长参数、base64 payload 和重复工具循环做边界压缩或抑制;当 MCP 工具很多时,可用 `mcp_search` / `mcp_describe` / `mcp_call` 渐进发现和调用工具,避免一次性把庞大的 MCP 工具目录全部塞进 prompt。 - -> 致谢:感谢 Reasonix 团队提供的可运行参考。Kun -> 的几乎全部性能特征——cache hit 率、token replay、断线重连、 -> 审批中断——都可以追溯到该项目。具体设计取舍与借鉴映射 -> 详见 [`docs/kun-architecture.md`](docs/kun-architecture.md)。 - -如果你想专门了解 Kun 如何做缓存优化,包括稳定前缀、工具 schema -规范化、DeepSeek 原生 hit/miss 统计、tool pair healing 和验证方法, -可以直接阅读 -[`docs/kun-cache-optimization.md`](docs/kun-cache-optimization.md)。 
- -Kun 的大块 agent 能力采用 feature flag 管理:`capabilities.mcp` -接入第三方 MCP server,`capabilities.web` 暴露 `web_fetch` / -`web_search`,`capabilities.skills` 发现 `skill.json` 与 legacy -`SKILL.md`,`capabilities.attachments` 支持图片附件和文本模型 fallback,`capabilities.memory` -启用跨会话记忆,`capabilities.subagents` 允许有预算上限的子 agent -委派。`kun run` / `kun chat` / `kun exec` 可脱离 GUI 运行;GUI 的设置页 -会读取 `/v1/runtime/info` 与 `/v1/runtime/tools` 展示实际可用状态。 -这些能力默认按配置关闭或受模型能力限制,完整配置示例和排障说明见 -[`kun/README.md`](kun/README.md)。 - -技术架构(简化版): - -```text -Renderer (React) - → KunRuntimeProvider - → preload: dsGui.runtimeRequest / startSse - → main: LocalHttpRuntimeAdapter - → kun serve (HTTP + SSE) - → cache-first AgentLoop -``` - -设置项在 **设置 → Agent 运行时** 里维护:binary path、port、 -auto-start、API key、base URL、runtime token、data dir、model、 -approval policy、sandbox mode、insecure 开关。如果之前保存过旧 -provider,settings 会在读取时迁移到 `agents.kun`,再次保存后 -只保留 Kun 配置。 +| **澄清需求** | 在 GUI 中新建需求草稿,让需求 AI 帮你补问题、做实现前调研、整理边界 | +| **沉淀文档** | 把草稿保存为 `.kunsdd/draft/.../requirement.md`,支持结构化需求块、验收标准和需求历史 | +| **生成设计** | 从需求片段生成 UI 设计稿、信息图或交互式 HTML 原型,让需求不只停留在文字里 | +| **形成计划** | 通过 `/plan` 和 `create_plan` 生成 GUI 管理的 `.kunsdd/plan/...` 实施计划,并把计划步骤和需求关联 | +| **Agent 编码** | 计划进入 Todo、文件编辑、命令执行和变更审查;需求变更后可以提示重规划,避免计划和需求脱节 | +| **回到验收** | 结合需求块、验收标准、计划状态和 `/review`,把“做完了吗”落回最初的需求 | -完整的端点、CLI flag、环境变量、data dir 布局、SSE 事件 schema -见 [`kun/README.md`](kun/README.md)。 +这条线是 Kun 最重要的产品方向:让 AI coding 从“即时问答”走向“需求驱动的软件生产流程”。模型、写作、计划、审查和自动化都围绕这条线服务。 -## 适合谁 +## 核心模型组合 -- 想用 DeepSeek 处理真实代码库,但不想一直留在终端里的开发者。 -- 希望清楚看到智能体做了什么、改了哪些文件、哪些操作需要批准的团队。 -- 需要长期维护多个项目、多个会话,并希望把 Skill/MCP 配置沉淀下来的用户。 -- 想用本地工作台连接 DeepSeek 官方 API 或 OpenAI 兼容服务的人。 +Kun 追求的是“完整能力 + 极致性价比”。需求先行的流程比普通聊天更长,也更依赖反复调用模型;首启和设置页围绕三家中国模型供应商组织,让用户可以用更低的模型成本覆盖更多 Agent 场景。 ---- +| 供应商 | 在 Kun 中的角色 | +| --- | --- | +| **DeepSeek** | 默认文本与推理主模型,提供 `deepseek-v4-pro` / `deepseek-v4-flash`,支撑代码、计划、审查、长上下文会话和自动模型路由 | +| **Xiaomi MiMo** | 高性价比多模态与语音入口,覆盖长上下文文本模型、视觉输入、ASR 语音转写、TTS 语音生成和 Token Plan | +| 
**MiniMax** | 补齐完整媒体生成能力,覆盖 Anthropic Messages 文本模型、图片生成、语音生成、音乐生成、视频生成和 Token Plan | -## 工作台与入口 +这套组合让 Kun 可以把不同任务分配给更合适的能力:轻量澄清走高速模型,复杂代码和推理走更强模型,需求文档和 IM 场景接入语音,设计与创作场景接入图片、音乐和视频。你仍然可以添加 OpenAI 兼容、自托管或其他自定义 Provider,但 Kun 的默认体验会优先围绕这三家高性价比模型服务展开。 -DeepSeek GUI 现在以 **Code** 和 **写作** 两个主工作台为核心,并提供 -**连接手机**、**定时任务**、**插件 / Skill / MCP** 等入口。它们共享同一套 -Kun 运行时与设置,但会话、工作区和界面布局彼此独立,可按任务随时切换。 +## 为什么选择 Kun -### Code 模式 +| 你想要 | Kun 提供 | +| --- | --- | +| 探索下一代 coding 范式 | 从需求澄清、需求文档、设计稿、实施计划一路走到 Agent 编码和验收 | +| 极致性价比的完整 Agent 能力 | 以 DeepSeek、Xiaomi MiMo、MiniMax 为核心组合,覆盖文本、推理、视觉、语音、图片、音乐和视频 | +| 让 AI 面向真实项目工作 | 绑定本地工作区,读写文件、搜索代码、执行命令、查看工具调用和结果 | +| 把需求推进到可执行计划 | 支持新建需求、`/plan`、Todo、`/goal`、旁支对话、会话压缩、分叉和归档 | +| 让改动保持可控 | 工具审批、文件系统权限模式、内联 diff、变更审查面板和 `/review` | +| 在同一个应用里写作 | Markdown 文件树、Live / Source / Split / Preview、多种导出格式、选区 inline agent | +| 离开电脑也能触发任务 | 飞书 / Lark / 微信连接、本地 webhook / relay、一次性或周期性定时任务 | +| 不被单一模型绑定 | 三家核心供应商之外,也支持自定义 Base URL、协议、模型列表和扩展能力 | + +## 核心能力 + +- **需求先行 coding**:新建需求草稿,AI 澄清和结构化需求,生成设计稿或交互原型,再进入实施计划、Todo、Agent 编码和验收。 +- **Code 工作台**:围绕真实代码库对话,读取项目上下文,执行 shell 命令,修改文件,并在提交前审查每一次变更。 +- **需求、计划与审查**:从需求草稿进入计划,再到 Todo、执行、复盘和代码审查;长会话可以压缩、恢复、分叉或归档。 +- **Write 写作模式**:独立 Markdown 工作区,支持文件树、预览模式切换、补全、选区改写、图片附件,以及 `HTML / PDF / DOC / DOCX` 导出。 +- **自动化与远程入口**:把桌面会话接到飞书 / Lark / 微信等 IM,支持本地 webhook、relay 和定时任务,让后台任务也能回到同一套 Agent loop。 +- **模型组合优先**:围绕 DeepSeek、Xiaomi MiMo、MiniMax 设计首启、Provider 预设和能力自动接线,用高性价比模型组合承担完整桌面 Agent 工作流。 +- **多模态与媒体能力**:支持图片附件、视觉输入、语音转写、图片生成、语音生成、音乐生成和视频生成;相关能力随 Provider 配置启用。 +- **MCP 与 Skills**:接入 Model Context Protocol 服务器,加载项目或全局 Skills,让 Kun 按任务获得更专门的工具和工作方式。 +- **本地运行时**:`kun serve` 提供 HTTP/SSE 边界,采用 cache-first agent loop、追加式事件日志、用量统计和上下文压缩策略。 -面向真实代码库的开发工作台:绑定本地项目目录,围绕仓库读写文件、执行命令、审查改动。 +## 更多演示
-
+
+
+
PDF 研究与资料整理演示
-
+
+
+
需求澄清、需求文档与计划演示
-
+
+
+
iKun UI 插件演示
-- 为飞书 / Lark / 微信等渠道配置独立 Agent,分别设定人设、默认模型与工作目录。 -- 每个 IM Agent 拥有独立会话线程,可在 GUI 内直接调试回复与工具调用。 -- 支持本地 webhook / relay,适合把 DeepSeek 接到团队协作或个人自动化流程中。 -- 定时任务可设置一次性、每日、间隔或手动运行,任务会创建独立 Kun thread,并按配置发送 prompt。 +## 快速开始 ---- +### 路径 A:下载发布版 -## 下载安装 +前往 [GitHub Releases](https://github.com/KunAgent/Kun/releases) 下载最新版本。 -### 下载预构建安装包 +| 平台 | 安装包 | 架构 | +| --- | --- | --- | +| macOS | `.dmg` 或 `.zip` | Intel / Apple Silicon | +| Windows | `.exe`,NSIS 安装器 | x64 | +| Linux | `.AppImage` | x64 | -前往 [GitHub Releases](https://github.com/XingYu-Zhong/DeepSeek-GUI/releases) 下载最新版本: +首次启动时: -| 平台 | 安装包 | -| --- | --- | -| macOS | `.dmg` 或 `.zip`,支持 Intel 与 Apple Silicon | -| Windows | `.exe`,NSIS 安装器,x64 | -| Linux | `.AppImage`,x64 | +1. 选择界面语言。 +2. 选择模型服务并填写 API Key 或 Token Plan Key。 +3. 如需兼容服务,在设置里编辑 Base URL、协议和模型列表。 +4. 进入 Code 绑定本地项目,或进入 Write 创建写作工作区。 -首次启动时需要填写 [DeepSeek API Key](https://platform.deepseek.com/api_keys)。如果你使用兼容 DeepSeek / OpenAI 的服务,也可以在设置里修改 Base URL。 +### 路径 B:从源码运行 -### 从源码运行 +环境要求: -适合贡献者或需要本地开发的人: +| 依赖 | 版本 | +| --- | --- | +| Node.js | 20+ | +| npm | 随 Node.js 安装 | +| 模型服务凭据 | DeepSeek / Xiaomi MiMo / MiniMax / 自定义 Provider 至少一个 | ```bash -git clone https://github.com/XingYu-Zhong/DeepSeek-GUI.git -cd DeepSeek-GUI +git clone https://github.com/KunAgent/Kun.git +cd Kun npm install npm run dev ``` -环境要求: - -- Node.js 20+ -- 可用的 DeepSeek API Key -- 首次安装依赖时需要联网 - 中国大陆访问较慢时,可以使用 npm 镜像: ```bash npm install --registry=https://registry.npmmirror.com ``` ---- - -## 首次使用 +## 常用命令 -1. 打开 DeepSeek GUI。 -2. 在首次引导中选择界面语言。 -3. 填入 DeepSeek API Key;如果需要,设置自定义 Base URL。 -4. 选择默认工作目录,或使用应用自动创建的默认目录。 -5. 
新建会话,输入任务,让智能体开始工作。 - -常用流程(**Code 模式**): - -- 在左侧选择或切换工作区。 -- 在聊天框描述你要完成的任务。 -- 观察回复中的推理、工具调用、命令执行和文件改动。 -- 对需要审批的操作选择允许或拒绝。 -- 在变更审查面板里检查改动,再决定下一步。 - -**连接手机** 与 **写作** 的详细说明见上文 [工作台与入口](#工作台与入口)。简要步骤: - -- **连接手机**:在设置页启用后台自动化 → 添加飞书 / Lark / 微信连接 → 配置 Agent 人设、模型与工作目录 → 按需开启 webhook / relay 或定时任务。 -- **Write**:切换到 Write 模式 → 使用默认写作空间或添加新空间 → 在 Live 编辑器中写作,配合补全、选区 inline agent 与右侧写作助手。 - -## 设置与使用 - -设置页集中管理这些内容: - -- DeepSeek API Key、Base URL、运行时端口和运行时 Token。 -- 是否自动启动本地运行时,以及是否使用自定义 `deepseek` 路径。 -- 工具审批策略和文件系统权限范围。 -- 默认工作目录、语言、主题、字体大小和完成通知。 -- GUI 更新和本地错误日志。 -- Skill 创建与目录管理、MCP 配置编辑。 -- 连接手机后台自动化、飞书 / Lark / 微信连接、Webhook / Relay 和定时任务。 - -快捷键: - -| 按键 | 功能 | +| 命令 | 说明 | | --- | --- | -| `Enter` | 发送消息 | -| `Shift+Enter` | 在输入框中换行 | -| `Ctrl+Enter` | 发送消息 | -| `Esc` | 关闭面板或退出当前浮层 | - -## Write 模式设计参考 - -Write 模式的目标是把 DeepSeek GUI 从“代码/聊天工作台”扩展成真正可长期写作的桌面工作区。实现时参考了本地 `openhanako` 项目中的几个方案: - -- Markdown live 编辑:借鉴 openhanako 的 CodeMirror decorations 思路,当前行保留 Markdown 源码,非当前行用装饰层渲染标题、任务项、图片、分割线和表格。 -- 选区 inline agent:借鉴 openhanako 的选区捕获与浮动输入框交互,用户选中文本后可以直接输入“润色/续写/分析”等指令,并把文件路径、行号和原文作为结构化引用交给写作助手。 -- AI 会话隔离:Write 使用 Kun thread,但在 GUI 本地按写作空间维护 write thread registry,避免写作会话污染 Code / 连接手机侧栏。 -- 文本补全:写作补全不走本地 Kun serve(**Kun** 是仓库自带的本地 HTTP/SSE Agent 运行时,唯一负责 GUI 与 agent loop 之间的通信,详见上一节「运行时:Kun」),而是直接调用 DeepSeek FIM Completion API,方便在纯写作场景里获得低延迟 ghost text。短补全使用较短 debounce、较小 token 预算和严格本地过滤;灵感长补全使用更长停顿触发、更大 token 预算,并只在行尾 / 段落边界工作。补全前会对写作空间内的 Markdown / 文本文件建立短 TTL 轻量索引,使用 BM25 + 关键词匹配召回跨文本片段,并以隐藏 Markdown comment 的形式注入 prompt,帮助模型保持术语、事实和风格连续性。 - ---- - -## 卸载 - -### Windows - -- 打开“设置 -> 应用 -> 已安装的应用”,找到 `DeepSeek GUI` 并卸载。 -- 或在“控制面板 -> 程序和功能”中卸载。 -- 也可以运行安装目录中的卸载程序。 - -Windows 安装器默认会创建开始菜单和桌面快捷方式。安装包不会强制固定到任务栏;如需固定,可在开始菜单中右键 `DeepSeek GUI` 并选择固定。 - -### macOS - -- 将 `DeepSeek GUI.app` 从“应用程序”移到废纸篓。 -- 如果首次打开被系统拦截,可在 Finder 中右键应用并选择“打开”。 -- 本地未公证构建可先运行: - -```bash -npm run mac:unquarantine -- '/Applications/DeepSeek GUI.app' -``` - -### Linux +| `npm run 
dev` | 构建 Kun 运行时并启动 Electron 开发环境 | +| `npm run build` | 生产构建 | +| `npm run typecheck` | TypeScript 类型检查 | +| `npm run lint` | ESLint 检查 | +| `npm run test` | 运行 Vitest 测试 | +| `npm run dist:mac` | 构建 macOS `.dmg` 和 `.zip` | +| `npm run dist:win` | 构建 Windows NSIS 安装器 | +| `npm run dist:linux` | 构建 Linux AppImage | -- 如果你是从源码构建的 Linux 包,删除对应的 `.AppImage` 或安装文件即可。 -- 如果你手动创建了桌面入口或快捷方式,也一并删除。 +## 配置与数据 -### 清理本地数据 +- 偏好设置、会话、日志、运行时配置和本地运行时数据默认保存在本机。 +- 模型调用通过你配置的 Provider 凭据发起;Provider 预设可以作为起点,字段仍可编辑。 +- Code / Write / 连接手机共用同一个 `kun` 运行时边界,便于复用会话、审批、工具和用量统计。 +- 文件读写、命令执行、MCP 工具、媒体生成等高权限能力会经过权限与配置控制。 -默认卸载只移除应用文件,会保留本地设置、会话和运行时配置,便于后续重装恢复。若要彻底清理,可按需删除: +## 文档地图 -| 平台 | 应用数据位置 | +| 文档 | 内容 | | --- | --- | -| macOS | `~/Library/Application Support/DeepSeek GUI` | -| Windows | `%APPDATA%\DeepSeek GUI` | -| Linux | `~/.config/DeepSeek GUI` | - -Kun 数据默认位于 `~/.deepseekgui/kun` 或应用数据目录下的 Kun data dir。删除前请确认其中没有你还需要的会话、MCP 或 Skill 配置。 - ---- - -## 更新 - -- 普通用户:可在设置页检查 GUI 更新,或前往 [GitHub Releases](https://github.com/XingYu-Zhong/DeepSeek-GUI/releases) 下载最新安装包。 +| [kun/README.zh-CN.md](kun/README.zh-CN.md) | Kun 运行时、CLI、环境变量、HTTP API | +| [docs/kun-architecture.md](docs/kun-architecture.md) | 单运行时架构与 GUI 集成 | +| [docs/kun-cache-optimization.md](docs/kun-cache-optimization.md) | 缓存优化、token economy 与可观测性 | +| [docs/model-provider-presets.md](docs/model-provider-presets.md) | 模型 Provider 预设与扩展能力 | +| [docs/DEVELOPMENT.zh-CN.md](docs/DEVELOPMENT.zh-CN.md) | 本地开发流程、分支策略和发布说明 | +| [docs/CONTRIBUTING.zh-CN.md](docs/CONTRIBUTING.zh-CN.md) | 贡献说明 | +| [SECURITY.zh-CN.md](SECURITY.zh-CN.md) | 安全漏洞披露方式 | -## 贡献指南 +## 贡献 欢迎提交 bug 修复、UI/UX 优化、文档改进、本地化内容、构建发布流程和运行时集成相关改动。 协作约定: - 日常协作与集成分支为 `develop`,稳定发布分支为 `master`。 -- 新功能和修复建议从最新 `develop` 拉出短期功能分支开始。 -- PR 默认提交到 `develop`,由维护者审核后再由维护者合入 `master` 发布。 -- 对高风险改动请先沟通范围,再进入实现。 -- 发起 PR 前运行 `npm run typecheck`、`npm run build`,以及 `npm run test`。 -- 如果改动影响界面,请附上视频或 GIF。 -- 如果改动影响项目逻辑,请附上对应单元测试。 -- 如果改动影响使用方式,请同步更新 `README.md` 和 
`README.en.md`。 - -详见 [CONTRIBUTING.zh-CN.md](./docs/CONTRIBUTING.zh-CN.md) 和 [DEVELOPMENT.zh-CN.md](./docs/DEVELOPMENT.zh-CN.md)。 - -## 本地构建 - -```bash -npm run build # 生产构建 -npm run dist:mac # macOS 安装包 -npm run dist:win # Windows 安装包(在 Windows 上运行) -npm run dist:linux # Linux AppImage -npm run release:mac # 手动兜底:构建并上传 macOS release 资源 -npm run release:win # 手动兜底:构建并上传 Windows release 资源 -``` - -更多开发流程请看 [DEVELOPMENT.zh-CN.md](./docs/DEVELOPMENT.zh-CN.md)。 - -## 文档 - -| 文档 | 内容 | -| --- | --- | -| [docs/kun-architecture.md](docs/kun-architecture.md) | Kun 单运行时方案、GUI 拆改范围、HTTP/SSE 合约、旧 agent 拆除说明 | -| [docs/kun-cache-optimization.md](docs/kun-cache-optimization.md) | Kun 缓存优化、token economy、MCP search、工具输出压缩与用量收益统计 | -| [docs/kun-contributing.md](docs/kun-contributing.md) | Kun 贡献指南:六边形架构、设计模式(Ports & Adapters / Functional Core Imperative Shell / 事件溯源 / 显式 DI / Composition Root)、4 个典型 PR 场景 | -| [kun/README.md](kun/README.md) | Kun 包:CLI、env、data dir、HTTP API | -| [CONTRIBUTING.zh-CN.md](docs/CONTRIBUTING.zh-CN.md) | 贡献说明 | -| [DEVELOPMENT.zh-CN.md](docs/DEVELOPMENT.zh-CN.md) | 本地开发与协作流程 | -| [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) | 社区行为准则 | -| [SECURITY.md](SECURITY.md) | 安全漏洞披露方式 | - ---- +- PR 默认提交到 `develop`。 +- 发起 PR 前建议运行 `npm run typecheck`、`npm run build` 和 `npm run test`。 +- 外部贡献需接受 [Contributor License Agreement](./CLA.md)。 ## 致谢 -Kun 的设计站在先行项目的肩膀上: +感谢 [LobsterAI](https://github.com/netease-youdao/LobsterAI)、DeepSeek、Xiaomi MiMo、MiniMax,以及所有提交 issue、建议、代码和文档的贡献者。 -- **Reasonix** —— cache-first agent loop。`ImmutablePrefix`(带 sha256 指纹)+ 显式 mutation API、`AppendOnlySessionLog`(in-memory 窗口 + JSONL 磁盘重放)、`LruCache` / `TtlLruCache`、带 `finally` 清理的 `InflightTracker`、`SteeringQueue`(mid-turn 用户引导)、`ContextCompactor`(保留 pinned constraints)、`UsageCounter` + `CacheTelemetry` —— 这些都是 Reasonix 设计原型的 TypeScript 复刻与改进。Reasonix 的 reasoning events 拆分流、tool call / result 配对、usage replay 等设计也直接延续到 Kun 的事件合约。 - -也感谢以下项目和个人: - -- 
**[LobsterAI](https://github.com/netease-youdao/LobsterAI)**:IM 管理、扫码绑定、Agent 绑定与自定义人设流程给了本项目连接手机能力很多启发。 -- **OpenHanako**:Markdown live 编辑、写作空间、选中文本 inline agent 等 Write 模式交互和实现方案给了本项目重要参考。 -- **[DeepSeek](https://github.com/deepseek-ai)**:提供模型与 API。 -- 所有为 DeepSeek GUI 提交 issue、建议、代码和文档的贡献者。 - - -`(无 shiki 高亮 NodeView)。
+- 中文 IME 与 ghost text 的交互需在真机专项手测。
+- `@tiptap/*` 固定 3.26.0;升级前必须重跑 `scripts/tiptap-roundtrip-audit.mjs`。
diff --git a/electron-builder.config.cjs b/electron-builder.config.cjs
index ce7be480..d7648a38 100644
--- a/electron-builder.config.cjs
+++ b/electron-builder.config.cjs
@@ -1,9 +1,17 @@
const { existsSync, readFileSync } = require('node:fs')
const { join } = require('node:path')
+// Build env vars now use the KUN_* prefix; legacy DEEPSEEK_GUI_* names are still honored so existing
+// CI / local release scripts keep working. Returns kunName's value unless unset or '', else legacyName's.
+function envWithLegacyFallback(kunName, legacyName) {
+ const value = process.env[kunName]
+ if (value !== undefined && value !== '') return value
+ return process.env[legacyName]
+}
+
function loadLocalReleaseEnv() {
const candidates = [
- process.env.DEEPSEEK_GUI_RELEASE_ENV,
+ envWithLegacyFallback('KUN_RELEASE_ENV', 'DEEPSEEK_GUI_RELEASE_ENV'),
join(__dirname, 'scripts', 'release.local.env'),
join(__dirname, 'release.local.env')
].filter(Boolean)
@@ -43,32 +51,45 @@ const hasNotaryToolCredentials = Boolean(
(process.env.APPLE_API_KEY || process.env.APPLE_API_KEY_BASE64)
)
-const r2PublicBaseUrl = (process.env.R2_PUBLIC_BASE_URL || 'https://deepseek-gui.com/api/r2')
+// R2 release prefix 维持旧值不动:线上老版本轮询的就是
+// `…/deepseek-gui/channels/<channel>/latest/`,prefix 一改老客户端就再也
+// 收不到更新。默认公开域名优先使用 kun-agent,运行时仍会兜底旧域名。
+const r2PublicBaseUrl = (process.env.R2_PUBLIC_BASE_URL || 'https://www.kun-agent.com/api/r2')
.trim()
.replace(/\/+$/, '')
const r2ReleasePrefix = (process.env.R2_RELEASE_PREFIX || 'deepseek-gui')
.trim()
.replace(/^\/+|\/+$/g, '')
-const updateChannel = normalizeUpdateChannel(process.env.DEEPSEEK_GUI_UPDATE_CHANNEL || 'stable')
+const updateChannel = normalizeUpdateChannel(
+ envWithLegacyFallback('KUN_UPDATE_CHANNEL', 'DEEPSEEK_GUI_UPDATE_CHANNEL') || 'stable'
+)
const genericUpdateUrl = `${r2PublicBaseUrl}/${r2ReleasePrefix}/channels/${updateChannel}/latest/`
-const releaseAppVersion = (process.env.DEEPSEEK_GUI_APP_VERSION || '').trim()
+const releaseAppVersion = (
+ envWithLegacyFallback('KUN_APP_VERSION', 'DEEPSEEK_GUI_APP_VERSION') || ''
+).trim()
const artifactVersion = releaseAppVersion || '${version}'
function normalizeUpdateChannel(raw) {
const value = String(raw || '').trim()
if (value === 'stable' || value === 'frontier') return value
- throw new Error(`DEEPSEEK_GUI_UPDATE_CHANNEL must be "stable" or "frontier", got: ${raw}`)
+ throw new Error(`KUN_UPDATE_CHANNEL (or legacy DEEPSEEK_GUI_UPDATE_CHANNEL) must be "stable" or "frontier", got: ${raw}`)
}
if (releaseAppVersion && !/^\d+\.\d+\.\d+$/.test(releaseAppVersion)) {
throw new Error(
- `DEEPSEEK_GUI_APP_VERSION must be a valid x.y.z semver for electron-updater, got: ${releaseAppVersion}`
+ `KUN_APP_VERSION (or legacy DEEPSEEK_GUI_APP_VERSION) must be a valid x.y.z semver for electron-updater, got: ${releaseAppVersion}`
)
}
module.exports = {
+ // appId 永远保持旧值,即使品牌已改名 Kun:
+ // - macOS 端 Squirrel.Mac 校验更新包签名时锚定 bundle identifier,
+ // 换了 id 老版本会拒绝安装新版本;
+ // - Windows 端 NSIS 以 appId 派生卸载 GUID,换了 id 升级安装不会
+ // 卸载旧版本,用户会装出两份应用;
+ // - macOS TCC 权限、通知授权也都挂在这个 id 上。
appId: 'com.xingyuzhong.deepseekgui',
- productName: 'DeepSeek GUI',
+ productName: 'Kun',
asar: true,
asarUnpack: [
'**/kun/dist/**/*',
@@ -80,7 +101,7 @@ module.exports = {
],
npmRebuild: true,
directories: {
- output: process.env.DEEPSEEK_GUI_DIST_DIR || 'dist'
+ output: envWithLegacyFallback('KUN_DIST_DIR', 'DEEPSEEK_GUI_DIST_DIR') || 'dist'
},
files: [
'out/**/*',
@@ -94,10 +115,12 @@ module.exports = {
'!**/*.ts',
'!**/tsconfig*.json',
'!**/README*',
- '!**/CHANGELOG*',
- '!**/node_modules/openclaw/**/*'
+ '!**/CHANGELOG*'
+ // node_modules/openclaw (the vendor/openclaw-shim file: dep) must ship:
+ // the WeChat bridge imports @tencent-weixin/openclaw-weixin/dist at
+ // runtime to send media, and that chain resolves openclaw/plugin-sdk/*.
],
- artifactName: `DeepSeek-GUI-${artifactVersion}-\${os}-\${arch}.\${ext}`,
+ artifactName: `Kun-${artifactVersion}-\${os}-\${arch}.\${ext}`,
publish: [
{
provider: 'generic',
@@ -117,7 +140,12 @@ module.exports = {
gatekeeperAssess: false,
entitlements: 'build/entitlements.mac.plist',
entitlementsInherit: 'build/entitlements.mac.inherit.plist',
- icon: './src/asset/img/deepseek.png',
+ extendInfo: {
+ // 语音输入:渲染进程通过 getUserMedia 录音做语音转文字。
+ NSMicrophoneUsageDescription: 'Kun uses the microphone for voice-to-text input.'
+ },
+ // macOS 不会自动套圆角遮罩,图标文件本身需要是「圆角方块 + 透明边距」
+ icon: './src/asset/img/kun_mac.png',
// arm64 (Apple Silicon) + x64 (Intel). On M 系列 Mac 本地打包会各出一组 dmg/zip。
target: [
{ target: 'dmg', arch: ['arm64', 'x64'] },
@@ -128,7 +156,7 @@ module.exports = {
sign: hasExplicitMacSigningIdentity
},
win: {
- icon: './src/asset/img/deepseek.png',
+ icon: './src/asset/img/kun.png',
target: [{ target: 'nsis', arch: ['x64'] }]
},
nsis: {
@@ -140,13 +168,13 @@ module.exports = {
// 明确创建快捷方式;always 在覆盖安装时也会重建(即使用户曾删掉桌面图标)
createDesktopShortcut: 'always',
createStartMenuShortcut: true,
- shortcutName: 'DeepSeek GUI',
- uninstallDisplayName: 'DeepSeek GUI',
+ shortcutName: 'Kun',
+ uninstallDisplayName: 'Kun',
deleteAppDataOnUninstall: false
},
linux: {
category: 'Development',
- icon: './src/asset/img/deepseek.png',
+ icon: './src/asset/img/kun.png',
target: [{ target: 'AppImage', arch: ['x64'] }]
},
extraMetadata: {
diff --git a/examples/ui-plugins/starlight/img/bird.png b/examples/ui-plugins/starlight/img/bird.png
new file mode 100644
index 00000000..e69aeb30
Binary files /dev/null and b/examples/ui-plugins/starlight/img/bird.png differ
diff --git a/examples/ui-plugins/starlight/img/greet.png b/examples/ui-plugins/starlight/img/greet.png
new file mode 100644
index 00000000..0f32a399
Binary files /dev/null and b/examples/ui-plugins/starlight/img/greet.png differ
diff --git a/examples/ui-plugins/starlight/img/sit.png b/examples/ui-plugins/starlight/img/sit.png
new file mode 100644
index 00000000..ae69504c
Binary files /dev/null and b/examples/ui-plugins/starlight/img/sit.png differ
diff --git a/examples/ui-plugins/starlight/img/sleep.png b/examples/ui-plugins/starlight/img/sleep.png
new file mode 100644
index 00000000..4fba01c9
Binary files /dev/null and b/examples/ui-plugins/starlight/img/sleep.png differ
diff --git a/examples/ui-plugins/starlight/img/surf.png b/examples/ui-plugins/starlight/img/surf.png
new file mode 100644
index 00000000..e5ac9dee
Binary files /dev/null and b/examples/ui-plugins/starlight/img/surf.png differ
diff --git a/examples/ui-plugins/starlight/manifest.json b/examples/ui-plugins/starlight/manifest.json
new file mode 100644
index 00000000..963923fa
--- /dev/null
+++ b/examples/ui-plugins/starlight/manifest.json
@@ -0,0 +1,44 @@
+{
+ "id": "starlight",
+ "name": "星夜 Kun",
+ "version": "1.0.0",
+ "author": "Kun Team",
+ "description": "官方示例插件:深紫星夜配色的 Kun 形象,含主题色与进行中文案。",
+ "figures": {
+ "swim": "img/bird.png",
+ "surf": "img/surf.png",
+ "greet": "img/greet.png",
+ "sleep": "img/sleep.png",
+ "sit": "img/sit.png",
+ "toggleIcon": "img/greet.png"
+ },
+ "labels": {
+ "zh": {
+ "working": "巡航中…",
+ "workingSprint": "流星冲刺中…",
+ "workingDive": "潜入星海中…",
+ "workingSurf": "星浪滑行中…"
+ },
+ "en": {
+ "working": "Stargazing…",
+ "workingSprint": "Meteor dash…",
+ "workingDive": "Deep diving…",
+ "workingSurf": "Riding stardust…"
+ }
+ },
+ "tokens": {
+ "light": {
+ "--ds-accent": "#7a5fd0",
+ "--ds-accent-soft": "rgba(122, 95, 208, 0.15)",
+ "--ds-selection": "rgba(122, 95, 208, 0.2)"
+ },
+ "dark": {
+ "--ds-accent": "#a78ff0",
+ "--ds-accent-soft": "rgba(167, 143, 240, 0.2)",
+ "--ds-selection": "rgba(167, 143, 240, 0.26)"
+ }
+ },
+ "features": {
+ "cameos": true
+ }
+}
diff --git a/kun/README.md b/kun/README.md
index a1053ab0..28d4c8a9 100644
--- a/kun/README.md
+++ b/kun/README.md
@@ -1,6 +1,6 @@
# Kun
-Kun is the local HTTP/SSE agent runtime for DeepSeek-GUI. It exposes a
+Kun is the local HTTP/SSE agent runtime for the Kun desktop app. It exposes a
TypeScript-typed agent loop with a stable, GUI-friendly contract:
- `kun serve` starts a local HTTP server with `/v1/*` routes.
@@ -11,7 +11,7 @@ TypeScript-typed agent loop with a stable, GUI-friendly contract:
The name Kun is inspired by the great fish in Zhuangzi's line,
"In the northern sea there is a fish; its name is Kun." In
-DeepSeek-GUI, it means a deeper local runtime rather than a thin model
+this project, it means a deeper local runtime rather than a thin model
UI: one agent loop that can carry project context, call tools
reliably, resume sessions, and serve desktop chat, writing, phone
connections, and scheduled tasks.
@@ -272,6 +272,69 @@ Use `GET /v1/runtime/info` for the runtime capability manifest and
`GET /v1/runtime/tools` for redacted provider diagnostics. The GUI
Settings page reads both routes.
+## Hooks
+
+Hooks let external commands observe and intervene in the agent
+lifecycle without rebuilding Kun. They are configured under the
+top-level `hooks` key in `config.json` (so the GUI's
+`~/.deepseekgui/kun/config.json` works out of the box) and run inside
+the serve runtime — main loop, subagents, and CLI alike.
+
+```json
+{
+ "hooks": [
+ {
+ "phase": "PreToolUse",
+ "matcher": "bash|write_file|mcp__*",
+ "command": "node ~/.kun-hooks/guard.js",
+ "timeoutMs": 10000
+ },
+ { "phase": "UserPromptSubmit", "command": "~/.kun-hooks/prompt-context.sh" },
+ { "phase": "TurnEnd", "command": "~/.kun-hooks/notify.sh" }
+ ]
+}
+```
+
+Phases:
+
+- `PreToolUse` — before every tool call. May rewrite `arguments`, deny
+ the call, or auto-approve it (skip the approval prompt).
+- `PostToolUse` — after every tool call. May replace `output` or mark
+ the result as an error.
+- `UserPromptSubmit` — before the first model step of a turn. May deny
+ the turn or inject `additionalContext`, which is persisted as an
+ extra `` user message.
+- `TurnStart`, `TurnEnd`, `PreCompact` — observe-only notifications.
+ Failures surface as `hook_warning` runtime events and never break
+ the turn.
+
+Matching: `matcher` is a glob over the tool name (`*` wildcard, `|`
+alternation); `toolNames` is an exact-name list. Either match runs the
+hook; omit both to run on every tool. Lifecycle phases ignore matchers.
+
+Command protocol: the hook receives the invocation as JSON on stdin
+(`phase` plus phase-specific fields such as `call`, `result`, `prompt`,
+`status`, `reason`). On exit `0`, stdout is parsed as a JSON result
+(`{"decision":"deny"}`, `{"arguments":{...}}`, `{"output":...}`,
+`{"additionalContext":"..."}`); plain-text stdout becomes
+`additionalContext` for `UserPromptSubmit` and a message elsewhere.
+Exit `2` blocks the action with stderr as the reason. Any other non-zero
+exit code is a non-blocking `hook_warning`. The default timeout is 60s
+(`timeoutMs` overrides); a timed-out hook fails the tool call closed
+but never blocks observe-only phases.
+
+Hooks chain in declaration order: each hook sees the call or result as
+rewritten by the hooks before it. Embedders that assemble the runtime
+programmatically can also pass in-process function hooks via the
+`hooks` option of `LocalToolHost` and `AgentLoop` (exported from
+`kun/hooks`).
+
+Command hooks execute arbitrary shell commands with the runtime's
+privileges — treat `config.json` as trusted input.
+
+See `../docs/kun-hooks.en.md` for the full reference: per-phase stdin
+payloads, result fields, failure semantics, and example hook scripts.
+
## Data directory layout
`--data-dir` is the on-disk root for everything the runtime owns:
@@ -413,7 +476,7 @@ stay local to one thread, leave it as a pinned constraint.
## GUI integration
-After the legacy provider retirement, the DeepSeek-GUI main process
+After the legacy provider retirement, the desktop app main process
starts Kun through `kun-process.ts` and routes all
`runtimeRequest` calls to the active base URL with a bearer token.
The renderer uses the same `AgentProvider` interface as the legacy
diff --git a/kun/README.zh-CN.md b/kun/README.zh-CN.md
index 7440c4d3..d165695a 100644
--- a/kun/README.zh-CN.md
+++ b/kun/README.zh-CN.md
@@ -1,12 +1,12 @@
# Kun
-Kun 是 DeepSeek-GUI 的本地 HTTP/SSE 代理运行时。它为 GUI 提供稳定、类型化且 GUI 友好的代理循环合约:
+Kun 是同名桌面应用的本地 HTTP/SSE 代理运行时。它为 GUI 提供稳定、类型化且 GUI 友好的代理循环合约:
- `kun serve` 会启动一个本地 HTTP 服务器,并暴露 `/v1/*` 路由。
- 线程、回合(turn)、事件、审批和用量都会以追加写入的 JSONL 日志持久化,并配合原子化索引更新。
- Agent 循环采用 cache-first 设计:不可变的 prompt 前缀、边界受限的 TTL/LRU 缓存、inflight 跟踪,以及显式上下文压缩。
-Kun 取意于《庄子·逍遥游》中的“北冥有鱼,其名为鲲”。在 DeepSeek-GUI
+Kun 取意于《庄子·逍遥游》中的“北冥有鱼,其名为鲲”。在本项目
里,它代表一个更深的本地运行时:不是把模型回复包一层 UI,而是让模型可以
长期携带项目上下文、稳定调用工具、恢复会话,并在桌面、写作、手机连接和
定时任务之间复用同一套 agent loop。
@@ -248,6 +248,61 @@ Kun 默认使用混合存储:`threads/{threadId}/messages.jsonl` 与 `events.j
在渲染端使用 `GET /v1/runtime/info` 获取运行时能力清单,使用
`GET /v1/runtime/tools` 查看 provider 诊断。GUI 设置页会读取这两条接口。
+## Hooks(钩子)
+
+Hooks 允许外部命令观察并干预 agent 生命周期,无需重新编译 Kun。在
+`config.json` 顶层 `hooks` 键下配置(GUI 默认的
+`~/.deepseekgui/kun/config.json` 直接生效),主循环、子代理和 CLI
+共用同一套 hook。
+
+```json
+{
+ "hooks": [
+ {
+ "phase": "PreToolUse",
+ "matcher": "bash|write_file|mcp__*",
+ "command": "node ~/.kun-hooks/guard.js",
+ "timeoutMs": 10000
+ },
+ { "phase": "UserPromptSubmit", "command": "~/.kun-hooks/prompt-context.sh" },
+ { "phase": "TurnEnd", "command": "~/.kun-hooks/notify.sh" }
+ ]
+}
+```
+
+阶段:
+
+- `PreToolUse` — 每次工具调用前。可改写 `arguments`、拒绝调用,或
+ 自动放行(跳过审批弹窗)。
+- `PostToolUse` — 每次工具调用后。可替换 `output` 或把结果标记为错误。
+- `UserPromptSubmit` — 回合首次模型调用前。可拒绝整个回合,或注入
+ `additionalContext`(持久化为一条 `` 用户消息)。
+- `TurnStart`、`TurnEnd`、`PreCompact` — 只读通知。失败只产生
+ `hook_warning` 运行时事件,绝不影响回合。
+
+匹配:`matcher` 是针对工具名的 glob(`*` 通配,`|` 多选);`toolNames`
+是精确名单。两者任一命中即运行;都省略则匹配所有工具。生命周期阶段
+忽略匹配器。
+
+命令协议:invocation 以 JSON 写入 stdin(`phase` 加各阶段字段,如
+`call`、`result`、`prompt`、`status`、`reason`)。退出码 `0` 时 stdout
+按 JSON 结果解析(`{"decision":"deny"}`、`{"arguments":{...}}`、
+`{"output":...}`、`{"additionalContext":"..."}`);纯文本 stdout 在
+`UserPromptSubmit` 中作为 `additionalContext`,其余阶段作为 message。
+退出码 `2` 阻断动作,stderr 为原因。其他非零退出码只产生非阻断的
+`hook_warning`。默认超时 60 秒(`timeoutMs` 可覆盖);工具阶段超时按
+失败关闭处理,只读阶段超时不会阻断。
+
+Hooks 按声明顺序链式执行:每个 hook 看到的是前面 hook 改写后的调用或
+结果。以库方式嵌入运行时的调用方还可以通过 `LocalToolHost` 与
+`AgentLoop` 的 `hooks` 选项传入进程内函数 hook(从 `kun/hooks` 导出)。
+
+命令 hook 以运行时权限执行任意 shell 命令——请把 `config.json` 当作
+可信输入对待。
+
+完整参考见 `../docs/kun-hooks.md`:各阶段 stdin 载荷、结果字段、
+失败语义与示例 hook 脚本。
+
## 数据目录布局
`--data-dir` 即运行时所管理的一切磁盘根目录:
diff --git a/kun/config.example.json b/kun/config.example.json
index a4c32632..58e8c515 100644
--- a/kun/config.example.json
+++ b/kun/config.example.json
@@ -112,5 +112,6 @@
"scopes": ["user", "workspace", "project"],
"maxInjectedRecords": 8
}
- }
+ },
+ "hooks": []
}
diff --git a/kun/package-lock.json b/kun/package-lock.json
index 022a8d8b..ca6f4212 100644
--- a/kun/package-lock.json
+++ b/kun/package-lock.json
@@ -7,6 +7,7 @@
"": {
"name": "kun",
"version": "0.1.0",
+ "license": "PolyForm-Noncommercial-1.0.0",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.29.0",
"better-sqlite3": "^12.10.0",
diff --git a/kun/package.json b/kun/package.json
index bd62c551..24415dba 100644
--- a/kun/package.json
+++ b/kun/package.json
@@ -1,7 +1,8 @@
{
"name": "kun",
"version": "0.1.0",
- "description": "Kun local HTTP/SSE agent runtime for DeepSeek-GUI",
+ "description": "Kun local HTTP/SSE agent runtime",
+ "license": "PolyForm-Noncommercial-1.0.0",
"type": "module",
"private": true,
"main": "./dist/index.js",
@@ -45,6 +46,10 @@
"./services": {
"types": "./dist/services/index.d.ts",
"import": "./dist/services/index.js"
+ },
+ "./hooks": {
+ "types": "./dist/hooks/index.d.ts",
+ "import": "./dist/hooks/index.js"
}
},
"bin": {
diff --git a/kun/src/adapters/file/file-session-store.ts b/kun/src/adapters/file/file-session-store.ts
index 3420d0a3..81ea862e 100644
--- a/kun/src/adapters/file/file-session-store.ts
+++ b/kun/src/adapters/file/file-session-store.ts
@@ -11,6 +11,13 @@ const DEFAULT_USAGE_EVENT_COMPACTION_MAX_BYTES = 5 * 1024 * 1024
const DEFAULT_USAGE_EVENT_RETENTION_DAYS = 365
const MS_PER_DAY = 86_400_000
+/**
+ * The agent loop reloads the full item history on every model step, so
+ * keep the deduped array for recently touched threads in memory instead
+ * of re-reading and re-parsing messages.jsonl each time.
+ */
+const ITEMS_CACHE_MAX_THREADS = 4
+
/**
* File-backed session store. Appends events and items to per-thread
* JSONL files and keeps the canonical session snapshot in a small
@@ -23,6 +30,8 @@ export class FileSessionStore implements SessionStore {
retentionDays: number
nowIso: () => string
}
+ private readonly itemsCache = new Map()
+ private readonly itemsCacheVersion = new Map()
constructor(options: {
dataDir: string
@@ -61,12 +70,16 @@ export class FileSessionStore implements SessionStore {
await this.ensureDir(this.threadDir(threadId))
const path = this.messagesPath(threadId)
await appendFile(path, `${JSON.stringify(item)}\n`, 'utf-8')
+ this.bumpItemsVersion(threadId)
+ this.applyItemToCache(threadId, item)
}
async rewriteItems(threadId: string, items: TurnItem[]): Promise {
await this.ensureDir(this.threadDir(threadId))
const contents = items.map((item) => JSON.stringify(item)).join('\n')
await this.atomicWrite(this.messagesPath(threadId), contents ? `${contents}\n` : '')
+ this.bumpItemsVersion(threadId)
+ this.cacheItems(threadId, [...items])
}
async updateItem(threadId: string, itemId: string, patch: Partial): Promise {
@@ -76,6 +89,8 @@ export class FileSessionStore implements SessionStore {
const updated = { ...current, ...patch } as TurnItem
await this.ensureDir(this.threadDir(threadId))
await appendFile(this.messagesPath(threadId), `${JSON.stringify(updated)}\n`, 'utf-8')
+ this.bumpItemsVersion(threadId)
+ this.applyItemToCache(threadId, updated)
return updated
}
@@ -87,6 +102,12 @@ export class FileSessionStore implements SessionStore {
}
async loadItems(threadId: string): Promise {
+ const cached = this.itemsCache.get(threadId)
+ if (cached) {
+ this.cacheItems(threadId, cached)
+ return [...cached]
+ }
+ const version = this.itemsVersionOf(threadId)
const raw = await readJsonl(this.messagesPath(threadId))
const latestById = new Map()
for (const item of raw) {
@@ -100,6 +121,11 @@ export class FileSessionStore implements SessionStore {
seen.add(item.id)
ordered.unshift(latestById.get(item.id)!)
}
+ // A write that landed while we were reading invalidates this snapshot.
+ if (this.itemsVersionOf(threadId) === version) {
+ this.cacheItems(threadId, ordered)
+ return [...ordered]
+ }
return ordered
}
@@ -123,7 +149,34 @@ export class FileSessionStore implements SessionStore {
}
async resetMemory(): Promise {
- // File-backed store has no in-memory state to reset.
+ this.itemsCache.clear()
+ this.itemsCacheVersion.clear()
+ }
+
+ private itemsVersionOf(threadId: string): number { // current write counter for the thread; 0 if none is recorded
+ return this.itemsCacheVersion.get(threadId) ?? 0
+ }
+
+ private bumpItemsVersion(threadId: string): void { // increment the thread's write counter by one
+ this.itemsCacheVersion.set(threadId, this.itemsVersionOf(threadId) + 1)
+ }
+
+ private cacheItems(threadId: string, items: TurnItem[]): void { // store `items` (by reference) as the most recent cache entry
+ this.itemsCache.delete(threadId) // delete + set moves the key to the end of the Map's insertion order
+ this.itemsCache.set(threadId, items)
+ while (this.itemsCache.size > ITEMS_CACHE_MAX_THREADS) { // evict until the cache is back within the thread limit
+ const oldest = this.itemsCache.keys().next().value // first key in insertion order = least recently cached
+ if (oldest === undefined) break
+ this.itemsCache.delete(oldest)
+ }
+ }
+
+ private applyItemToCache(threadId: string, item: TurnItem): void { // mirror a written item into the cached array, if the thread is cached
+ const cached = this.itemsCache.get(threadId)
+ if (!cached) return // thread not cached: nothing to update
+ const index = cached.findIndex((existing) => existing.id === item.id)
+ if (index >= 0) cached[index] = item // same id: replace in place. NOTE(review): confirm this position matches the order loadItems rebuilds from disk
+ else cached.push(item)
+ }
private threadDir(threadId: string): string {
diff --git a/kun/src/adapters/hybrid/hybrid-session-store.ts b/kun/src/adapters/hybrid/hybrid-session-store.ts
index 5eb8f72a..8fd9b4dc 100644
--- a/kun/src/adapters/hybrid/hybrid-session-store.ts
+++ b/kun/src/adapters/hybrid/hybrid-session-store.ts
@@ -1,7 +1,11 @@
import type { RuntimeEvent } from '../../contracts/events.js'
import type { TurnItem } from '../../contracts/items.js'
import type { AgentSession } from '../../domain/session.js'
-import type { SessionStore } from '../../ports/session-store.js'
+import type {
+ SessionLatestUsageSnapshot,
+ SessionStore,
+ SessionUsageRecord
+} from '../../ports/session-store.js'
import { FileSessionStore } from '../file/file-session-store.js'
import type { HybridThreadStore } from './hybrid-thread-store.js'
@@ -28,7 +32,7 @@ export class HybridSessionStore implements SessionStore {
async appendEvent(threadId: string, event: RuntimeEvent): Promise {
await this.delegate.appendEvent(threadId, event)
- await this.index.noteEventSeq(threadId, event.seq)
+ await this.index.noteEvent(event)
}
async appendItem(threadId: string, item: TurnItem): Promise {
@@ -60,9 +64,19 @@ export class HybridSessionStore implements SessionStore {
}
async highestSeq(threadId: string): Promise {
+ const indexed = await this.index.getEventSeqHighWater(threadId)
+ if (indexed !== null) return indexed
return this.delegate.highestSeq(threadId)
}
+ async loadUsageRecords(options?: { threadId?: string }): Promise {
+ return this.index.loadUsageRecords(options)
+ }
+
+ async loadLatestUsageSnapshots(options?: { threadIds?: string[] }): Promise {
+ return this.index.loadLatestUsageSnapshots(options)
+ }
+
async resetMemory(): Promise {
await this.delegate.resetMemory()
}
diff --git a/kun/src/adapters/hybrid/hybrid-thread-store.ts b/kun/src/adapters/hybrid/hybrid-thread-store.ts
index df310998..613481b4 100644
--- a/kun/src/adapters/hybrid/hybrid-thread-store.ts
+++ b/kun/src/adapters/hybrid/hybrid-thread-store.ts
@@ -1,6 +1,6 @@
-import { mkdir, open, readFile, readdir, rm, stat } from 'node:fs/promises'
+import { mkdir, open, readFile, readdir, rename, rm, stat } from 'node:fs/promises'
import { dirname, join, resolve } from 'node:path'
-import type { Database as BetterSqliteDatabase } from 'better-sqlite3'
+import type { Database as BetterSqliteDatabase, Statement } from 'better-sqlite3'
import type {
ThreadGoal,
ThreadMode,
@@ -16,8 +16,14 @@ import type { TurnItem } from '../../contracts/items.js'
import type { Turn } from '../../contracts/turns.js'
import type { ApprovalPolicy, SandboxMode } from '../../contracts/policy.js'
import type { ThreadStore, ThreadStoreListOptions } from '../../ports/thread-store.js'
+import type { SessionLatestUsageSnapshot, SessionUsageRecord } from '../../ports/session-store.js'
import { toThreadSummary } from '../../domain/thread.js'
import { readJsonl } from '../file/file-thread-store.js'
+import {
+ emptyUsageSnapshot,
+ UsageSnapshotSchema,
+ type UsageSnapshot
+} from '../../contracts/usage.js'
type ThreadMetadataLine = {
kind: 'thread_metadata'
@@ -66,6 +72,17 @@ type ThreadIndexRecord = {
preview: string
}
+type UsageRuntimeEvent = Extract
+
+type UsageRow = {
+ thread_id: string
+ seq: number
+ timestamp: string
+ turn_id: string | null
+ model: string | null
+ usage_json: string
+}
+
/**
* Hybrid store inspired by Codex: JSONL files are canonical and SQLite
* is a rebuildable index. SQLite writes always happen after metadata
@@ -77,7 +94,20 @@ export class HybridThreadStore implements ThreadStore {
private readonly nowIso: () => string
private readonly readyPromise: Promise
private readonly metadataQueues = new Map>()
+ private backfillPromise: Promise | null = null
private db: BetterSqliteDatabase | null = null
+ // Prepared-statement cache for the per-event hot paths; better-sqlite3
+ // re-compiles the SQL on every prepare() call otherwise.
+ private readonly statementCache = new Map()
+ // Reconstructed thread records keyed by the file signatures they were built
+ // from. Thread detail requests re-read multi-megabyte JSONL files otherwise.
+ private readonly threadRecordCache = new Map<
+ string,
+ { metadataSig: string; itemsSig: string; record: ThreadRecord }
+ >()
+ // Per-thread floor that keeps metadata compaction from re-running on every
+ // append when a single snapshot is already larger than the threshold.
+ private readonly metadataCompactFloor = new Map()
constructor(options: { dataDir: string; sqlitePath?: string; nowIso?: () => string }) {
this.dataDir = resolve(options.dataDir, 'threads')
@@ -98,6 +128,11 @@ export class HybridThreadStore implements ThreadStore {
}
}
+  /**
+   * Resolves once the store is initialized and the background index backfill,
+   * if one was started, has settled.
+   */
+  async waitForBackfill(): Promise {
+    await this.ready()
+    const pendingBackfill = this.backfillPromise
+    await pendingBackfill
+  }
+
async list(options: ThreadStoreListOptions = {}): Promise {
await this.ready()
if (this.db) {
@@ -130,7 +165,7 @@ export class HybridThreadStore implements ThreadStore {
const thread = await this.readThreadFromDisk(threadId)
if (thread && this.db) {
- this.upsertIndexBestEffort(await this.indexRecordForThread(thread))
+ this.upsertIndexBestEffort(this.indexRecordForThread(thread))
}
return thread
}
@@ -139,7 +174,7 @@ export class HybridThreadStore implements ThreadStore {
await this.ready()
await this.appendMetadata(thread)
if (this.db) {
- this.upsertIndexBestEffort(await this.indexRecordForThread(thread))
+ this.upsertIndexBestEffort(this.indexRecordForThread(thread))
}
return thread
}
@@ -154,25 +189,117 @@ export class HybridThreadStore implements ThreadStore {
}
await rm(dir, { recursive: true, force: true })
this.deleteIndexRow(threadId)
+ this.threadRecordCache.delete(threadId)
+ this.metadataCompactFloor.delete(threadId)
return true
}
async noteEventSeq(threadId: string, seq: number): Promise {
+ await this.noteEventHighWater(threadId, seq)
+ }
+
+ async noteEvent(event: RuntimeEvent): Promise {
await this.ready()
if (!this.db) return
+ this.noteEventHighWaterSync(event.threadId, event.seq)
+ if (event.kind !== 'usage') return
try {
- this.db
+ this.cachedStatement(`
+ INSERT INTO usage_events (
+ thread_id, seq, timestamp, turn_id, model, usage_json
+ )
+ VALUES (
+ @thread_id, @seq, @timestamp, @turn_id, @model, @usage_json
+ )
+ ON CONFLICT(thread_id, seq) DO UPDATE SET
+ timestamp = excluded.timestamp,
+ turn_id = excluded.turn_id,
+ model = excluded.model,
+ usage_json = excluded.usage_json
+ `).run(usageRowFromEvent(event))
+ } catch (error) {
+ warnSqlite('record usage event', error)
+ }
+ }
+
+  /**
+   * Reads the indexed event sequence high-water mark for a thread.
+   * Returns null when SQLite is unavailable, the thread has no index row,
+   * the stored value is not a number, or the query fails (failure is logged).
+   */
+  async getEventSeqHighWater(threadId: string): Promise {
+    await this.ready()
+    const db = this.db
+    if (!db) return null
+    try {
+      const select = db.prepare('SELECT event_seq_high_water FROM threads WHERE id = ?')
+      const found = select.get(threadId) as { event_seq_high_water?: number } | undefined
+      const highWater = found?.event_seq_high_water
+      if (typeof highWater === 'number') return highWater
+      return null
+    } catch (error) {
+      warnSqlite('read event high water', error)
+      return null
+    }
+  }
+
+  /**
+   * Loads usage rows (optionally restricted to one thread) ordered by thread
+   * and sequence, and converts them into per-event usage deltas.
+   * Throws when SQLite is unavailable; query failures are logged and rethrown.
+   */
+  async loadUsageRecords(options: { threadId?: string } = {}): Promise {
+    await this.ready()
+    if (!this.db) throw new Error('hybrid sqlite unavailable')
+    try {
+      const wantedThread = options.threadId?.trim()
+      let rows: UsageRow[]
+      if (wantedThread) {
+        const byThread = this.db.prepare(`
+          SELECT * FROM usage_events
+          WHERE thread_id = @thread_id
+          ORDER BY thread_id ASC, seq ASC
+        `)
+        rows = byThread.all({ thread_id: wantedThread }) as UsageRow[]
+      } else {
+        // A blank or missing thread id means "every thread".
+        const everyThread = this.db.prepare('SELECT * FROM usage_events ORDER BY thread_id ASC, seq ASC')
+        rows = everyThread.all() as UsageRow[]
+      }
+      return usageRecordsFromRows(rows)
+    } catch (error) {
+      warnSqlite('load usage records', error)
+      throw error
+    }
+  }
+
+ async loadLatestUsageSnapshots(options: { threadIds?: string[] } = {}): Promise {
+ await this.ready()
+ if (!this.db) throw new Error('hybrid sqlite unavailable')
+ try {
+ const threadIds = [...new Set((options.threadIds ?? []).map((id) => id.trim()).filter(Boolean))]
+ if (threadIds.length > 0) {
+ const placeholders = threadIds.map((_id, index) => `@id${index}`).join(', ')
+ const params = Object.fromEntries(threadIds.map((id, index) => [`id${index}`, id]))
+ const rows = this.db
+ .prepare(`
+ SELECT u.*
+ FROM usage_events u
+ JOIN (
+ SELECT thread_id, MAX(seq) AS seq
+ FROM usage_events
+ WHERE thread_id IN (${placeholders})
+ GROUP BY thread_id
+ ) latest
+ ON latest.thread_id = u.thread_id AND latest.seq = u.seq
+ ORDER BY u.thread_id ASC
+ `)
+ .all(params) as UsageRow[]
+ return latestUsageSnapshotsFromRows(rows)
+ }
+ const rows = this.db
.prepare(`
- UPDATE threads
- SET event_seq_high_water = CASE
- WHEN event_seq_high_water > @seq THEN event_seq_high_water
- ELSE @seq
- END
- WHERE id = @id
+ SELECT u.*
+ FROM usage_events u
+ JOIN (
+ SELECT thread_id, MAX(seq) AS seq
+ FROM usage_events
+ GROUP BY thread_id
+ ) latest
+ ON latest.thread_id = u.thread_id AND latest.seq = u.seq
+ ORDER BY u.thread_id ASC
`)
- .run({ id: threadId, seq })
+ .all() as UsageRow[]
+ return latestUsageSnapshotsFromRows(rows)
} catch (error) {
- warnSqlite('note event seq', error)
+ warnSqlite('load latest usage snapshots', error)
+ throw error
}
}
@@ -184,9 +311,10 @@ export class HybridThreadStore implements ThreadStore {
const Database = sqlite.default
this.db = new Database(this.sqlitePath)
this.db.pragma('journal_mode = WAL')
+ this.db.pragma('busy_timeout = 5000')
this.db.pragma('foreign_keys = ON')
this.migrate()
- await this.backfill()
+ this.startBackfill()
} catch (error) {
warnSqlite('initialize', error)
try {
@@ -241,24 +369,73 @@ export class HybridThreadStore implements ThreadStore {
ON threads(status, updated_at_ms DESC, id DESC);
CREATE INDEX IF NOT EXISTS threads_relation_updated_idx
ON threads(relation, updated_at_ms DESC, id DESC);
+ CREATE TABLE IF NOT EXISTS usage_events (
+ thread_id TEXT NOT NULL,
+ seq INTEGER NOT NULL,
+ timestamp TEXT NOT NULL,
+ turn_id TEXT,
+ model TEXT,
+ usage_json TEXT NOT NULL,
+ PRIMARY KEY(thread_id, seq)
+ );
+ CREATE INDEX IF NOT EXISTS usage_events_thread_seq_idx
+ ON usage_events(thread_id, seq);
+ CREATE INDEX IF NOT EXISTS usage_events_timestamp_idx
+ ON usage_events(timestamp);
`)
addColumnIfMissing(this.db, 'threads', 'todos_json TEXT')
+ addColumnIfMissing(this.db, 'threads', 'usage_backfilled INTEGER NOT NULL DEFAULT 0')
+ }
+
+  /**
+   * Returns the prepared statement for `sql`, compiling it on first use and
+   * serving it from `statementCache` afterwards. Throws when the database is
+   * not open, even if the statement was cached earlier.
+   */
+  private cachedStatement(sql: string): Statement {
+    if (!this.db) throw new Error('sqlite unavailable')
+    const cached = this.statementCache.get(sql)
+    if (cached) return cached
+    const compiled = this.db.prepare(sql)
+    this.statementCache.set(sql, compiled)
+    return compiled
+  }
+
+ private startBackfill(): void {
+ if (this.backfillPromise) return
+ this.backfillPromise = this.backfill().catch((error) => {
+ warnSqlite('background backfill', error)
+ })
}
private async backfill(): Promise {
if (!this.db) return
- const discovered = new Set()
+ const rows = this.db
+ .prepare('SELECT id, usage_backfilled FROM threads')
+ .all() as Array<{ id: string; usage_backfilled?: number }>
+ const indexed = new Map(rows.map((row) => [row.id, row.usage_backfilled === 1]))
for (const threadId of await this.threadIdsFromFilesystem()) {
- const thread = await this.readThreadFromDisk(threadId)
- if (!thread) continue
- discovered.add(thread.id)
- this.upsertIndexBestEffort(await this.indexRecordForThread(thread))
+ const usageBackfilled = indexed.get(threadId)
+ // Threads marked as backfilled never need their events.jsonl re-read;
+ // without the marker every startup re-scanned the full event history
+ // of threads that simply have no usage events.
+ if (usageBackfilled === true) continue
+ if (usageBackfilled === undefined) {
+ const thread = await this.readThreadFromDisk(threadId)
+ if (!thread) continue
+ const scan = await this.scanEventsForBackfill(threadId)
+ this.upsertIndexBestEffort({
+ ...this.indexRecordForThread(thread),
+ eventSeqHighWater: scan.highWater
+ })
+ await this.insertUsageEventsChunked(threadId, scan.usage)
+ } else {
+ const scan = await this.scanEventsForBackfill(threadId)
+ this.noteEventHighWaterSync(threadId, scan.highWater)
+ await this.insertUsageEventsChunked(threadId, scan.usage)
+ }
+ this.markUsageBackfilled(threadId)
+ await yieldToEventLoop()
}
try {
- const rows = this.db.prepare('SELECT id FROM threads').all() as Array<{ id: string }>
for (const row of rows) {
- if (discovered.has(row.id)) continue
if (!(await pathExists(this.threadDir(row.id)))) {
this.deleteIndexRow(row.id)
}
@@ -268,6 +445,64 @@ export class HybridThreadStore implements ThreadStore {
}
}
+  /**
+   * Reads a thread's events.jsonl once and collects both the highest `seq`
+   * seen and every usage event. Read failures are logged and whatever was
+   * gathered so far (possibly nothing) is returned.
+   */
+  private async scanEventsForBackfill(
+    threadId: string
+  ): Promise<{ highWater: number; usage: UsageRuntimeEvent[] }> {
+    const result: { highWater: number; usage: UsageRuntimeEvent[] } = { highWater: 0, usage: [] }
+    try {
+      const events = await readJsonl(this.eventsPath(threadId))
+      for (const entry of events) {
+        if (entry.seq > result.highWater) result.highWater = entry.seq
+        if (entry.kind === 'usage') result.usage.push(entry)
+      }
+    } catch (error) {
+      warnSqlite(`scan events for ${threadId}`, error)
+    }
+    return result
+  }
+
+ /**
+ * Inserts usage rows in small transactions, yielding between chunks.
+ * better-sqlite3 is synchronous: unchunked backfill of a large history
+ * starved the event loop long enough that the HTTP server never reported
+ * ready within the GUI's startup timeout.
+ *
+ * No-op when the database is unavailable or `events` is empty. Existing rows
+ * with the same (thread_id, seq) are replaced. The first failing chunk is
+ * logged and aborts the remaining chunks; nothing is thrown to the caller.
+ */
+ private async insertUsageEventsChunked(threadId: string, events: UsageRuntimeEvent[]): Promise {
+ if (!this.db || events.length === 0) return
+ const insert = this.cachedStatement(`
+ INSERT OR REPLACE INTO usage_events (
+ thread_id, seq, timestamp, turn_id, model, usage_json
+ )
+ VALUES (
+ @thread_id, @seq, @timestamp, @turn_id, @model, @usage_json
+ )
+ `)
+ // One transaction per chunk: a chunk is written atomically, but chunks
+ // already committed stay in place if a later one fails.
+ const insertChunk = this.db.transaction((chunk: UsageRow[]) => {
+ for (const row of chunk) insert.run(row)
+ })
+ const chunkSize = 200
+ for (let start = 0; start < events.length; start += chunkSize) {
+ const chunk = events.slice(start, start + chunkSize).map(usageRowFromEvent)
+ try {
+ insertChunk(chunk)
+ } catch (error) {
+ warnSqlite(`backfill usage events for ${threadId}`, error)
+ // NOTE(review): the failure is not reported to the caller. `backfill`
+ // calls `markUsageBackfilled` right after this method, so a thread whose
+ // insert failed still appears to get flagged as done - confirm intended.
+ return
+ }
+ // Give pending callbacks a chance to run before the next synchronous chunk.
+ await yieldToEventLoop()
+ }
+ }
+
+  /**
+   * Flags a thread's index row as usage-backfilled so later backfill passes
+   * skip re-reading its events.jsonl. Best effort: a missing database makes
+   * this a no-op, and SQLite failures are logged and swallowed.
+   */
+  private markUsageBackfilled(threadId: string): void {
+    if (!this.db) return
+    try {
+      // Runs once per thread during backfill, so reuse the compiled statement
+      // instead of re-preparing the same SQL on every call.
+      this.cachedStatement('UPDATE threads SET usage_backfilled = 1 WHERE id = ?').run(threadId)
+    } catch (error) {
+      warnSqlite('mark usage backfilled', error)
+    }
+  }
+
private queryThreadRows(options: ThreadStoreListOptions): ThreadRow[] {
if (!this.db) return []
const where: string[] = []
@@ -380,6 +615,7 @@ export class HybridThreadStore implements ThreadStore {
if (!this.db) return
try {
this.db.prepare('DELETE FROM threads WHERE id = ?').run(threadId)
+ this.db.prepare('DELETE FROM usage_events WHERE thread_id = ?').run(threadId)
} catch (error) {
warnSqlite('delete index row', error)
}
@@ -396,6 +632,7 @@ export class HybridThreadStore implements ThreadStore {
thread: stripThreadItemBodies(thread)
}
await appendJsonlLine(this.metadataPath(thread.id), line)
+ await this.maybeCompactMetadata(thread.id)
})
const guard = run.then(() => undefined, () => undefined)
this.metadataQueues.set(thread.id, guard)
@@ -408,27 +645,91 @@ export class HybridThreadStore implements ThreadStore {
}
}
- private async indexRecordForThread(thread: ThreadRecord): Promise {
- const items = await this.loadItems(thread.id)
- const itemSource = items.length > 0 ? items : thread.turns.flatMap((turn) => turn.items)
- const eventSeqHighWater = await this.highestSeq(thread.id)
+ /**
+ * Every upsert appends a full thread snapshot, so metadata.jsonl grows
+ * quadratically with turn activity (observed: 4.2MB for an 8-turn thread
+ * whose latest snapshot is 6KB). Once the file passes the threshold it is
+ * rewritten as a single normalized snapshot. Runs inside the per-thread
+ * metadata queue, so no append can interleave with the rewrite.
+ *
+ * Never throws: any failure removes the temp file and logs a warning.
+ */
+ private async maybeCompactMetadata(threadId: string): Promise {
+ const path = this.metadataPath(threadId)
+ const tmpPath = `${path}.compact.tmp`
+ try {
+ const stats = await stat(path)
+ // Use the floor recorded by an earlier compaction of this thread, or the
+ // global minimum when the thread has not been compacted yet.
+ const floor = this.metadataCompactFloor.get(threadId) ?? METADATA_COMPACT_MIN_BYTES
+ if (stats.size < floor) return
+ const record = await this.readLatestMetadata(threadId)
+ if (!record) return
+ const line: ThreadMetadataLine = {
+ kind: 'thread_metadata',
+ version: 1,
+ timestamp: this.nowIso(),
+ thread: stripThreadItemBodies(record)
+ }
+ // Write the replacement to a sibling temp file and flush it to disk
+ // before it is swapped in.
+ const handle = await open(tmpPath, 'w')
+ try {
+ await handle.writeFile(`${JSON.stringify(line)}\n`, 'utf-8')
+ await handle.sync()
+ } finally {
+ await handle.close()
+ }
+ await rename(tmpPath, path)
+ const compacted = await stat(path)
+ // Raise the floor to four times the compacted size (never below the
+ // global minimum) so a single large snapshot does not trigger another
+ // compaction on every append.
+ this.metadataCompactFloor.set(
+ threadId,
+ Math.max(METADATA_COMPACT_MIN_BYTES, compacted.size * 4)
+ )
+ } catch (error) {
+ // On Windows the atomic rename can fail with EPERM while another
+ // handle has the file open; the next append over the threshold simply
+ // retries. Drop the temp file so failures do not accumulate litter.
+ await rm(tmpPath, { force: true }).catch(() => undefined)
+ console.warn(
+ `[kun] metadata compaction skipped for ${threadId}: ${error instanceof Error ? error.message : String(error)}`
+ )
+ }
+ }
+
+ private indexRecordForThread(thread: ThreadRecord): ThreadIndexRecord {
+ const itemSource = thread.turns.flatMap((turn) => turn.items)
return {
thread,
messageCount: itemSource.length,
- eventSeqHighWater,
+ eventSeqHighWater: 0,
preview: previewFromItems(itemSource)
}
}
private async readThreadFromDisk(threadId: string): Promise {
+ const [metadataSig, itemsSig] = await Promise.all([
+ fileSignature(this.metadataPath(threadId)),
+ fileSignature(this.messagesPath(threadId))
+ ])
+ const cached = this.threadRecordCache.get(threadId)
+ if (cached && cached.metadataSig === metadataSig && cached.itemsSig === itemsSig) {
+ // Refresh LRU position.
+ this.threadRecordCache.delete(threadId)
+ this.threadRecordCache.set(threadId, cached)
+ return cached.record
+ }
const metadata = await this.readLatestMetadata(threadId)
const legacy = metadata ? null : await this.readLegacyThread(threadId)
const source = metadata ?? legacy
if (!source) return null
const items = await this.loadItems(threadId)
- return hydrateThreadItems(source, items, {
+ // Records are treated as immutable by all callers (updates flow through
+ // upsert with fresh objects), so caching the reference is safe.
+ const record = hydrateThreadItems(source, items, {
preserveExistingItemsWhenNoFileItems: Boolean(legacy)
})
+ this.threadRecordCache.set(threadId, { metadataSig, itemsSig, record })
+ while (this.threadRecordCache.size > THREAD_RECORD_CACHE_LIMIT) {
+ const oldest = this.threadRecordCache.keys().next().value
+ if (!oldest) break
+ this.threadRecordCache.delete(oldest)
+ }
+ return record
}
private async readLatestMetadata(threadId: string): Promise {
@@ -471,9 +772,25 @@ export class HybridThreadStore implements ThreadStore {
return ordered
}
- private async highestSeq(threadId: string): Promise {
- const events = await readJsonl(this.eventsPath(threadId))
- return events.reduce((max, event) => Math.max(max, event.seq), 0)
+ private async noteEventHighWater(threadId: string, seq: number): Promise {
+ await this.ready()
+ this.noteEventHighWaterSync(threadId, seq)
+ }
+
+ private noteEventHighWaterSync(threadId: string, seq: number): void {
+ if (!this.db) return
+ try {
+ this.cachedStatement(`
+ UPDATE threads
+ SET event_seq_high_water = CASE
+ WHEN event_seq_high_water > @seq THEN event_seq_high_water
+ ELSE @seq
+ END
+ WHERE id = @id
+ `).run({ id: threadId, seq })
+ } catch (error) {
+ warnSqlite('note event seq', error)
+ }
}
private async listFromFilesystem(): Promise {
@@ -739,6 +1056,8 @@ function summaryFromRow(row: ThreadRow): ThreadSummary {
model: row.model,
mode: row.mode,
status: row.status,
+ approvalPolicy: row.approval_policy,
+ sandboxMode: row.sandbox_mode,
...(row.cost_budget_usd !== null ? { costBudgetUsd: row.cost_budget_usd } : {}),
...(row.cost_budget_warning_sent !== null ? { costBudgetWarningSent: Boolean(row.cost_budget_warning_sent) } : {}),
relation: row.relation,
@@ -832,6 +1151,143 @@ function previewFromItems(items: TurnItem[]): string {
return ''
}
+/**
+ * Flattens a usage runtime event into the column layout of `usage_events`.
+ * Missing turn id / model become null; the usage payload is stored as JSON.
+ */
+function usageRowFromEvent(event: RuntimeEvent & { kind: 'usage' }): UsageRow {
+  const { threadId, seq, timestamp, turnId, model, usage } = event
+  const row: UsageRow = {
+    thread_id: threadId,
+    seq,
+    timestamp,
+    turn_id: turnId ?? null,
+    model: model ?? null,
+    usage_json: JSON.stringify(usage)
+  }
+  return row
+}
+
+/**
+ * Converts stored usage rows into per-event usage deltas.
+ * Each row's snapshot is diffed against the previous parsable snapshot of the
+ * same thread (or an empty snapshot for the first one). Rows whose JSON does
+ * not parse are skipped, and rows whose delta carries no usage are dropped.
+ * Rows are processed in the order given.
+ */
+function usageRecordsFromRows(rows: UsageRow[]): SessionUsageRecord[] {
+  const lastSnapshotByThread = new Map()
+  const collected: SessionUsageRecord[] = []
+  for (const { thread_id: threadId, turn_id: turnId, model, timestamp, usage_json: rawUsage } of rows) {
+    const snapshot = parseUsageSnapshot(rawUsage)
+    if (snapshot === null) continue
+    const baseline = lastSnapshotByThread.get(threadId) ?? emptyUsageSnapshot()
+    const delta = diffUsage(snapshot, baseline)
+    // The baseline advances even when this row ends up contributing nothing.
+    lastSnapshotByThread.set(threadId, snapshot)
+    if (!hasUsage(delta)) continue
+    collected.push({
+      threadId,
+      ...(turnId ? { turnId } : {}),
+      ...(model ? { model } : {}),
+      completedAt: timestamp,
+      usage: delta
+    })
+  }
+  return collected
+}
+
+/**
+ * Maps usage rows to `{ threadId, seq, usage }` snapshots, silently dropping
+ * rows whose stored JSON fails to parse or validate.
+ */
+function latestUsageSnapshotsFromRows(rows: UsageRow[]): SessionLatestUsageSnapshot[] {
+  const snapshots: SessionLatestUsageSnapshot[] = []
+  for (const row of rows) {
+    const usage = parseUsageSnapshot(row.usage_json)
+    if (usage === null) continue
+    snapshots.push({
+      threadId: row.thread_id,
+      seq: row.seq,
+      usage
+    })
+  }
+  return snapshots
+}
+
+/**
+ * Decodes a stored `usage_json` string and validates it against
+ * `UsageSnapshotSchema`. Returns null for malformed JSON, a schema mismatch,
+ * or any error raised while parsing.
+ */
+function parseUsageSnapshot(raw: string): UsageSnapshot | null {
+  try {
+    const decoded: unknown = JSON.parse(raw)
+    const verdict = UsageSnapshotSchema.safeParse(decoded)
+    if (verdict.success) return verdict.data
+    return null
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Computes the per-field difference `current - previous` between two usage
+ * snapshots. Every numeric difference is clamped at zero (see `diffNumber`
+ * and `diffOptionalNumber`). Optional fields are present in the result only
+ * when at least one of the two snapshots defines them.
+ */
+function diffUsage(current: UsageSnapshot, previous: UsageSnapshot): UsageSnapshot {
+ const promptTokens = diffNumber(current.promptTokens, previous.promptTokens)
+ const completionTokens = diffNumber(current.completionTokens, previous.completionTokens)
+ const reportedTotal = diffNumber(current.totalTokens, previous.totalTokens)
+ // When the reported total did not grow (difference of 0), fall back to the
+ // sum of the prompt and completion deltas.
+ const totalTokens = reportedTotal || promptTokens + completionTokens
+ const cachedTokens = diffOptionalNumber(current.cachedTokens, previous.cachedTokens)
+ const cacheHitTokens = diffOptionalNumber(current.cacheHitTokens, previous.cacheHitTokens)
+ const cacheMissTokens = diffOptionalNumber(current.cacheMissTokens, previous.cacheMissTokens)
+ const cacheTotal = (cacheHitTokens ?? 0) + (cacheMissTokens ?? 0)
+ return {
+ promptTokens,
+ completionTokens,
+ totalTokens,
+ ...(cachedTokens !== undefined ? { cachedTokens } : {}),
+ ...(cacheHitTokens !== undefined ? { cacheHitTokens } : {}),
+ ...(cacheMissTokens !== undefined ? { cacheMissTokens } : {}),
+ // Hit rate is recomputed from the hit/miss deltas; null when hit tokens
+ // are absent or the delta contains no cache traffic.
+ cacheHitRate: cacheHitTokens !== undefined && cacheTotal > 0 ? cacheHitTokens / cacheTotal : null,
+ turns: diffNumber(current.turns, previous.turns),
+ ...(current.costUsd !== undefined || previous.costUsd !== undefined
+ ? { costUsd: diffNumber(current.costUsd ?? 0, previous.costUsd ?? 0) }
+ : {}),
+ ...(current.costCny !== undefined || previous.costCny !== undefined
+ ? { costCny: diffNumber(current.costCny ?? 0, previous.costCny ?? 0) }
+ : {}),
+ ...(current.cacheSavingsUsd !== undefined || previous.cacheSavingsUsd !== undefined
+ ? { cacheSavingsUsd: diffNumber(current.cacheSavingsUsd ?? 0, previous.cacheSavingsUsd ?? 0) }
+ : {}),
+ ...(current.cacheSavingsCny !== undefined || previous.cacheSavingsCny !== undefined
+ ? { cacheSavingsCny: diffNumber(current.cacheSavingsCny ?? 0, previous.cacheSavingsCny ?? 0) }
+ : {}),
+ ...(current.tokenEconomySavingsTokens !== undefined || previous.tokenEconomySavingsTokens !== undefined
+ ? {
+ tokenEconomySavingsTokens: diffNumber(
+ current.tokenEconomySavingsTokens ?? 0,
+ previous.tokenEconomySavingsTokens ?? 0
+ )
+ }
+ : {}),
+ ...(current.tokenEconomySavingsUsd !== undefined || previous.tokenEconomySavingsUsd !== undefined
+ ? {
+ tokenEconomySavingsUsd: diffNumber(
+ current.tokenEconomySavingsUsd ?? 0,
+ previous.tokenEconomySavingsUsd ?? 0
+ )
+ }
+ : {}),
+ ...(current.tokenEconomySavingsCny !== undefined || previous.tokenEconomySavingsCny !== undefined
+ ? {
+ tokenEconomySavingsCny: diffNumber(
+ current.tokenEconomySavingsCny ?? 0,
+ previous.tokenEconomySavingsCny ?? 0
+ )
+ }
+ : {}),
+ // The error flag is copied from `current` only; it is not diffed.
+ ...(current.hasError ? { hasError: true } : {})
+ }
+}
+
+/** Returns `current - previous`, clamped so the result is never below zero. */
+function diffNumber(current: number, previous: number): number {
+  const delta = current - previous
+  return Math.max(0, delta)
+}
+
+/**
+ * Zero-clamped difference of two optional counters. Returns undefined only
+ * when both inputs are undefined; otherwise a missing side counts as 0.
+ */
+function diffOptionalNumber(current?: number, previous?: number): number | undefined {
+  const bothMissing = current === undefined && previous === undefined
+  if (bothMissing) return undefined
+  const delta = (current ?? 0) - (previous ?? 0)
+  return Math.max(0, delta)
+}
+
+/** True when any token, turn, cost or savings field of the snapshot is greater than zero. */
+function hasUsage(usage: UsageSnapshot): boolean {
+  // Optional fields count as 0 when absent.
+  const amounts = [
+    usage.promptTokens,
+    usage.completionTokens,
+    usage.totalTokens,
+    usage.cachedTokens ?? 0,
+    usage.cacheHitTokens ?? 0,
+    usage.cacheMissTokens ?? 0,
+    usage.turns,
+    usage.costUsd ?? 0,
+    usage.costCny ?? 0,
+    usage.cacheSavingsUsd ?? 0,
+    usage.cacheSavingsCny ?? 0,
+    usage.tokenEconomySavingsTokens ?? 0,
+    usage.tokenEconomySavingsUsd ?? 0,
+    usage.tokenEconomySavingsCny ?? 0
+  ]
+  return amounts.some((amount) => amount > 0)
+}
+
function isoToMillis(value: string): number {
const millis = Date.parse(value)
return Number.isFinite(millis) ? millis : 0
@@ -853,6 +1309,18 @@ function addColumnIfMissing(db: BetterSqliteDatabase, table: string, columnSql:
}
}
+// Maximum number of reconstructed thread records kept in `threadRecordCache`;
+// the oldest entries are evicted once the cache grows past this size.
+const THREAD_RECORD_CACHE_LIMIT = 8
+// Default metadata.jsonl size, in bytes, at which compaction is attempted, and
+// the lower bound for the per-thread compaction floor.
+const METADATA_COMPACT_MIN_BYTES = 1_000_000
+
+/**
+ * Cheap change-detection signature for a file: `"<size>:<mtimeMs>"`, or the
+ * literal `'missing'` when the file cannot be stat'ed for any reason.
+ */
+async function fileSignature(path: string): Promise {
+  let info
+  try {
+    info = await stat(path)
+  } catch {
+    return 'missing'
+  }
+  const { size, mtimeMs } = info
+  return `${size}:${mtimeMs}`
+}
+
async function appendJsonlLine(path: string, value: unknown): Promise {
await mkdir(dirname(path), { recursive: true })
const handle = await open(path, 'a')
@@ -873,6 +1341,10 @@ async function pathExists(path: string): Promise {
}
}
+/** Waits for a zero-delay timer so queued callbacks can run between synchronous chunks of work. */
+async function yieldToEventLoop(): Promise {
+  const nextTimerTick = new Promise((wake) => {
+    setTimeout(wake, 0)
+  })
+  await nextTimerTick
+}
+
function warnSqlite(action: string, error: unknown): void {
const message = error instanceof Error ? error.message : String(error)
console.warn(`[kun] hybrid sqlite ${action} failed; using JSONL fallback: ${message}`)
diff --git a/kun/src/adapters/in-memory-event-bus.ts b/kun/src/adapters/in-memory-event-bus.ts
index b20e938c..32b098c1 100644
--- a/kun/src/adapters/in-memory-event-bus.ts
+++ b/kun/src/adapters/in-memory-event-bus.ts
@@ -1,20 +1,35 @@
import type { EventBus } from '../ports/event-bus.js'
import type { RuntimeEvent } from '../contracts/events.js'
+/**
+ * Retained events per thread for `snapshotSince`. SSE replay reads the
+ * persisted session store, not the bus, so the bus only needs a recent
+ * tail — retaining every event leaked the full delta stream of every
+ * long-running thread into memory.
+ */
+const MAX_RETAINED_EVENTS_PER_THREAD = 256
+
/**
* In-memory implementation of the event bus used by tests and the
* default runtime. Subscribers receive only events for their thread.
- * The bus is a single source of truth for the SSE replay path.
+ * Live fan-out is the bus's job; durable replay belongs to the
+ * session store.
*/
export class InMemoryEventBus implements EventBus {
private readonly events = new Map()
private readonly subscribers = new Map void>>()
private nextSeq = new Map()
+ private highestSeqByThread = new Map()
publish(event: RuntimeEvent): void {
const list = this.events.get(event.threadId) ?? []
list.push(event)
+ if (list.length > MAX_RETAINED_EVENTS_PER_THREAD) {
+ list.splice(0, list.length - MAX_RETAINED_EVENTS_PER_THREAD)
+ }
this.events.set(event.threadId, list)
+ const highest = this.highestSeqByThread.get(event.threadId) ?? 0
+ if (event.seq > highest) this.highestSeqByThread.set(event.threadId, event.seq)
const subscribers = this.subscribers.get(event.threadId)
if (!subscribers) return
for (const handler of subscribers) {
@@ -32,6 +47,9 @@ export class InMemoryEventBus implements EventBus {
this.subscribers.set(threadId, set)
return () => {
set.delete(handler)
+ if (set.size === 0 && this.subscribers.get(threadId) === set) {
+ this.subscribers.delete(threadId)
+ }
}
}
@@ -41,8 +59,7 @@ export class InMemoryEventBus implements EventBus {
}
highestSeq(threadId: string): number {
- const list = this.events.get(threadId) ?? []
- return list.reduce((max, event) => Math.max(max, event.seq), 0)
+ return this.highestSeqByThread.get(threadId) ?? 0
}
/** Returns the next per-thread `seq` value, allocating one if needed. */
@@ -56,5 +73,6 @@ export class InMemoryEventBus implements EventBus {
this.events.clear()
this.subscribers.clear()
this.nextSeq.clear()
+ this.highestSeqByThread.clear()
}
}
diff --git a/kun/src/adapters/model/deepseek-compat-model-client.ts b/kun/src/adapters/model/deepseek-compat-model-client.ts
index 68606b6f..59a1e75e 100644
--- a/kun/src/adapters/model/deepseek-compat-model-client.ts
+++ b/kun/src/adapters/model/deepseek-compat-model-client.ts
@@ -1,7 +1,9 @@
import type { ModelClient, ModelRequest, ModelStreamChunk, ModelToolSpec } from '../../ports/model-client.js'
import type { TurnItem } from '../../contracts/items.js'
import { emptyUsageSnapshot, type UsageSnapshot } from '../../contracts/usage.js'
-import { estimateDeepseekCacheSavings, estimateDeepseekCost } from './deepseek-pricing.js'
+import type { ModelCapabilityMetadata } from '../../contracts/capabilities.js'
+import { estimateDeepseekCost } from './deepseek-pricing.js'
+import { estimateMiniMaxCost } from './minimax-pricing.js'
import { isToolResultBridgeItem, repairModelHistoryItems } from '../../domain/model-history-repair.js'
import { repairToolArguments } from './tool-argument-repair.js'
import { isDeepSeekHost, probeDeepSeekReachable } from './model-error-probe.js'
@@ -33,6 +35,8 @@ export type DeepseekCompatConfig = {
nonStreaming?: boolean
/** Maximum idle time between streaming chunks before the turn fails. */
streamIdleTimeoutMs?: number
+ /** Optional model capability resolver used for provider-specific reasoning translation. */
+ modelCapabilities?: (model: string) => ModelCapabilityMetadata
}
type ChatMessage = {
@@ -52,11 +56,15 @@ type ChatMessageContentPart =
| { type: 'text'; text: string }
| { type: 'image_url'; image_url: { url: string } }
-type AnthropicContentBlock =
+type AnthropicCacheControl = { type: 'ephemeral' }
+
+type AnthropicContentBlock = (
| { type: 'text'; text: string }
| { type: 'image'; source: { type: 'base64'; media_type: string; data: string } | { type: 'url'; url: string } }
+ | { type: 'thinking'; thinking: string }
| { type: 'tool_use'; id: string; name: string; input: Record }
| { type: 'tool_result'; tool_use_id: string; content: string }
+) & { cache_control?: AnthropicCacheControl }
type AnthropicImageSource = Extract['source']
@@ -162,6 +170,7 @@ export class DeepseekCompatModelClient implements ModelClient {
const endpointFormat = this.endpointFormat()
const url = buildModelEndpointUrl(this.config.baseUrl, endpointFormat)
const stream = request.stream ?? !this.config.nonStreaming
+ const requestModel = request.model?.trim() || this.config.model
const body = this.buildRequestBody(request, stream)
const headers = this.buildHeaders(stream, endpointFormat)
const result = await this.postChatCompletion(url, headers, body, request.abortSignal)
@@ -183,14 +192,14 @@ export class DeepseekCompatModelClient implements ModelClient {
if (response.ok) {
if (this.config.nonStreaming || response.headers.get('content-type')?.includes('application/json')) {
const json = (await response.json()) as ChatCompletionResponse
- yield* this.materializeNonStreaming(json, endpointFormat)
+ yield* this.materializeNonStreaming(json, endpointFormat, requestModel)
return
}
if (!response.body) {
yield { kind: 'error', message: 'model response had no body' }
return
}
- yield* this.streamSse(response.body, request.abortSignal, endpointFormat)
+ yield* this.streamSse(response.body, request.abortSignal, endpointFormat, requestModel)
return
}
const retryText = await response.text()
@@ -212,20 +221,24 @@ export class DeepseekCompatModelClient implements ModelClient {
}
if (this.config.nonStreaming || response.headers.get('content-type')?.includes('application/json')) {
const json = (await response.json()) as ChatCompletionResponse
- yield* this.materializeNonStreaming(json, endpointFormat)
+ yield* this.materializeNonStreaming(json, endpointFormat, requestModel)
return
}
if (!response.body) {
yield { kind: 'error', message: 'model response had no body' }
return
}
- yield* this.streamSse(response.body, request.abortSignal, endpointFormat)
+ yield* this.streamSse(response.body, request.abortSignal, endpointFormat, requestModel)
}
private endpointFormat(): ModelEndpointFormat {
return normalizeModelEndpointFormat(this.config.endpointFormat ?? DEFAULT_MODEL_ENDPOINT_FORMAT)
}
+  /**
+   * Looks up the reasoning metadata for `model` through the optional
+   * `modelCapabilities` resolver; undefined when no resolver is configured.
+   */
+  private modelReasoningFor(model: string): ModelCapabilityMetadata['reasoning'] | undefined {
+    const { config } = this
+    if (config.modelCapabilities == null) return undefined
+    return config.modelCapabilities(model).reasoning
+  }
+
private async postChatCompletion(
url: string,
headers: Record,
@@ -248,9 +261,12 @@ export class DeepseekCompatModelClient implements ModelClient {
private buildHeaders(stream: boolean, endpointFormat: ModelEndpointFormat): Record {
const headers: Record = {
- 'Content-Type': 'application/json',
- Accept: stream ? 'text/event-stream' : 'application/json'
+ 'Content-Type': 'application/json'
}
+ // `stream: true` is enough for OpenAI-compatible providers to return SSE.
+ // Some Windows Node/Electron paths time out when routing requests with
+ // `Accept: text/event-stream`, while the same stream works without it.
+ if (!stream) headers.Accept = 'application/json'
if (this.config.apiKey) {
if (endpointFormat === 'messages') {
headers.Authorization = `Bearer ${this.config.apiKey}`
@@ -264,7 +280,7 @@ export class DeepseekCompatModelClient implements ModelClient {
}
private async classifyHttpError(status: number, text: string): Promise<{ message: string; code: string }> {
- const body = text.slice(0, 500)
+ const body = text
if (status === 429) {
return {
message: `model request was rate limited (HTTP 429): ${body}`,
@@ -323,7 +339,11 @@ export class DeepseekCompatModelClient implements ModelClient {
body.stream_options = { include_usage: true }
}
const includeThinking = !isAzureOpenAiEndpoint(this.config.baseUrl)
- applyReasoningEffort(body, request.reasoningEffort, { includeThinking })
+ applyReasoningEffort(body, request.reasoningEffort, {
+ includeThinking,
+ reasoning: this.modelReasoningFor(model),
+ maxReasoningEffort: isDeepSeekHost(this.config.baseUrl) ? 'max' : 'high'
+ })
if (
includeThinking &&
isDeepSeekHost(this.config.baseUrl) &&
@@ -369,7 +389,10 @@ export class DeepseekCompatModelClient implements ModelClient {
if (request.responseFormat === 'json_object') {
body.text = { format: { type: 'json_object' } }
}
- const reasoning = responsesReasoningForEffort(request.reasoningEffort)
+ const reasoning = responsesReasoningForEffort(
+ request.reasoningEffort,
+ this.modelReasoningFor(model)
+ )
if (reasoning) body.reasoning = reasoning
const tools = normalizeToolSpecs(request.tools)
if (tools.length > 0) {
@@ -389,25 +412,34 @@ export class DeepseekCompatModelClient implements ModelClient {
messages: ChatMessage[],
stream: boolean
): Record {
- const converted = messagesToAnthropic(messages)
+ const converted = messagesToAnthropic(
+ messages,
+ this.modelReasoningFor(model)?.requestProtocol === 'anthropic-thinking'
+ )
+ applyAnthropicCacheControl(converted.messages)
const body: Record = {
model,
stream,
max_tokens: request.maxTokens ?? DEFAULT_MESSAGES_MAX_TOKENS,
messages: converted.messages
}
- if (converted.system) body.system = converted.system
+ const systemText = request.responseFormat === 'json_object'
+ ? [converted.system, 'Return a valid JSON object only.']
+ .filter((item) => item.trim().length > 0)
+ .join('\n\n')
+ : converted.system
+ if (systemText) {
+ body.system = [
+ { type: 'text', text: systemText, cache_control: { type: 'ephemeral' } }
+ ] satisfies AnthropicContentBlock[]
+ }
if (request.temperature !== undefined) {
body.temperature = request.temperature
}
if (request.topP !== undefined) {
body.top_p = request.topP
}
- if (request.responseFormat === 'json_object') {
- body.system = [converted.system, 'Return a valid JSON object only.']
- .filter((item): item is string => typeof item === 'string' && item.trim().length > 0)
- .join('\n\n')
- }
+ applyAnthropicReasoningEffort(body, request.reasoningEffort, this.modelReasoningFor(model))
const tools = normalizeToolSpecs(request.tools)
if (tools.length > 0) {
body.tools = tools.map((tool) => ({
@@ -427,18 +459,28 @@ export class DeepseekCompatModelClient implements ModelClient {
if (request.modeInstruction) {
out.push({ role: 'system', content: request.modeInstruction })
}
- for (const instruction of request.contextInstructions ?? []) {
- if (instruction.trim()) out.push({ role: 'system', content: instruction })
- }
const windowSize = this.config.historyLimit
const history = windowSize
? limitHistoryPreservingCompaction(request.history, windowSize)
: request.history
- const thinkingMode = requiresReasoningRoundTrip(request.reasoningEffort, model, this.config.baseUrl)
+ const thinkingMode = requiresReasoningRoundTrip(
+ request.reasoningEffort,
+ model,
+ this.config.baseUrl,
+ this.modelReasoningFor(model)
+ )
out.push(...this.itemsToMessages(
repairModelHistoryItems([...request.prefix, ...history]),
thinkingMode
))
+ // Per-turn context (goal budgets, todo state, memories, skill notes,
+ // drift warnings) is volatile — the goal instruction alone embeds a
+ // tokens-used counter that changes every step. It must trail the
+ // stable history: placed before it, every counter tick invalidated
+ // the provider prompt cache for the entire conversation.
+ for (const instruction of request.contextInstructions ?? []) {
+ if (instruction.trim()) out.push({ role: 'system', content: instruction })
+ }
if (request.attachments?.length) {
attachImagesToLatestUserMessage(out, request.attachments)
}
@@ -611,7 +653,8 @@ export class DeepseekCompatModelClient implements ModelClient {
private async *streamSse(
body: ReadableStream,
signal: AbortSignal,
- endpointFormat: ModelEndpointFormat
+ endpointFormat: ModelEndpointFormat,
+ model: string
): AsyncIterable {
const decoder = new TextDecoder('utf-8')
const reader = body.getReader()
@@ -673,7 +716,8 @@ export class DeepseekCompatModelClient implements ModelClient {
completedToolCalls,
textAccumulator,
reasoningAccumulator,
- endpointFormat
+ endpointFormat,
+ model
)
textAccumulator = result.text
reasoningAccumulator = result.reasoning
@@ -717,7 +761,8 @@ export class DeepseekCompatModelClient implements ModelClient {
completedToolCalls: Set,
textAccumulator: string,
reasoningAccumulator: string,
- endpointFormat: ModelEndpointFormat
+ endpointFormat: ModelEndpointFormat,
+ model: string
): {
chunks: ModelStreamChunk[]
text: string
@@ -725,6 +770,20 @@ export class DeepseekCompatModelClient implements ModelClient {
finishReason: string | null
usage: UsageSnapshot | null
} {
+ const payloadError = modelPayloadError(payload)
+ if (payloadError) {
+ return {
+ chunks: [{
+ kind: 'error',
+ message: payloadError.message,
+ ...(payloadError.code ? { code: payloadError.code } : {})
+ }],
+ text: textAccumulator,
+ reasoning: reasoningAccumulator,
+ finishReason: 'error',
+ usage: null
+ }
+ }
if (endpointFormat === 'responses') {
return this.consumeResponsesStreamPayload(
payload,
@@ -732,7 +791,8 @@ export class DeepseekCompatModelClient implements ModelClient {
pendingByIndex,
completedToolCalls,
textAccumulator,
- reasoningAccumulator
+ reasoningAccumulator,
+ model
)
}
if (endpointFormat === 'messages') {
@@ -742,7 +802,8 @@ export class DeepseekCompatModelClient implements ModelClient {
pendingByIndex,
completedToolCalls,
textAccumulator,
- reasoningAccumulator
+ reasoningAccumulator,
+ model
)
}
const chunks: ModelStreamChunk[] = []
@@ -797,7 +858,7 @@ export class DeepseekCompatModelClient implements ModelClient {
}
const usagePayload = payload.usage as Record | undefined
if (usagePayload) {
- usage = this.mapUsage(usagePayload)
+ usage = this.mapUsage(usagePayload, model)
}
if (finishReason === 'tool_calls' && pendingArguments.size > 0) {
for (const [callId, value] of pendingArguments) {
@@ -821,7 +882,8 @@ export class DeepseekCompatModelClient implements ModelClient {
pendingByIndex: Map,
completedToolCalls: Set,
textAccumulator: string,
- reasoningAccumulator: string
+ reasoningAccumulator: string,
+ model: string
): {
chunks: ModelStreamChunk[]
text: string
@@ -915,7 +977,7 @@ export class DeepseekCompatModelClient implements ModelClient {
skipText: Boolean(text),
pendingArguments,
completedToolCalls
- })
+ }, model)
chunks.push(...materialized.chunks)
if (materialized.usage) usage = materialized.usage
finishReason = materialized.finishReason
@@ -933,7 +995,8 @@ export class DeepseekCompatModelClient implements ModelClient {
pendingByIndex: Map,
completedToolCalls: Set,
textAccumulator: string,
- reasoningAccumulator: string
+ reasoningAccumulator: string,
+ model: string
): {
chunks: ModelStreamChunk[]
text: string
@@ -952,7 +1015,7 @@ export class DeepseekCompatModelClient implements ModelClient {
if (type === 'message_start') {
const message = recordValue(payload, 'message')
const usagePayload = message ? recordValue(message, 'usage') : null
- if (usagePayload) usage = this.mapUsage(usagePayload)
+ if (usagePayload) usage = this.mapUsage(usagePayload, model)
} else if (type === 'content_block_start') {
const block = recordValue(payload, 'content_block')
if (block && recordString(block, 'type') === 'tool_use') {
@@ -1022,7 +1085,7 @@ export class DeepseekCompatModelClient implements ModelClient {
const mappedStopReason = anthropicStopReason(stopReason)
if (mappedStopReason) finishReason = mappedStopReason
const usagePayload = recordValue(payload, 'usage')
- if (usagePayload) usage = this.mapUsage(usagePayload)
+ if (usagePayload) usage = this.mapUsage(usagePayload, model)
} else if (type === 'message_stop') {
finishReason = finishReason ?? 'stop'
} else if (type === 'error') {
@@ -1034,14 +1097,24 @@ export class DeepseekCompatModelClient implements ModelClient {
private *materializeNonStreaming(
payload: ChatCompletionResponse,
- endpointFormat: ModelEndpointFormat
+ endpointFormat: ModelEndpointFormat,
+ model: string
): Generator {
+ const payloadError = modelPayloadError(payload as unknown as Record)
+ if (payloadError) {
+ yield {
+ kind: 'error',
+ message: payloadError.message,
+ ...(payloadError.code ? { code: payloadError.code } : {})
+ }
+ return
+ }
if (endpointFormat === 'responses') {
- yield* this.materializeResponsesNonStreaming(payload as unknown as ResponsesApiResponse)
+ yield* this.materializeResponsesNonStreaming(payload as unknown as ResponsesApiResponse, model)
return
}
if (endpointFormat === 'messages') {
- yield* this.materializeAnthropicMessagesNonStreaming(payload as unknown as AnthropicMessageResponse)
+ yield* this.materializeAnthropicMessagesNonStreaming(payload as unknown as AnthropicMessageResponse, model)
return
}
const choice = payload.choices?.[0]
@@ -1069,7 +1142,7 @@ export class DeepseekCompatModelClient implements ModelClient {
}
}
if (payload.usage) {
- yield { kind: 'usage', usage: this.mapUsage(payload.usage) }
+ yield { kind: 'usage', usage: this.mapUsage(payload.usage, model) }
}
let stopReason: 'stop' | 'tool_calls' | 'length' | 'error' = 'stop'
if (choice.finish_reason === 'tool_calls') stopReason = 'tool_calls'
@@ -1079,13 +1152,14 @@ export class DeepseekCompatModelClient implements ModelClient {
}
private *materializeResponsesNonStreaming(
- payload: ResponsesApiResponse
+ payload: ResponsesApiResponse,
+ model: string
): Generator {
if (payload.error?.message) {
yield { kind: 'error', message: payload.error.message, code: payload.error.type }
return
}
- const materialized = this.materializeResponsesOutput(payload)
+ const materialized = this.materializeResponsesOutput(payload, {}, model)
yield* materialized.chunks
if (materialized.usage) {
yield { kind: 'usage', usage: materialized.usage }
@@ -1099,7 +1173,8 @@ export class DeepseekCompatModelClient implements ModelClient {
skipText?: boolean
pendingArguments?: Map
completedToolCalls?: Set
- } = {}
+ } = {},
+ model = this.config.model
): {
chunks: ModelStreamChunk[]
finishReason: ModelStopReason
@@ -1134,7 +1209,7 @@ export class DeepseekCompatModelClient implements ModelClient {
arguments: this.parseToolArguments(argsRaw)
})
}
- const usage = payload.usage ? this.mapUsage(payload.usage) : null
+ const usage = payload.usage ? this.mapUsage(payload.usage, model) : null
let finishReason: ModelStopReason = sawToolCall ? 'tool_calls' : 'stop'
if (payload.status === 'incomplete') {
finishReason = payload.incomplete_details?.reason === 'max_output_tokens' ? 'length' : 'error'
@@ -1145,7 +1220,8 @@ export class DeepseekCompatModelClient implements ModelClient {
}
private *materializeAnthropicMessagesNonStreaming(
- payload: AnthropicMessageResponse
+ payload: AnthropicMessageResponse,
+ model: string
): Generator {
let sawToolCall = false
for (const block of payload.content ?? []) {
@@ -1172,15 +1248,13 @@ export class DeepseekCompatModelClient implements ModelClient {
}
}
if (payload.usage) {
- yield { kind: 'usage', usage: this.mapUsage(payload.usage) }
+ yield { kind: 'usage', usage: this.mapUsage(payload.usage, model) }
}
yield { kind: 'completed', stopReason: anthropicStopReason(payload.stop_reason) ?? (sawToolCall ? 'tool_calls' : 'stop') }
}
- private mapUsage(usage: Record): UsageSnapshot {
- const promptTokens = Number(usage.prompt_tokens ?? usage.prompt_eval_count ?? usage.input_tokens ?? 0) || 0
+ private mapUsage(usage: Record, model = this.config.model): UsageSnapshot {
const completionTokens = Number(usage.completion_tokens ?? usage.eval_count ?? usage.output_tokens ?? 0) || 0
- const totalTokens = Number(usage.total_tokens ?? promptTokens + completionTokens) || 0
const promptDetails = usage.prompt_tokens_details as
| { cached_tokens?: number }
| undefined
@@ -1190,21 +1264,41 @@ export class DeepseekCompatModelClient implements ModelClient {
const cachedTokens = Number(promptDetails?.cached_tokens ?? 0) || 0
const cacheRead = Number(usage.cache_read_input_tokens ?? 0) || 0
const cacheCreation = Number(usage.cache_creation_input_tokens ?? 0) || 0
+ // Anthropic-protocol usage (MiniMax et al.) reports input_tokens
+ // EXCLUDING cache reads/writes; OpenAI-style prompt_tokens includes
+ // everything and marks the cached subset in prompt_tokens_details.
+ const anthropicUsage = usage.prompt_tokens === undefined &&
+ usage.prompt_eval_count === undefined &&
+ usage.input_tokens !== undefined
+ const reportedPromptTokens = Number(usage.prompt_tokens ?? usage.prompt_eval_count ?? usage.input_tokens ?? 0) || 0
+ const promptTokens = anthropicUsage
+ ? reportedPromptTokens + cacheRead + cacheCreation
+ : reportedPromptTokens
const cacheHit = hasNativeCache ? nativeHit : (cachedTokens > 0 ? cachedTokens : cacheRead)
const cacheMiss = hasNativeCache ? nativeMiss : Math.max(promptTokens - cacheHit, 0)
const cacheTotal = cacheHit + cacheMiss
const cacheHitRate = cacheTotal === 0 ? null : cacheHit / cacheTotal
+ const totalTokens = anthropicUsage
+ ? promptTokens + completionTokens
+ : Number(usage.total_tokens ?? promptTokens + completionTokens) || 0
+ const pricingCacheRead = cacheRead || cacheHit
+ const pricingCacheWrite = cacheCreation
+ const pricingInputTokens = anthropicUsage
+ ? reportedPromptTokens
+ : Math.max(promptTokens - pricingCacheRead - pricingCacheWrite, 0)
const estimatedCost = estimateDeepseekCost({
- model: this.config.model,
+ model,
providerHost: this.config.baseUrl,
cacheHitTokens: cacheHit,
cacheMissTokens: cacheMiss,
outputTokens: completionTokens
- })
- const estimatedSavings = estimateDeepseekCacheSavings({
- model: this.config.model,
+ }) ?? estimateMiniMaxCost({
+ model,
providerHost: this.config.baseUrl,
- cacheHitTokens: cacheHit
+ inputTokens: pricingInputTokens,
+ cacheReadTokens: pricingCacheRead,
+ cacheWriteTokens: pricingCacheWrite,
+ outputTokens: completionTokens
})
const reportedCostUsd = Number(usage.cost_usd ?? usage.costUsd)
const reportedCostCny = Number(usage.cost_cny ?? usage.costCny)
@@ -1219,9 +1313,7 @@ export class DeepseekCompatModelClient implements ModelClient {
cacheHitRate,
turns: 1,
costUsd: Number.isFinite(reportedCostUsd) ? reportedCostUsd : estimatedCost?.costUsd,
- costCny: Number.isFinite(reportedCostCny) ? reportedCostCny : estimatedCost?.costCny,
- cacheSavingsUsd: estimatedSavings?.costUsd,
- cacheSavingsCny: estimatedSavings?.costCny
+ costCny: Number.isFinite(reportedCostCny) ? reportedCostCny : estimatedCost?.costCny
}
}
@@ -1273,13 +1365,28 @@ function messagesToResponsesInput(messages: ChatMessage[]): Array 0) {
+ appendTrailingInstruction(out, text)
+ continue
+ }
+ system.push(text)
continue
}
if (message.role === 'tool') {
@@ -1300,6 +1407,10 @@ function messagesToAnthropic(messages: ChatMessage[]): { system: string; message
: content.trim()
? [{ type: 'text' as const, text: content }]
: []
+ if (includeThinkingBlocks && message.role === 'assistant') {
+ const thinking = message.reasoning_content?.trim()
+ if (thinking) blocks.unshift({ type: 'thinking', thinking })
+ }
for (const call of message.tool_calls ?? []) {
blocks.push({
type: 'tool_use',
@@ -1316,6 +1427,46 @@ function messagesToAnthropic(messages: ChatMessage[]): { system: string; message
return { system: system.join('\n\n'), messages: out }
}
+/**
+ * Appends a trailing system instruction to the conversation as user-role
+ * text. When the conversation already ends with a user message the text is
+ * merged into that message, which keeps user/assistant turns alternating;
+ * otherwise a new user message carrying only the instruction is added.
+ *
+ * @param out Conversation being assembled; mutated in place.
+ * @param text Instruction text to attach.
+ */
+function appendTrailingInstruction(out: AnthropicMessage[], text: string): void {
+  const instruction: AnthropicContentBlock = { type: 'text', text }
+  const tail = out.length > 0 ? out[out.length - 1] : undefined
+  if (!tail || tail.role !== 'user') {
+    out.push({ role: 'user', content: [instruction] })
+    return
+  }
+  if (typeof tail.content !== 'string') {
+    tail.content.push(instruction)
+    return
+  }
+  // String content is upgraded to block form; whitespace-only text is dropped.
+  const existing = tail.content
+  tail.content = existing.trim().length > 0
+    ? [{ type: 'text', text: existing }, instruction]
+    : [instruction]
+}
+
+/**
+ * Marks the stable prefix for provider-side prompt caching. Anthropic
+ * protocol caching is explicit: providers such as MiniMax only cache
+ * content before `cache_control` breakpoints (up to 4 per request).
+ * The request builder puts one breakpoint on the system block; this
+ * function adds one to each of the last two messages that have block
+ * content, so consecutive agent steps re-hit the prefix cached by the
+ * previous request.
+ *
+ * The breakpoint goes on the last block that is not a `thinking` block:
+ * thinking blocks cannot carry `cache_control`, and an assistant turn that
+ * produced only reasoning would otherwise yield an invalid request.
+ * Messages with string content, no blocks, or only thinking blocks are
+ * skipped and do not consume a breakpoint.
+ *
+ * @param messages Converted conversation; blocks are mutated in place.
+ */
+function applyAnthropicCacheControl(messages: AnthropicMessage[]): void {
+  const maxMessageBreakpoints = 2
+  let breakpoints = 0
+  for (let i = messages.length - 1; i >= 0 && breakpoints < maxMessageBreakpoints; i -= 1) {
+    const content = messages[i].content
+    if (typeof content === 'string') continue
+    let target = content.length - 1
+    while (target >= 0 && content[target].type === 'thinking') target -= 1
+    if (target < 0) continue
+    content[target].cache_control = { type: 'ephemeral' }
+    breakpoints += 1
+  }
+}
+
function chatContentToResponsesContent(
content: ChatMessage['content']
): string | Array> | undefined {
@@ -1377,19 +1528,26 @@ function chatContentToPlainText(content: ChatMessage['content']): string {
}).join('\n')
}
-function responsesReasoningForEffort(effort: string | undefined): Record | null {
- const normalized = effort?.trim().toLowerCase()
+type ModelReasoningCapability = NonNullable
+type NormalizedReasoningEffort = ModelReasoningCapability['defaultEffort']
+
+function responsesReasoningForEffort(
+ effort: string | undefined,
+ reasoning?: ModelReasoningCapability
+): Record | null {
+ if (reasoning && reasoning.requestProtocol !== 'openai-responses') return null
+ const resolved = reasoning
+ ? resolveReasoningEffort(effort, reasoning)
+ : normalizeReasoningEffortValue(effort)
+ if (resolved === 'auto' || resolved === 'off' || !resolved) return null
+ const normalized = resolved
switch (normalized) {
case 'low':
- case 'minimal':
return { effort: 'low' }
case 'medium':
- case 'mid':
return { effort: 'medium' }
case 'high':
case 'max':
- case 'maximum':
- case 'xhigh':
return { effort: 'high' }
default:
return null
@@ -1484,6 +1642,66 @@ function responseErrorMessage(payload: Record): string {
return message || recordString(payload, 'message') || 'model stream reported an error'
}
+/**
+ * Detects an error report embedded in a model payload (streamed event or
+ * whole response) regardless of which provider dialect produced it.
+ *
+ * Checked in order:
+ *  1. `error` as a non-blank string;
+ *  2. `error` as an object;
+ *  3. `response.error` as an object;
+ *  4. `base_resp` / `baseResp` carrying a non-success status code;
+ *  5. a top-level code (`code`, `type`, `status_code`, `err_code`) paired
+ *     with a top-level message, when the code is not a success marker.
+ *
+ * @param payload Parsed JSON payload.
+ * @returns The error message and optional code, or `null` when the payload
+ *   does not look like an error.
+ */
+function modelPayloadError(payload: Record<string, unknown>): { message: string; code?: string } | null {
+  const rawError = payload.error
+  if (typeof rawError === 'string' && rawError.trim()) {
+    return { message: rawError.trim() }
+  }
+  const directError = modelErrorObject(recordValue(payload, 'error'))
+  if (directError) return directError
+  // Only descend into `response` when it is present, matching the guarded
+  // recordValue usage elsewhere in this file.
+  const response = recordValue(payload, 'response')
+  const responseError = response ? modelErrorObject(recordValue(response, 'error')) : null
+  if (responseError) return responseError
+  const baseResp = recordValue(payload, 'base_resp') ?? recordValue(payload, 'baseResp')
+  if (baseResp) {
+    const code = errorCodeString(
+      baseResp.status_code ?? baseResp.status ?? baseResp.code ?? baseResp.err_code
+    )
+    if (code && !successErrorCode(code)) {
+      return {
+        message:
+          recordString(baseResp, 'status_msg') ||
+          recordString(baseResp, 'message') ||
+          recordString(baseResp, 'msg') ||
+          `model provider error (${code})`,
+        code
+      }
+    }
+  }
+  // Both a code and a message are required here so ordinary events, which
+  // carry a `type` but no top-level message string, are not misread.
+  const topLevelCode = errorCodeString(payload.code ?? payload.type ?? payload.status_code ?? payload.err_code)
+  const topLevelMessage =
+    recordString(payload, 'message') ||
+    recordString(payload, 'error_msg') ||
+    recordString(payload, 'status_msg')
+  if (topLevelCode && topLevelMessage && !successErrorCode(topLevelCode)) {
+    return { message: topLevelMessage, code: topLevelCode }
+  }
+  return null
+}
+
+/**
+ * Interprets a provider `error` object.
+ *
+ * A message (`message`, `msg`, `status_msg` or `error_msg`) always marks an
+ * error. Without a message, a code (`code`, `type`, `status`, `status_code`
+ * or `err_code`) marks an error only when it is not a success marker, and
+ * a generic message is synthesized from it.
+ *
+ * @param error The candidate error record, or `null` when absent.
+ * @returns The error message and optional code, or `null` when nothing
+ *   indicates an error.
+ */
+function modelErrorObject(error: Record<string, unknown> | null): { message: string; code?: string } | null {
+  if (!error) return null
+  const message =
+    recordString(error, 'message') ||
+    recordString(error, 'msg') ||
+    recordString(error, 'status_msg') ||
+    recordString(error, 'error_msg')
+  const code = errorCodeString(error.code ?? error.type ?? error.status ?? error.status_code ?? error.err_code)
+  if (message) return { message, ...(code ? { code } : {}) }
+  if (code && !successErrorCode(code)) return { message: `model provider error (${code})`, code }
+  return null
+}
+
+/**
+ * Converts a provider error code of unknown shape to a string: strings are
+ * trimmed, finite numbers are stringified, anything else becomes `''`.
+ */
+function errorCodeString(value: unknown): string {
+  switch (typeof value) {
+    case 'string':
+      return value.trim()
+    case 'number':
+      return Number.isFinite(value) ? String(value) : ''
+    default:
+      return ''
+  }
+}
+
+/**
+ * Reports whether a status code string denotes success (`0`, `ok` or
+ * `success`, compared case-insensitively after trimming).
+ */
+function successErrorCode(code: string): boolean {
+  const candidate = code.trim().toLowerCase()
+  return ['0', 'ok', 'success'].includes(candidate)
+}
+
function anthropicStopReason(value: unknown): ModelStopReason | undefined {
if (typeof value !== 'string') return undefined
switch (value) {
@@ -1535,44 +1753,154 @@ function mergeUsageSnapshots(current: UsageSnapshot | null, next: UsageSnapshot)
cacheMissTokens: Math.max(current.cacheMissTokens ?? 0, next.cacheMissTokens ?? 0),
cacheHitRate: next.cacheHitRate ?? current.cacheHitRate,
costUsd: next.costUsd ?? current.costUsd,
- costCny: next.costCny ?? current.costCny,
- cacheSavingsUsd: next.cacheSavingsUsd ?? current.cacheSavingsUsd,
- cacheSavingsCny: next.cacheSavingsCny ?? current.cacheSavingsCny
+ costCny: next.costCny ?? current.costCny
}
}
function applyReasoningEffort(
body: Record,
effort: string | undefined,
- options: { includeThinking?: boolean } = {}
+ options: {
+ includeThinking?: boolean
+ reasoning?: ModelReasoningCapability
+ maxReasoningEffort?: 'high' | 'max'
+ } = {}
): void {
- const normalized = effort?.trim().toLowerCase()
+ const normalized = options.reasoning
+ ? resolveReasoningEffort(effort, options.reasoning)
+ : normalizeReasoningEffortValue(effort)
if (!normalized) return
const includeThinking = options.includeThinking !== false
+ if (options.reasoning) {
+ applyProfileReasoningEffort(body, normalized, options.reasoning, includeThinking)
+ return
+ }
switch (normalized) {
case 'off':
- case 'disabled':
- case 'none':
- case 'false':
if (includeThinking) body.thinking = { type: 'disabled' }
break
case 'low':
- case 'minimal':
case 'medium':
- case 'mid':
case 'high':
body.reasoning_effort = 'high'
if (includeThinking) body.thinking = { type: 'enabled' }
break
case 'max':
- case 'maximum':
- case 'xhigh':
- body.reasoning_effort = 'max'
+ body.reasoning_effort = options.maxReasoningEffort ?? 'max'
if (includeThinking) body.thinking = { type: 'enabled' }
break
}
}
+/**
+ * Writes reasoning controls into a chat-completions request body according
+ * to the model's declared request protocol.
+ *
+ * The `none`, `openai-responses` and `anthropic-thinking` protocols are
+ * deliberately left untouched here; the DeepSeek and MiMo chat protocols
+ * delegate to their dedicated writers.
+ *
+ * @param body Request body; mutated in place.
+ * @param effort Effort already resolved against the model's capability.
+ * @param reasoning The model's reasoning capability profile.
+ * @param includeThinking Whether the `thinking` field may be written.
+ */
+function applyProfileReasoningEffort(
+  body: Record<string, unknown>,
+  effort: NormalizedReasoningEffort,
+  reasoning: ModelReasoningCapability,
+  includeThinking: boolean
+): void {
+  switch (reasoning.requestProtocol) {
+    case 'none':
+    case 'openai-responses':
+    case 'anthropic-thinking':
+      return
+    case 'deepseek-chat-completions':
+      applyDeepSeekChatReasoningEffort(body, effort, includeThinking)
+      return
+    case 'mimo-chat-completions':
+      applyMimoChatReasoningEffort(body, effort, includeThinking)
+      return
+  }
+}
+
+/**
+ * Writes DeepSeek chat-completions reasoning fields.
+ *
+ * - `off`: sets `thinking` to disabled (when allowed) and nothing else.
+ * - `auto`: writes nothing.
+ * - `max`: `reasoning_effort = 'max'`.
+ * - any other effort: `reasoning_effort = 'high'`.
+ * For every effort except `off` and `auto`, `thinking` is set to enabled
+ * when `includeThinking` is true.
+ *
+ * @param body Request body; mutated in place.
+ * @param effort Resolved reasoning effort.
+ * @param includeThinking Whether the `thinking` field may be written.
+ */
+function applyDeepSeekChatReasoningEffort(
+  body: Record<string, unknown>,
+  effort: NormalizedReasoningEffort,
+  includeThinking: boolean
+): void {
+  if (effort === 'off') {
+    if (includeThinking) body.thinking = { type: 'disabled' }
+    return
+  }
+  if (effort === 'max') {
+    body.reasoning_effort = 'max'
+  } else if (effort !== 'auto') {
+    body.reasoning_effort = 'high'
+  }
+  if (includeThinking && effort !== 'auto') body.thinking = { type: 'enabled' }
+}
+
+/**
+ * Writes MiMo chat-completions reasoning fields.
+ *
+ * - `off`: sets `thinking` to disabled (when allowed) and nothing else.
+ * - `low` / `medium` / `high`: forwarded verbatim as `reasoning_effort`,
+ *   with `thinking` enabled when allowed.
+ * - any other effort (e.g. `auto`, `max`): writes nothing.
+ *
+ * @param body Request body; mutated in place.
+ * @param effort Resolved reasoning effort.
+ * @param includeThinking Whether the `thinking` field may be written.
+ */
+function applyMimoChatReasoningEffort(
+  body: Record<string, unknown>,
+  effort: NormalizedReasoningEffort,
+  includeThinking: boolean
+): void {
+  if (effort === 'off') {
+    if (includeThinking) body.thinking = { type: 'disabled' }
+    return
+  }
+  if (effort === 'low' || effort === 'medium' || effort === 'high') {
+    body.reasoning_effort = effort
+    if (includeThinking) body.thinking = { type: 'enabled' }
+  }
+}
+
+/**
+ * Writes the `thinking` field for models whose request protocol is
+ * `anthropic-thinking`; does nothing for any other protocol or when no
+ * capability profile is supplied.
+ *
+ * The requested effort is resolved against the model's capability. If it
+ * resolves to nothing the body is left untouched; `off` disables thinking
+ * and every other effort selects adaptive thinking.
+ *
+ * @param body Request body; mutated in place.
+ * @param effort Raw effort string from the request, if any.
+ * @param reasoning The model's reasoning capability profile, if known.
+ */
+function applyAnthropicReasoningEffort(
+  body: Record<string, unknown>,
+  effort: string | undefined,
+  reasoning?: ModelReasoningCapability
+): void {
+  if (reasoning?.requestProtocol !== 'anthropic-thinking') return
+  const resolved = resolveReasoningEffort(effort, reasoning)
+  if (!resolved) return
+  body.thinking = {
+    type: resolved === 'off' ? 'disabled' : 'adaptive'
+  }
+}
+
+/**
+ * Maps a raw effort string onto an effort the model actually supports.
+ *
+ * Returns `undefined` when the input is missing or unrecognized. A
+ * supported effort is returned as-is. An unsupported `low` degrades to
+ * `off` when the model supports `off`; every other unsupported effort
+ * falls back to the model's default effort.
+ */
+function resolveReasoningEffort(
+  effort: string | undefined,
+  reasoning: ModelReasoningCapability
+): NormalizedReasoningEffort | undefined {
+  const requested = normalizeReasoningEffortValue(effort)
+  if (!requested) return undefined
+  const supported = reasoning.supportedEfforts
+  if (supported.includes(requested)) return requested
+  // Past this point `requested` is known to be unsupported, so a requested
+  // `low` only needs the `off` check.
+  if (requested === 'low' && supported.includes('off')) return 'off'
+  return reasoning.defaultEffort
+}
+
+/**
+ * Canonicalizes a user- or config-supplied effort string (trimmed,
+ * case-insensitive) to one of `auto`, `off`, `low`, `medium`, `high` or
+ * `max`, accepting the common aliases for each. Returns `undefined` for a
+ * missing or unrecognized value.
+ */
+function normalizeReasoningEffortValue(effort: string | undefined): NormalizedReasoningEffort | undefined {
+  const key = effort?.trim().toLowerCase()
+  if (key === undefined) return undefined
+  if (key === 'auto' || key === 'adaptive') return 'auto'
+  if (['off', 'disabled', 'none', 'false'].includes(key)) return 'off'
+  if (key === 'low' || key === 'minimal') return 'low'
+  if (key === 'medium' || key === 'mid') return 'medium'
+  if (key === 'high') return 'high'
+  if (['max', 'maximum', 'xhigh'].includes(key)) return 'max'
+  return undefined
+}
+
function shouldRetryWithoutStreamUsage(
status: number,
text: string,
@@ -1602,8 +1930,16 @@ function isThinkingMode(effort: string | undefined): boolean {
function requiresReasoningRoundTrip(
effort: string | undefined,
model: string | undefined,
- baseUrl: string
+ baseUrl: string,
+ reasoning?: ModelReasoningCapability
): boolean {
+ if (reasoning) {
+ const resolved = resolveReasoningEffort(effort, reasoning)
+ if (resolved) {
+ return resolved !== 'off' && reasoning.requestProtocol !== 'none'
+ }
+ return isDeepSeekHost(baseUrl) && isThinkingProducerModel(model)
+ }
// Thinking-mode round trip is a DeepSeek-specific protocol extension.
// OpenAI-compat providers (OpenRouter, llama.cpp, etc.) may reject
// or misinterpret the `thinking` field, so we only auto-enable it
@@ -1869,6 +2205,7 @@ function formatAttachmentTextFallback(
return [
'[Attached image as base64 text]',
`Name: ${attachment.name}`,
+ `FilePath: ${attachment.localFilePath ?? 'unknown'}`,
`MIME: ${attachment.mimeType}`,
`Dimensions: ${formatAttachmentDimensions(attachment)}`,
`Bytes: ${attachment.byteSize}`,
diff --git a/kun/src/adapters/model/deepseek-pricing.ts b/kun/src/adapters/model/deepseek-pricing.ts
index 247dfe3e..f3e745d8 100644
--- a/kun/src/adapters/model/deepseek-pricing.ts
+++ b/kun/src/adapters/model/deepseek-pricing.ts
@@ -103,35 +103,6 @@ export function estimateDeepseekCost(input: {
}
}
-export function estimateDeepseekInputTokenCost(input: {
- model: string
- inputTokens: number
- providerHost?: string
-}): DeepseekCurrencyCosts | null {
- return estimateDeepseekCost({
- model: input.model,
- cacheHitTokens: 0,
- cacheMissTokens: input.inputTokens,
- outputTokens: 0,
- providerHost: input.providerHost
- })
-}
-
-export function estimateDeepseekCacheSavings(input: {
- model: string
- cacheHitTokens: number
- providerHost?: string
-}): DeepseekCurrencyCosts | null {
- if (input.providerHost !== undefined && !isDeepSeekHost(input.providerHost)) {
- return null
- }
- const tier = pricingTierForModel(input.model)
- if (!tier) return null
- const prices = DEEPSEEK_V4_PRICES[tier]
- return {
- costUsd: (input.cacheHitTokens / TOKENS_PER_MILLION) *
- Math.max(0, prices.usd.inputCacheMiss - prices.usd.inputCacheHit),
- costCny: (input.cacheHitTokens / TOKENS_PER_MILLION) *
- Math.max(0, prices.cny.inputCacheMiss - prices.cny.inputCacheHit)
- }
-}
+// Savings are reported in tokens only. Money estimates for savings were
+// removed: list prices drift and third-party providers make any currency
+// figure unreliable, so the UI now shows saved tokens instead.
diff --git a/kun/src/adapters/model/minimax-pricing.ts b/kun/src/adapters/model/minimax-pricing.ts
new file mode 100644
index 00000000..ac437427
--- /dev/null
+++ b/kun/src/adapters/model/minimax-pricing.ts
@@ -0,0 +1,142 @@
+// Cost estimate for one request. Only the CNY figure is required; the USD
+// figure is optional.
+export type MiniMaxCurrencyCosts = {
+ costUsd?: number
+ costCny: number
+}
+
+// Per-model list prices. When `cacheWrite` is absent, cache writes are
+// charged at the `input` rate (see costCnyForPrice).
+type MiniMaxPrice = {
+ input: number
+ output: number
+ cacheRead: number
+ cacheWrite?: number
+}
+
+// Divisor that converts a token count into millions of tokens for pricing.
+const TOKENS_PER_MILLION = 1_000_000
+// Input size (uncached + cache-read + cache-write tokens) above which
+// minimax-m3 is billed at the long-context price.
+const M3_LONG_CONTEXT_THRESHOLD = 512_000
+
+// Official MiniMax pay-as-you-go language model prices, CNY per 1M tokens.
+// Token Plan credits are deducted at the matching pay-as-you-go list price.
+// Keys are lowercase model ids as produced by normalizeModel.
+const MINIMAX_TEXT_PRICES: Record<string, MiniMaxPrice> = {
+  'minimax-m2.7': {
+    input: 2.1,
+    output: 8.4,
+    cacheRead: 0.42,
+    cacheWrite: 2.625
+  },
+  'minimax-m2.7-highspeed': {
+    input: 4.2,
+    output: 16.8,
+    cacheRead: 0.42,
+    cacheWrite: 2.625
+  },
+  'minimax-m2.5': {
+    input: 2.1,
+    output: 8.4,
+    cacheRead: 0.21,
+    cacheWrite: 2.625
+  },
+  'minimax-m2.5-highspeed': {
+    input: 4.2,
+    output: 16.8,
+    cacheRead: 0.21,
+    cacheWrite: 2.625
+  },
+  'minimax-m2.1': {
+    input: 2.1,
+    output: 8.4,
+    cacheRead: 0.21,
+    cacheWrite: 2.625
+  },
+  'minimax-m2.1-highspeed': {
+    input: 4.2,
+    output: 16.8,
+    cacheRead: 0.21,
+    cacheWrite: 2.625
+  },
+  'minimax-m2': {
+    input: 2.1,
+    output: 8.4,
+    cacheRead: 0.21,
+    cacheWrite: 2.625
+  }
+}
+
+// minimax-m3 price used when the input size does not exceed
+// M3_LONG_CONTEXT_THRESHOLD. No cacheWrite rate is listed, so cache writes
+// are charged at the input rate.
+const MINIMAX_M3_STANDARD_PRICE: MiniMaxPrice = {
+ input: 2.1,
+ output: 8.4,
+ cacheRead: 0.42
+}
+
+// minimax-m3 price used when the input size exceeds
+// M3_LONG_CONTEXT_THRESHOLD; every listed rate is double the standard one.
+const MINIMAX_M3_LONG_CONTEXT_PRICE: MiniMaxPrice = {
+ input: 4.2,
+ output: 16.8,
+ cacheRead: 0.84
+}
+
+/**
+ * Reports whether `baseUrl` points at one of the known MiniMax API hosts.
+ * The hostname must match exactly; an unparseable URL yields `false`.
+ */
+function isMiniMaxHost(baseUrl: string): boolean {
+  let hostname: string
+  try {
+    hostname = new URL(baseUrl).hostname
+  } catch {
+    return false
+  }
+  const knownHosts = ['api.minimaxi.com', 'api.minimax.io', 'api.minimax.chat']
+  return knownHosts.includes(hostname.toLowerCase())
+}
+
+/**
+ * Reduces a model id to its lowercase final path segment, so a
+ * vendor-prefixed id such as `MiniMax/MiniMax-M2.7` becomes `minimax-m2.7`.
+ * When the id has no non-empty segment the trimmed lowercase input is
+ * returned unchanged.
+ */
+function normalizeModel(model: string): string {
+  const lowered = model.trim().toLowerCase()
+  const segments = lowered.split('/').filter((segment) => segment.length > 0)
+  return segments.length > 0 ? segments[segments.length - 1] : lowered
+}
+
+/**
+ * Looks up the price sheet for a model.
+ *
+ * `minimax-m3` is tiered: the long-context price applies when
+ * `billableInputTokens` exceeds M3_LONG_CONTEXT_THRESHOLD. Every other
+ * model is resolved from MINIMAX_TEXT_PRICES.
+ *
+ * @param model Raw model id; normalized before lookup.
+ * @param billableInputTokens Input size used for tier selection.
+ * @returns The matching price sheet, or `null` for an unknown model.
+ */
+function priceForModel(model: string, billableInputTokens: number): MiniMaxPrice | null {
+  const normalized = normalizeModel(model)
+  if (normalized === 'minimax-m3') {
+    return billableInputTokens > M3_LONG_CONTEXT_THRESHOLD
+      ? MINIMAX_M3_LONG_CONTEXT_PRICE
+      : MINIMAX_M3_STANDARD_PRICE
+  }
+  // Own-property check: a bare index lookup would also return inherited
+  // Object.prototype members (e.g. for a model named `constructor`).
+  return Object.prototype.hasOwnProperty.call(MINIMAX_TEXT_PRICES, normalized)
+    ? MINIMAX_TEXT_PRICES[normalized]
+    : null
+}
+
+/**
+ * Prices one request in CNY: each token bucket is converted to millions of
+ * tokens and multiplied by its rate. Cache writes use the `cacheWrite`
+ * rate, falling back to the `input` rate when the price sheet has none.
+ */
+function costCnyForPrice(input: {
+  price: MiniMaxPrice
+  inputTokens: number
+  cacheReadTokens: number
+  cacheWriteTokens: number
+  outputTokens: number
+}): number {
+  const { price, inputTokens, cacheReadTokens, cacheWriteTokens, outputTokens } = input
+  const charge = (tokens: number, ratePerMillion: number): number =>
+    (tokens / TOKENS_PER_MILLION) * ratePerMillion
+  return (
+    charge(inputTokens, price.input) +
+    charge(cacheReadTokens, price.cacheRead) +
+    charge(cacheWriteTokens, price.cacheWrite ?? price.input) +
+    charge(outputTokens, price.output)
+  )
+}
+
+/**
+ * Estimates the CNY cost of one MiniMax request from its token usage.
+ *
+ * @param input.model Model id (vendor prefix and casing are ignored).
+ * @param input.providerHost Base URL of the provider; when given and not a
+ *   MiniMax host, no estimate is produced.
+ * @param input.inputTokens Input tokens excluding cache reads and writes.
+ * @param input.cacheReadTokens Input tokens served from cache.
+ * @param input.cacheWriteTokens Input tokens written to cache.
+ * @param input.outputTokens Generated tokens.
+ * @returns The estimated cost, or `null` for a non-MiniMax host or an
+ *   unknown model.
+ */
+export function estimateMiniMaxCost(input: {
+  model: string
+  providerHost?: string
+  inputTokens: number
+  cacheReadTokens: number
+  cacheWriteTokens: number
+  outputTokens: number
+}): MiniMaxCurrencyCosts | null {
+  if (input.providerHost !== undefined && !isMiniMaxHost(input.providerHost)) {
+    return null
+  }
+  // Clamp once so tier selection and pricing see the same non-negative
+  // counts; a negative bucket must not pull the total under the tier limit.
+  const inputTokens = Math.max(0, input.inputTokens)
+  const cacheReadTokens = Math.max(0, input.cacheReadTokens)
+  const cacheWriteTokens = Math.max(0, input.cacheWriteTokens)
+  const outputTokens = Math.max(0, input.outputTokens)
+  const billableInputTokens = inputTokens + cacheReadTokens + cacheWriteTokens
+  const price = priceForModel(input.model, billableInputTokens)
+  if (!price) return null
+  return {
+    costCny: costCnyForPrice({
+      price,
+      inputTokens,
+      cacheReadTokens,
+      cacheWriteTokens,
+      outputTokens
+    })
+  }
+}
diff --git a/kun/src/adapters/tool/builtin-file-tools.ts b/kun/src/adapters/tool/builtin-file-tools.ts
index 18ec80dc..07a14585 100644
--- a/kun/src/adapters/tool/builtin-file-tools.ts
+++ b/kun/src/adapters/tool/builtin-file-tools.ts
@@ -14,6 +14,26 @@ import { withFileMutationQueue } from './file-mutation-queue.js'
import type { EditLocalToolOptions, WriteLocalToolOptions } from './builtin-tool-types.js'
import { defaultEditLocalToolOperations, defaultWriteLocalToolOperations } from './builtin-tool-operations.js'
import { parseEditInstructions, resolveWorkspacePath, withToolBoundary } from './builtin-tool-utils.js'
+import { assertCanWritePath } from './sandbox-policy.js'
+
+/**
+ * Builds the error result for tool arguments that could not be parsed as
+ * JSON. Such arguments reach the tool as an object whose `__raw` field
+ * holds the unparsed text (presumably the tool-argument-repair fallback;
+ * confirm against that module). The usual cause is the model's output
+ * limit cutting an oversized payload mid-string, so the message tells the
+ * model how to retry rather than reporting a generic missing field.
+ *
+ * @param raw The `__raw` argument value, if any.
+ * @returns An error result when `raw` is a string, otherwise `null`.
+ */
+function truncatedArgumentsError(raw: unknown): { output: { error: string }; isError: true } | null {
+  if (typeof raw !== 'string') return null
+  const explanation =
+    'tool arguments were not valid JSON — they were likely truncated by your output limit. '
+  const received = `Received ${raw.length} characters. Retry with a much smaller payload: `
+  const advice = 'write a short skeleton first, then extend the file with several small edit calls.'
+  return {
+    output: { error: explanation + received + advice },
+    isError: true
+  }
+}
export function createWriteLocalTool(_options: WriteLocalToolOptions = {}): LocalTool {
const mkdirOp = _options.operations?.mkdir ?? defaultWriteLocalToolOperations.mkdir!
@@ -33,12 +53,15 @@ export function createWriteLocalTool(_options: WriteLocalToolOptions = {}): Loca
policy: 'on-request',
toolKind: 'file_change',
execute: async (args, context) => withToolBoundary(async () => {
+ const truncated = truncatedArgumentsError(args.__raw)
+ if (truncated) return truncated
const rawPath = typeof args.path === 'string' ? args.path : ''
const content = typeof args.content === 'string' ? args.content : null
if (!rawPath.trim() || content == null) {
return { output: { error: 'path and content are required' }, isError: true }
}
const { absolutePath, relativePath } = resolveWorkspacePath(rawPath, context)
+ assertCanWritePath(absolutePath, context)
return withFileMutationQueue(absolutePath, async () => {
await mkdirOp(dirname(absolutePath))
await writeFileOp(absolutePath, content)
@@ -88,12 +111,15 @@ export function createEditLocalTool(_options: EditLocalToolOptions = {}): LocalT
policy: 'on-request',
toolKind: 'file_change',
execute: async (args, context) => withToolBoundary(async () => {
+ const truncated = truncatedArgumentsError(args.__raw)
+ if (truncated) return truncated
const rawPath = typeof args.path === 'string' ? args.path : ''
const edits = parseEditInstructions(args)
if (!rawPath.trim() || edits.length === 0) {
return { output: { error: 'path and at least one edit are required' }, isError: true }
}
const { absolutePath, relativePath } = resolveWorkspacePath(rawPath, context)
+ assertCanWritePath(absolutePath, context)
return withFileMutationQueue(absolutePath, async () => {
const rawSource = await readFileOp(absolutePath)
const { bom, text: source } = stripBom(rawSource)
diff --git a/kun/src/adapters/tool/capability-registry.ts b/kun/src/adapters/tool/capability-registry.ts
index 03dfbfcf..65552bb6 100644
--- a/kun/src/adapters/tool/capability-registry.ts
+++ b/kun/src/adapters/tool/capability-registry.ts
@@ -4,6 +4,7 @@ import type {
ToolProviderPolicy
} from '../../ports/tool-host.js'
import type { LocalTool } from './local-tool-host.js'
+import { isToolAdvertisedInSandbox } from './sandbox-policy.js'
export type CapabilityToolRecord = {
provider: ToolProviderPolicy
@@ -23,6 +24,16 @@ export type CapabilityToolSpec = {
providerKind: ToolProviderKind
}
+/** Tool names that remain usable in a plan-mode context; `canUseTool` rejects every other name there. */
+const PLAN_MODE_ALLOWED_TOOL_NAMES = new Set([
+ 'read',
+ 'grep',
+ 'find',
+ 'ls',
+ 'create_plan',
+ 'user_input',
+ 'request_user_input'
+])
+
export class CapabilityRegistry {
private readonly providers = new Map()
private readonly tools = new Map()
@@ -63,6 +74,7 @@ export class CapabilityRegistry {
for (const record of this.tools.values()) {
if (!this.canUseProvider(record.provider, context)) continue
if (!this.canUseTool(record.tool.name, context)) continue
+ if (!isToolAdvertisedInSandbox(record.tool, context)) continue
if (record.tool.shouldAdvertise) {
if (!context || !record.tool.shouldAdvertise(context)) continue
}
@@ -110,11 +122,18 @@ export class CapabilityRegistry {
}
+ // Plan-mode contexts are limited to PLAN_MODE_ALLOWED_TOOL_NAMES; after that check the
+ // optional allowedToolNames list decides (no list means every tool is allowed).
private canUseTool(toolName: string, context?: ToolHostContext): boolean {
+ if (isPlanModeContext(context) && !PLAN_MODE_ALLOWED_TOOL_NAMES.has(toolName)) {
+ return false
+ }
const allowed = context?.allowedToolNames
return !allowed || allowed.includes(toolName)
}
}
+/** True when the context's thread mode is 'plan' or it carries a truthy `guiPlan`. */
+function isPlanModeContext(context: ToolHostContext | undefined): boolean {
+  if (!context) return false
+  if (context.threadMode === 'plan') return true
+  return Boolean(context.guiPlan)
+}
+
function providerPolicy(provider: ToolProviderPolicy): ToolProviderPolicy {
return {
id: provider.id,
diff --git a/kun/src/adapters/tool/create-plan-tool.ts b/kun/src/adapters/tool/create-plan-tool.ts
index e474ad46..599d0310 100644
--- a/kun/src/adapters/tool/create-plan-tool.ts
+++ b/kun/src/adapters/tool/create-plan-tool.ts
@@ -15,6 +15,7 @@ import {
type CreatePlanToolOutput,
type GuiPlanOperation
} from '../../shared/gui-plan.js'
+import { canWritePath } from './sandbox-policy.js'
/**
* Shared tool name. Kept in sync with the renderer contract so the
@@ -290,6 +291,16 @@ export async function executeCreatePlanTool(
? normalize(join(resolvedWorkspace, resolved.relativePath))
: normalize(join(planDirectory(resolvedWorkspace), basename(resolved.relativePath)))
assertWithinWorkspace(absolutePath, resolvedWorkspace)
+ const writePermission = canWritePath(absolutePath, context)
+ if (!writePermission.ok) {
+ return {
+ output: {
+ code: writePermission.block.code,
+ error: writePermission.block.message
+ },
+ isError: true
+ }
+ }
if (context.abortSignal.aborted) {
return { output: { error: 'plan write aborted' }, isError: true }
}
diff --git a/kun/src/adapters/tool/image-gen-network-error.test.ts b/kun/src/adapters/tool/image-gen-network-error.test.ts
new file mode 100644
index 00000000..39ede02f
--- /dev/null
+++ b/kun/src/adapters/tool/image-gen-network-error.test.ts
@@ -0,0 +1,80 @@
+import { afterEach, describe, expect, it, vi } from 'vitest'
+import { describeNetworkError, OpenAiCompatImageClient } from './image-gen-tool-provider.js'
+
+// Unit tests for flattening an error's `cause` chain into one message.
+describe('describeNetworkError', () => {
+ it('unwraps the cause behind undici fetch failed errors', () => {
+ // The code ENOTFOUND already appears in the message, so it is not appended again.
+ const dns = Object.assign(new Error('getaddrinfo ENOTFOUND images.example.test'), {
+ code: 'ENOTFOUND'
+ })
+ const wrapped = new TypeError('fetch failed', { cause: dns })
+ expect(describeNetworkError(wrapped)).toBe(
+ 'fetch failed: getaddrinfo ENOTFOUND images.example.test'
+ )
+ })
+
+ it('digs into AggregateError connection failures', () => {
+ // The AggregateError wrapper contributes no text; only its first inner error is reported.
+ const refused = Object.assign(new Error('connect ECONNREFUSED 127.0.0.1:8080'), {
+ code: 'ECONNREFUSED'
+ })
+ const wrapped = new TypeError('fetch failed', { cause: new AggregateError([refused], '') })
+ expect(describeNetworkError(wrapped)).toBe('fetch failed: connect ECONNREFUSED 127.0.0.1:8080')
+ })
+
+ it('appends error codes missing from the message', () => {
+ const tls = Object.assign(new Error('self-signed certificate'), {
+ code: 'DEPTH_ZERO_SELF_SIGNED_CERT'
+ })
+ expect(describeNetworkError(new TypeError('fetch failed', { cause: tls }))).toBe(
+ 'fetch failed: self-signed certificate (DEPTH_ZERO_SELF_SIGNED_CERT)'
+ )
+ })
+
+ it('handles non-error values and empty chains', () => {
+ // A non-Error value is stringified; an Error with no message and no code yields the fallback text.
+ expect(describeNetworkError('boom')).toBe('boom')
+ expect(describeNetworkError(new Error(''))).toBe('unknown network error')
+ })
+})
+
+// Exercises the client with a stubbed global fetch so no network access happens.
+describe('OpenAiCompatImageClient network failures', () => {
+ afterEach(() => {
+ // Restore the real fetch after each stubbed test.
+ vi.unstubAllGlobals()
+ })
+
+ it('surfaces the failing endpoint and root cause instead of bare fetch failed', async () => {
+ const dns = Object.assign(new Error('getaddrinfo ENOTFOUND images.example.test'), {
+ code: 'ENOTFOUND'
+ })
+ vi.stubGlobal('fetch', vi.fn(async () => {
+ throw new TypeError('fetch failed', { cause: dns })
+ }))
+
+ const client = new OpenAiCompatImageClient('https://images.example.test/v1', 'sk-test')
+ await expect(
+ client.generate({
+ prompt: 'a cat',
+ model: 'test-model',
+ timeoutMs: 5_000,
+ signal: new AbortController().signal
+ })
+ ).rejects.toThrow(
+ 'image request to https://images.example.test/v1/images/generations failed: ' +
+ 'fetch failed: getaddrinfo ENOTFOUND images.example.test'
+ )
+ })
+
+ it('reports timeouts with the configured duration', async () => {
+ // A DOMException named TimeoutError is what the client maps to the "timed out after Nms" message.
+ vi.stubGlobal('fetch', vi.fn(async () => {
+ throw new DOMException('The operation was aborted due to timeout', 'TimeoutError')
+ }))
+
+ const client = new OpenAiCompatImageClient('https://images.example.test/v1', 'sk-test')
+ await expect(
+ client.generate({
+ prompt: 'a cat',
+ model: 'test-model',
+ timeoutMs: 5_000,
+ signal: new AbortController().signal
+ })
+ ).rejects.toThrow('image request to https://images.example.test/v1/images/generations timed out after 5000ms')
+ })
+})
diff --git a/kun/src/adapters/tool/image-gen-tool-provider.ts b/kun/src/adapters/tool/image-gen-tool-provider.ts
new file mode 100644
index 00000000..13d4b5e4
--- /dev/null
+++ b/kun/src/adapters/tool/image-gen-tool-provider.ts
@@ -0,0 +1,655 @@
+import { randomBytes } from 'node:crypto'
+import { mkdir, readFile, writeFile } from 'node:fs/promises'
+import { isAbsolute, join, relative, resolve } from 'node:path'
+import type { KunCapabilitiesConfig } from '../../contracts/capabilities.js'
+import type { AttachmentStore } from '../../attachments/attachment-store.js'
+import { detectImage } from '../../attachments/attachment-store.js'
+import type { CapabilityToolProvider } from './capability-registry.js'
+import { LocalToolHost } from './local-tool-host.js'
+
+// Directory, relative to the workspace, that generated images are written into.
+const GENERATED_IMAGE_DIR = '.deepseekgui-images'
+// Largest reference image accepted, in bytes (10 * 1024 * 1024).
+const MAX_REFERENCE_IMAGE_BYTES = 10 * 1024 * 1024
+// Detected MIME types a reference image may have.
+const REFERENCE_MIME_TYPES = new Set(['image/png', 'image/jpeg', 'image/webp'])
+// Aspect ratios accepted by parseRatio and listed as the tool schema's enum.
+const ASPECT_RATIOS = new Set(['1:1', '4:3', '3:4', '16:9', '9:16', '3:2', '2:3', '21:9'])
+// Long-edge length in pixels for each size tier.
+const SIZE_TIERS: Record = { '1K': 1024, '2K': 2048 }
+// The short edge is rounded to a multiple of this many pixels.
+const SIZE_STEP = 64
+// Lower bound for the computed short edge, in pixels.
+const MIN_EDGE = 256
+
+/** Raw image bytes returned by a client, with the MIME type the client reported for them. */
+export type GeneratedImage = { data: Buffer; mimeType: string }
+
+/** Parameters for one text-to-image request. */
+export type ImageGenRequest = {
+ prompt: string
+ model: string
+ /** "WxH" size string; the tool omits it when no size applies or the size is 'auto'. */
+ size?: string
+ /** Deadline in milliseconds, combined with `signal` by the clients. */
+ timeoutMs: number
+ signal: AbortSignal
+}
+
+/** An image request that additionally carries the reference images to guide the result. */
+export type ImageGenEditRequest = ImageGenRequest & {
+ images: { name: string; mimeType: string; data: Buffer }[]
+}
+
+/**
+ * Error raised when an image endpoint answers with a non-success HTTP status.
+ * Exposes the status code and the raw response body so callers can branch on them;
+ * the message has the form `HTTP <status>: <body>`.
+ */
+export class ImageGenHttpError extends Error {
+  constructor(
+    readonly status: number,
+    readonly body: string
+  ) {
+    super(`HTTP ${status}: ${body}`)
+    // Without this the inherited name stays "Error", hiding the subclass in logs and stack traces.
+    this.name = 'ImageGenHttpError'
+  }
+}
+
+/**
+ * Turn an error and its `cause` chain into a single human-readable line.
+ *
+ * Node's fetch surfaces network problems as a bare `TypeError: fetch failed`
+ * and keeps the useful part (DNS, refused connection, TLS, …) in `cause`.
+ * At most five links are visited. An AggregateError with inner errors is
+ * replaced by its first inner error, an error code is appended when the
+ * message does not already contain it, and repeated fragments appear once.
+ *
+ * @param error Any thrown value.
+ * @returns The fragments joined with ': ', or 'unknown network error' when there are none.
+ */
+export function describeNetworkError(error: unknown): string {
+  const maxLinks = 5
+  // A Set keeps first-insertion order, which gives de-duplication for free.
+  const fragments = new Set<string>()
+  let cursor: unknown = error
+  let visited = 0
+  while (visited < maxLinks && cursor != null) {
+    visited += 1
+    if (cursor instanceof AggregateError && cursor.errors.length > 0) {
+      cursor = cursor.errors[0]
+    } else if (cursor instanceof Error) {
+      const rawCode = (cursor as { code?: unknown }).code
+      const code = typeof rawCode === 'string' ? rawCode : ''
+      const text = cursor.message.trim()
+      if (text) {
+        fragments.add(code && !text.includes(code) ? `${text} (${code})` : text)
+      } else if (code) {
+        fragments.add(code)
+      }
+      cursor = cursor.cause
+    } else {
+      // Non-Error values end the chain after being stringified.
+      fragments.add(String(cursor))
+      break
+    }
+  }
+  return [...fragments].join(': ') || 'unknown network error'
+}
+
+/**
+ * Wrap a failed fetch in an Error that names the target URL (query string removed)
+ * and distinguishes a timeout, a cancellation, and any other failure.
+ * The original error is kept as `cause`.
+ */
+function imageFetchFailure(
+  url: string,
+  error: unknown,
+  request: { timeoutMs: number }
+): Error {
+  const [target] = url.split('?')
+  const domName = error instanceof DOMException ? error.name : undefined
+  let message: string
+  switch (domName) {
+    case 'TimeoutError':
+      message = `image request to ${target} timed out after ${request.timeoutMs}ms`
+      break
+    case 'AbortError':
+      message = `image request to ${target} was canceled`
+      break
+    default:
+      message = `image request to ${target} failed: ${describeNetworkError(error)}`
+  }
+  return new Error(message, { cause: error })
+}
+
+/** Contract for an image backend: the tool calls `edit` when reference images are present, `generate` otherwise. */
+export interface ImageGenClient {
+ /** Identifier reported as `provider` in the tool's telemetry. */
+ id: string
+ generate(request: ImageGenRequest): Promise
+ edit(request: ImageGenEditRequest): Promise
+}
+
+/** Status record for the image-generation capability, produced alongside the providers. */
+export type ImageGenDiagnostic = {
+ id: 'imageGen'
+ enabled: boolean
+ available: boolean
+ model?: string
+ /** Set when the capability is unavailable, e.g. listing the missing config fields. */
+ reason?: string
+}
+
+/** Optional collaborators for buildImageGenToolProviders. */
+export type ImageGenToolProviderOptions = {
+ /** Replaces the client that would otherwise be created from the config. */
+ client?: ImageGenClient
+ /** When absent, results carry an "inline preview unavailable" warning instead of attachments. */
+ attachmentStore?: AttachmentStore
+ /** Supplies the ISO timestamp used in generated file names; defaults to the current time. */
+ nowIso?: () => string
+}
+
+/** Result of buildImageGenToolProviders: the providers to register, diagnostics, and whether the tool is usable. */
+export type ImageGenToolProviderBuildResult = {
+ providers: CapabilityToolProvider[]
+ diagnostics: ImageGenDiagnostic[]
+ available: boolean
+}
+
+/**
+ * Map UI-friendly aspect ratio + size tier to an OpenAI-compatible "WxH"
+ * size string. Long edge anchors to the tier (1K→1024, 2K→2048), short edge
+ * follows the ratio snapped to multiples of 64 with a 256px floor. Both args
+ * absent → fall back to the configured default (may be undefined or 'auto').
+ *
+ * @param aspectRatio One of the supported "W:H" ratios; anything else yields a square.
+ * @param imageSize   Size tier name; an unknown tier falls back to '1K'.
+ * @param defaultSize Returned unchanged when neither of the other arguments is given.
+ * @returns A "WxH" string, or `defaultSize` when nothing was requested.
+ */
+export function mapImageSize(
+  aspectRatio: string | undefined,
+  imageSize: string | undefined,
+  defaultSize: string | undefined
+): string | undefined {
+  if (!aspectRatio && !imageSize) return defaultSize
+  // Own-property lookup: a tier name such as "constructor" must not resolve to an
+  // inherited Object.prototype member and leak a non-numeric value into the size string.
+  const requestedTier =
+    imageSize !== undefined && Object.prototype.hasOwnProperty.call(SIZE_TIERS, imageSize)
+      ? SIZE_TIERS[imageSize]
+      : undefined
+  const tier = requestedTier ?? SIZE_TIERS['1K']
+  const parsed = parseRatio(aspectRatio)
+  if (!parsed) return `${tier}x${tier}`
+  const { w, h } = parsed
+  if (w === h) return `${tier}x${tier}`
+  const short = Math.max(MIN_EDGE, Math.round((tier * Math.min(w, h)) / Math.max(w, h) / SIZE_STEP) * SIZE_STEP)
+  return w > h ? `${tier}x${short}` : `${short}x${tier}`
+}
+
+/** Parse a supported "W:H" ratio into positive numbers; returns null for anything else. */
+function parseRatio(aspectRatio: string | undefined): { w: number; h: number } | null {
+  if (!aspectRatio) return null
+  if (!ASPECT_RATIOS.has(aspectRatio)) return null
+  const [widthText, heightText] = aspectRatio.split(':')
+  const w = Number(widthText)
+  const h = Number(heightText)
+  const usable = Number.isFinite(w) && Number.isFinite(h) && w > 0 && h > 0
+  return usable ? { w, h } : null
+}
+
+/**
+ * Build the `generate_image` capability provider from the image-generation config.
+ *
+ * - Config absent or disabled: no providers and no diagnostics.
+ * - Enabled but missing baseUrl, apiKey or model: one unavailable provider plus a
+ *   diagnostic whose reason names the missing fields.
+ * - Fully configured: one provider exposing the `generate_image` tool.
+ *
+ * The tool saves the image under GENERATED_IMAGE_DIR in the workspace and, when an
+ * attachment store is supplied, also registers it as an attachment. Provider,
+ * network and disk failures are all returned as structured tool errors.
+ *
+ * @param config  The `imageGen` section of the capabilities config.
+ * @param options Optional client override, attachment store and clock.
+ * @returns Providers, diagnostics and an overall availability flag.
+ */
+export function buildImageGenToolProviders(
+  config: KunCapabilitiesConfig['imageGen'] | undefined,
+  options: ImageGenToolProviderOptions = {}
+): ImageGenToolProviderBuildResult {
+  if (!config?.enabled) {
+    return { providers: [], diagnostics: [], available: false }
+  }
+
+  const missing = [
+    !config.baseUrl ? 'baseUrl' : undefined,
+    !config.apiKey ? 'apiKey' : undefined,
+    !config.model ? 'model' : undefined
+  ].filter((field): field is string => Boolean(field))
+
+  if (missing.length > 0) {
+    const reason = `image generation provider is not configured (missing ${missing.join(', ')})`
+    return {
+      providers: [{ id: 'imageGen', kind: 'image', enabled: true, available: false, reason, tools: [] }],
+      diagnostics: [{ id: 'imageGen', enabled: true, available: false, model: config.model, reason }],
+      available: false
+    }
+  }
+
+  const client = options.client ?? createImageGenClient(config)
+  // Safe: the `missing` check above returned early when model was falsy.
+  const model = config.model!
+
+  const tool = LocalToolHost.defineTool({
+    name: 'generate_image',
+    description: [
+      'Generate an image from a text prompt using the configured image provider.',
+      'Optionally pass reference_image_paths (image files inside the workspace) to guide the result (image-to-image).',
+      `The generated image is saved under ${GENERATED_IMAGE_DIR}/ in the workspace and returned as an inline attachment preview.`,
+      'Generates exactly one image per call; call again for variations.'
+    ].join(' '),
+    inputSchema: {
+      type: 'object',
+      properties: {
+        prompt: { type: 'string', description: 'Detailed description of the image to generate' },
+        aspect_ratio: { type: 'string', enum: [...ASPECT_RATIOS] },
+        image_size: { type: 'string', enum: Object.keys(SIZE_TIERS), description: 'Resolution tier, defaults to 1K' },
+        reference_image_paths: {
+          type: 'array',
+          items: { type: 'string' },
+          maxItems: config.maxReferenceImages,
+          description: 'Workspace-relative paths of reference images for image-to-image guidance'
+        }
+      },
+      required: ['prompt'],
+      additionalProperties: false
+    },
+    policy: 'untrusted',
+    execute: async (args, context) => {
+      const startedAt = Date.now()
+      const prompt = pickString(args.prompt)
+      if (!prompt) return toolError('invalid_prompt', 'prompt is required')
+
+      const aspectRatio = pickString(args.aspect_ratio)
+      const imageSize = pickString(args.image_size)
+      const size = mapImageSize(aspectRatio, imageSize, config.defaultSize)
+
+      const references = await collectReferenceImages(
+        args.reference_image_paths,
+        context.workspace,
+        config.maxReferenceImages
+      )
+      if ('error' in references) return references.error
+
+      // Reference images switch the request from text-to-image to the edits endpoint.
+      const endpoint = references.images.length > 0 ? 'edits' : 'generations'
+      let image: GeneratedImage
+      try {
+        const request = {
+          prompt,
+          model,
+          ...(size && size !== 'auto' ? { size } : {}),
+          timeoutMs: config.timeoutMs,
+          signal: context.abortSignal
+        }
+        image = endpoint === 'edits'
+          ? await client.edit({ ...request, images: references.images })
+          : await client.generate(request)
+      } catch (error) {
+        if (error instanceof ImageGenHttpError) {
+          if (endpoint === 'edits' && (error.status === 404 || error.status === 405 || error.status === 501)) {
+            return toolError(
+              'edits_unsupported',
+              'the configured image provider does not support reference images (/images/edits); retry generate_image without reference_image_paths'
+            )
+          }
+          return toolError('provider_error', error.message, telemetry(startedAt, client.id))
+        }
+        return toolError('generation_failed', errorMessage(error), telemetry(startedAt, client.id))
+      }
+
+      // Prefer the type sniffed from the bytes over what the client reported.
+      const detected = detectImage(image.data)
+      const mimeType = detected?.mimeType ?? image.mimeType ?? 'image/png'
+      const ext = mimeType === 'image/jpeg' ? 'jpg' : mimeType === 'image/webp' ? 'webp' : 'png'
+      const stamp = (options.nowIso?.() ?? new Date().toISOString()).replace(/\D/g, '').slice(0, 14)
+      const fileName = `img-${stamp}-${randomBytes(2).toString('hex')}.${ext}`
+      // Forward slashes regardless of platform: the path is echoed back to the
+      // model and rendered in chat, where POSIX-style relative paths are expected.
+      const relativePath = `${GENERATED_IMAGE_DIR}/${fileName}`
+      const targetDir = join(context.workspace, GENERATED_IMAGE_DIR)
+      const absolutePath = join(targetDir, fileName)
+      try {
+        await mkdir(targetDir, { recursive: true })
+        await writeFile(absolutePath, image.data)
+      } catch (error) {
+        // Report disk failures the same structured way as provider failures instead of
+        // letting a raw filesystem exception escape the tool.
+        return toolError(
+          'save_failed',
+          `could not save generated image to ${relativePath}: ${errorMessage(error)}`,
+          telemetry(startedAt, client.id)
+        )
+      }
+
+      const warnings: string[] = []
+      const attachments: { id: string; name: string; mimeType: string; width?: number; height?: number }[] = []
+      if (options.attachmentStore) {
+        try {
+          const attachment = await options.attachmentStore.create({
+            name: fileName,
+            data: image.data,
+            mimeType,
+            threadId: context.threadId,
+            workspace: context.workspace
+          })
+          attachments.push({
+            id: attachment.id,
+            name: attachment.name,
+            mimeType: attachment.mimeType,
+            ...(attachment.width ? { width: attachment.width } : {}),
+            ...(attachment.height ? { height: attachment.height } : {})
+          })
+        } catch (error) {
+          // The file is already on disk, so a failed preview is only a warning.
+          warnings.push(`inline preview unavailable: ${errorMessage(error)}`)
+        }
+      } else {
+        warnings.push('inline preview unavailable: attachment store is disabled')
+      }
+
+      return {
+        output: {
+          files: [{
+            relativePath,
+            absolutePath,
+            mimeType,
+            byteSize: image.data.byteLength,
+            ...(detected?.width ? { width: detected.width } : {}),
+            ...(detected?.height ? { height: detected.height } : {})
+          }],
+          attachments,
+          model,
+          ...(size ? { size } : {}),
+          endpoint,
+          warnings,
+          telemetry: telemetry(startedAt, client.id)
+        }
+      }
+    }
+  })
+
+  return {
+    providers: [{ id: 'imageGen', kind: 'image', enabled: true, available: true, tools: [tool] }],
+    diagnostics: [{ id: 'imageGen', enabled: true, available: true, model }],
+    available: true
+  }
+}
+
+// Success shape of collectReferenceImages: the loaded reference images.
+type ReferenceImages = { images: { name: string; mimeType: string; data: Buffer }[] }
+// Failure shape of collectReferenceImages: a ready-to-return tool error result.
+type ReferenceError = { error: { output: unknown; isError: true } }
+
+/**
+ * Load and validate the reference images named by a `reference_image_paths` argument.
+ *
+ * Each path is resolved against the workspace and must stay inside it, be readable,
+ * fit within MAX_REFERENCE_IMAGE_BYTES and sniff as png, jpeg or webp. Entries that
+ * are not non-blank strings are ignored.
+ *
+ * @param value     The raw tool argument; undefined or null means "no references".
+ * @param workspace Absolute workspace root the paths are resolved against.
+ * @param maxCount  Maximum number of reference images allowed.
+ * @returns `{ images }` on success, or `{ error }` holding an `invalid_reference_path` tool error.
+ */
+async function collectReferenceImages(
+  value: unknown,
+  workspace: string,
+  maxCount: number
+): Promise<ReferenceImages | ReferenceError> {
+  if (value === undefined || value === null) return { images: [] }
+  if (!Array.isArray(value)) {
+    return { error: toolError('invalid_reference_path', 'reference_image_paths must be an array of strings') }
+  }
+  const paths = value.filter((entry): entry is string => typeof entry === 'string' && entry.trim().length > 0)
+  if (paths.length > maxCount) {
+    return { error: toolError('invalid_reference_path', `at most ${maxCount} reference images are allowed`) }
+  }
+  const images: ReferenceImages['images'] = []
+  for (const rawPath of paths) {
+    const resolved = resolve(workspace, rawPath)
+    const rel = relative(workspace, resolved)
+    // Only a leading ".." path segment escapes the workspace; a plain prefix test would
+    // also reject legitimate names such as "..cache/ref.png".
+    const escapesWorkspace = isAbsolute(rel) || rel.split(/[\\/]/)[0] === '..'
+    if (escapesWorkspace) {
+      return { error: toolError('invalid_reference_path', `reference image must be inside the workspace: ${rawPath}`) }
+    }
+    let data: Buffer
+    try {
+      data = await readFile(resolved)
+    } catch {
+      return { error: toolError('invalid_reference_path', `reference image not found: ${rawPath}`) }
+    }
+    if (data.byteLength > MAX_REFERENCE_IMAGE_BYTES) {
+      return { error: toolError('invalid_reference_path', `reference image exceeds ${MAX_REFERENCE_IMAGE_BYTES} byte limit: ${rawPath}`) }
+    }
+    const detected = detectImage(data)
+    if (!detected || !REFERENCE_MIME_TYPES.has(detected.mimeType)) {
+      return { error: toolError('invalid_reference_path', `reference image must be png, jpeg, or webp: ${rawPath}`) }
+    }
+    // Split on both separators so Windows-style paths still yield a bare file name.
+    const name = rawPath.split(/[\\/]/).pop() || 'reference.png'
+    images.push({ name, mimeType: detected.mimeType, data })
+  }
+  return { images }
+}
+
+// Subset of an OpenAI-compatible images response that the client reads: inline base64 or a download URL.
+type ImagesApiPayload = { data?: { b64_json?: string; url?: string }[] }
+// Subset of a MiniMax image response that the client reads.
+type MiniMaxImagePayload = {
+ data?: {
+ image_base64?: string[]
+ image_urls?: string[]
+ }
+ // A numeric status_code other than 0 is treated as a failure by the client.
+ base_resp?: {
+ status_code?: number
+ status_msg?: string
+ }
+}
+
+/**
+ * Pick the client implementation for a config: the MiniMax client when
+ * `protocol` is 'minimax-image', the OpenAI-compatible client otherwise.
+ * `baseUrl` and `apiKey` are asserted non-null here and not re-validated.
+ */
+export function createImageGenClient(config: {
+  protocol?: string
+  baseUrl?: string
+  apiKey?: string
+}): ImageGenClient {
+  const Client = config.protocol === 'minimax-image' ? MiniMaxImageClient : OpenAiCompatImageClient
+  return new Client(config.baseUrl!, config.apiKey!)
+}
+
+/**
+ * Resolve the URL of an OpenAI-compatible images endpoint from a provider base URL.
+ *
+ * - A base ending in a version segment (`…/v1`) gets `images/<endpoint>` appended.
+ * - Any other base gets `/v1` inserted first (e.g. `https://zenmux.ai/api` →
+ *   `…/api/v1/images/generations`).
+ * - A base that already is the requested endpoint is returned as-is; one that
+ *   points at the other images endpoint is re-routed to the requested one.
+ * - An empty base yields the relative path `/v1/images/<endpoint>`.
+ *
+ * Surrounding whitespace and trailing slashes are ignored.
+ */
+export function openAiCompatImageUrl(
+  baseUrl: string,
+  endpoint: 'generations' | 'edits'
+): string {
+  const suffix = `images/${endpoint}`
+  const trimmed = baseUrl.trim().replace(/\/+$/, '')
+  if (trimmed === '') return `/v1/${suffix}`
+  const lowered = trimmed.toLowerCase()
+  if (lowered.endsWith(`/${suffix}`)) return trimmed
+  // Cut a trailing images endpoint so the other operation can be appended instead.
+  const existing = ['images/generations', 'images/edits'].find((known) => lowered.endsWith(`/${known}`))
+  const root = existing ? trimmed.slice(0, -existing.length).replace(/\/+$/, '') : trimmed
+  const tail = root.slice(root.lastIndexOf('/') + 1).toLowerCase()
+  return /^v\d+$/.test(tail) ? `${root}/${suffix}` : `${root}/v1/${suffix}`
+}
+
+/**
+ * Image client for OpenAI-compatible `/images/generations` and `/images/edits` endpoints.
+ * Sends the API key as a Bearer token and always requests exactly one image.
+ */
+export class OpenAiCompatImageClient implements ImageGenClient {
+ readonly id = 'openai-compat'
+ private readonly baseUrl: string
+
+ constructor(
+ baseUrl: string,
+ private readonly apiKey: string
+ ) {
+ // Trailing slashes are dropped once here; the endpoint path is added per request.
+ this.baseUrl = baseUrl.replace(/\/+$/, '')
+ }
+
+ /** Text-to-image: POSTs a JSON body to the generations endpoint. */
+ async generate(request: ImageGenRequest): Promise {
+ // Built lazily so the retry can omit response_format.
+ const body = (includeResponseFormat: boolean) =>
+ JSON.stringify({
+ model: request.model,
+ prompt: request.prompt,
+ n: 1,
+ ...(request.size ? { size: request.size } : {}),
+ ...(includeResponseFormat ? { response_format: 'b64_json' } : {})
+ })
+ return this.requestImage(
+ openAiCompatImageUrl(this.baseUrl, 'generations'),
+ (includeResponseFormat) => ({
+ headers: { Authorization: `Bearer ${this.apiKey}`, 'Content-Type': 'application/json' },
+ body: body(includeResponseFormat)
+ }),
+ request
+ )
+ }
+
+ /** Image-to-image: POSTs multipart form data, including the reference images, to the edits endpoint. */
+ async edit(request: ImageGenEditRequest): Promise {
+ // A fresh FormData is built for every attempt.
+ const buildForm = (includeResponseFormat: boolean) => {
+ const form = new FormData()
+ form.set('model', request.model)
+ form.set('prompt', request.prompt)
+ if (request.size) form.set('size', request.size)
+ if (includeResponseFormat) form.set('response_format', 'b64_json')
+ // Several images use the `image[]` field name, a single image uses `image`.
+ const field = request.images.length > 1 ? 'image[]' : 'image'
+ for (const image of request.images) {
+ form.append(field, new Blob([new Uint8Array(image.data)], { type: image.mimeType }), image.name)
+ }
+ return form
+ }
+ return this.requestImage(
+ openAiCompatImageUrl(this.baseUrl, 'edits'),
+ // No explicit Content-Type header is set for the FormData body.
+ (includeResponseFormat) => ({
+ headers: { Authorization: `Bearer ${this.apiKey}` },
+ body: buildForm(includeResponseFormat)
+ }),
+ request
+ )
+ }
+
+ /**
+ * POST with two compat fallbacks: providers that reject `response_format`
+ * (e.g. gpt-image-1) get one retry without it, and providers that return a
+ * URL instead of b64_json (e.g. SiliconFlow default) get a second download.
+ */
+ private async requestImage(
+ url: string,
+ init: (includeResponseFormat: boolean) => { headers: Record; body: string | FormData },
+ request: { timeoutMs: number; signal: AbortSignal }
+ ): Promise {
+ // One combined signal is shared by the first POST, the retry and the download.
+ const signal = withTimeout(request.signal, request.timeoutMs)
+ const post = async (includeResponseFormat: boolean): Promise => {
+ try {
+ return await fetch(url, { method: 'POST', ...init(includeResponseFormat), signal })
+ } catch (error) {
+ throw imageFetchFailure(url, error, request)
+ }
+ }
+ let response = await post(true)
+ if (!response.ok && response.status >= 400 && response.status < 500) {
+ // Retry only when the 4xx body mentions response_format; any other 4xx is final.
+ const errorBody = await response.text()
+ if (!/response_format/i.test(errorBody)) throw new ImageGenHttpError(response.status, errorBody)
+ response = await post(false)
+ }
+ if (!response.ok) {
+ throw new ImageGenHttpError(response.status, await response.text())
+ }
+ const payload = (await response.json()) as ImagesApiPayload
+ // Only the first entry is used.
+ const entry = payload.data?.[0]
+ if (entry?.b64_json) {
+ return { data: Buffer.from(entry.b64_json, 'base64'), mimeType: 'image/png' }
+ }
+ if (entry?.url) {
+ let download: Response
+ try {
+ download = await fetch(entry.url, { signal })
+ } catch (error) {
+ throw imageFetchFailure(entry.url, error, request)
+ }
+ if (!download.ok) throw new ImageGenHttpError(download.status, await download.text())
+ // Use the Content-Type without parameters, falling back to image/png.
+ const mimeType = download.headers.get('content-type')?.split(';')[0] || 'image/png'
+ return { data: Buffer.from(await download.arrayBuffer()), mimeType }
+ }
+ throw new Error('image provider returned no image data')
+ }
+}
+
+/**
+ * Image client for the MiniMax image generation endpoint. Both operations POST JSON
+ * to the same URL; `edit` additionally sends the reference images as data URLs.
+ */
+export class MiniMaxImageClient implements ImageGenClient {
+ readonly id = 'minimax-image'
+ private readonly endpointUrl: string
+
+ constructor(
+ baseUrl: string,
+ private readonly apiKey: string
+ ) {
+ // The full endpoint URL is resolved once from the configured base URL.
+ this.endpointUrl = minimaxImageGenerationUrl(baseUrl)
+ }
+
+ /** Text-to-image request asking for one base64-encoded image. */
+ async generate(request: ImageGenRequest): Promise {
+ return this.requestImage({
+ model: request.model,
+ prompt: request.prompt,
+ ...minimaxImageDimensionFields(request.model, request.size),
+ prompt_optimizer: true,
+ response_format: 'base64',
+ n: 1
+ }, request)
+ }
+
+ /** Same as `generate`, plus every reference image as a `subject_reference` entry of type 'character'. */
+ async edit(request: ImageGenEditRequest): Promise {
+ return this.requestImage({
+ model: request.model,
+ prompt: request.prompt,
+ ...minimaxImageDimensionFields(request.model, request.size),
+ subject_reference: request.images.map((image) => ({
+ type: 'character',
+ image_file: `data:${image.mimeType};base64,${image.data.toString('base64')}`
+ })),
+ prompt_optimizer: true,
+ response_format: 'base64',
+ n: 1
+ }, request)
+ }
+
+ /**
+ * POST the JSON body and extract the first image, either inline base64 or via a
+ * follow-up download of the first returned URL.
+ */
+ private async requestImage(
+ body: Record,
+ request: { timeoutMs: number; signal: AbortSignal }
+ ): Promise {
+ // One combined signal is shared by the POST and the optional download.
+ const signal = withTimeout(request.signal, request.timeoutMs)
+ let response: Response
+ try {
+ response = await fetch(this.endpointUrl, {
+ method: 'POST',
+ headers: {
+ Authorization: `Bearer ${this.apiKey}`,
+ 'Content-Type': 'application/json'
+ },
+ body: JSON.stringify(body),
+ signal
+ })
+ } catch (error) {
+ throw imageFetchFailure(this.endpointUrl, error, request)
+ }
+ // Read the body as text first so a non-OK response can carry it in the error.
+ const text = await response.text()
+ if (!response.ok) throw new ImageGenHttpError(response.status, text)
+ let payload: MiniMaxImagePayload
+ try {
+ payload = JSON.parse(text) as MiniMaxImagePayload
+ } catch {
+ throw new Error('MiniMax image provider returned invalid JSON')
+ }
+ // An HTTP-OK response can still report failure through a non-zero base_resp.status_code.
+ const statusCode = payload.base_resp?.status_code
+ if (typeof statusCode === 'number' && statusCode !== 0) {
+ throw new Error(`MiniMax image provider failed (${statusCode}): ${payload.base_resp?.status_msg ?? 'unknown error'}`)
+ }
+ const b64 = payload.data?.image_base64?.[0]
+ if (b64) {
+ return { data: Buffer.from(b64, 'base64'), mimeType: 'image/jpeg' }
+ }
+ const imageUrl = payload.data?.image_urls?.[0]
+ if (imageUrl) {
+ let download: Response
+ try {
+ download = await fetch(imageUrl, { signal })
+ } catch (error) {
+ throw imageFetchFailure(imageUrl, error, request)
+ }
+ if (!download.ok) throw new ImageGenHttpError(download.status, await download.text())
+ // Use the Content-Type without parameters, falling back to image/jpeg.
+ const mimeType = download.headers.get('content-type')?.split(';')[0] || 'image/jpeg'
+ return { data: Buffer.from(await download.arrayBuffer()), mimeType }
+ }
+ throw new Error('MiniMax image provider returned no image data')
+ }
+}
+
+/**
+ * Resolve the MiniMax image endpoint from a base URL: a URL already ending in
+ * `/image_generation` is kept, a base ending in `/v1` gets the endpoint appended,
+ * and anything else gets `/v1/image_generation`. An empty base yields the relative path.
+ */
+function minimaxImageGenerationUrl(baseUrl: string): string {
+  const trimmed = baseUrl.trim().replace(/\/+$/, '')
+  if (trimmed === '') return '/v1/image_generation'
+  const lowered = trimmed.toLowerCase()
+  // Also covers '/v1/image_generation', which ends with the same segment.
+  if (lowered.endsWith('/image_generation')) return trimmed
+  return lowered.endsWith('/v1') ? `${trimmed}/image_generation` : `${trimmed}/v1/image_generation`
+}
+
+// aspect_ratio values both MiniMax image models accept (21:9 is image-01
+// only, and image-01 receives explicit width/height instead).
+const MINIMAX_ASPECT_RATIOS: Array<{ label: string; value: number }> = [
+  { label: '1:1', value: 1 },
+  { label: '16:9', value: 16 / 9 },
+  { label: '4:3', value: 4 / 3 },
+  { label: '3:2', value: 3 / 2 },
+  { label: '2:3', value: 2 / 3 },
+  { label: '3:4', value: 3 / 4 },
+  { label: '9:16', value: 9 / 16 }
+]
+
+/**
+ * MiniMax dimension fields for a `WxH` size. Per the t2i API docs only
+ * image-01 accepts explicit width/height (range [512, 2048], multiples
+ * of 8); image-01-live rejects them with status 2013, so every other model
+ * gets the nearest supported aspect_ratio instead. Nearest (not exact)
+ * because mapImageSize rounds the short edge to multiples of 64 — e.g. 3:2
+ * at the 1K tier becomes 1024x704.
+ *
+ * @param model Model name; only the exact name 'image-01' (after trimming) gets width/height.
+ * @param size  A "WxH" string; anything else (including undefined or 'auto') yields no fields.
+ * @returns `{ width, height }`, `{ aspect_ratio }`, or an empty object.
+ */
+export function minimaxImageDimensionFields(
+  model: string,
+  size: string | undefined
+): Record<string, unknown> {
+  const parsed = /^(\d+)x(\d+)$/.exec(size?.trim() ?? '')
+  if (parsed === null) return {}
+  const width = Number(parsed[1])
+  const height = Number(parsed[2])
+  const usable = Number.isFinite(width) && Number.isFinite(height) && width > 0 && height > 0
+  if (!usable) return {}
+  if (model.trim() === 'image-01') return { width, height }
+  // Distance is measured on a log scale so 2:1 and 1:2 are equally far from 1:1.
+  const ratio = width / height
+  const distance = (candidate: { value: number }): number => Math.abs(Math.log(candidate.value / ratio))
+  const nearest = MINIMAX_ASPECT_RATIOS.reduce((best, candidate) =>
+    distance(candidate) < distance(best) ? candidate : best
+  )
+  return { aspect_ratio: nearest.label }
+}
+
+/** Combine the caller's signal with a deadline; the result aborts when either one does. */
+function withTimeout(signal: AbortSignal, timeoutMs: number): AbortSignal {
+  const deadline = AbortSignal.timeout(timeoutMs)
+  return AbortSignal.any([signal, deadline])
+}
+
+/** Telemetry payload: the provider id plus milliseconds elapsed since `startedAt`. */
+function telemetry(startedAt: number, provider: string): Record<string, unknown> {
+  const durationMs = Date.now() - startedAt
+  return { provider, durationMs }
+}
+
+/** Build an error tool result of the form `{ error: { code, message } }`, with telemetry when supplied. */
+function toolError(code: string, message: string, toolTelemetry?: Record<string, unknown>): { output: unknown; isError: true } {
+  const output: Record<string, unknown> = { error: { code, message } }
+  if (toolTelemetry) {
+    output.telemetry = toolTelemetry
+  }
+  return { output, isError: true }
+}
+
+/** Return the trimmed string, or undefined when the value is not a string or is blank. */
+function pickString(value: unknown): string | undefined {
+  if (typeof value !== 'string') return undefined
+  const trimmed = value.trim()
+  return trimmed === '' ? undefined : trimmed
+}
+
+/** Message of an Error, or the string form of any other thrown value. */
+function errorMessage(error: unknown): string {
+  if (error instanceof Error) return error.message
+  return String(error)
+}
diff --git a/kun/src/adapters/tool/local-tool-host.ts b/kun/src/adapters/tool/local-tool-host.ts
index bd69409c..3cf1aa72 100644
--- a/kun/src/adapters/tool/local-tool-host.ts
+++ b/kun/src/adapters/tool/local-tool-host.ts
@@ -12,11 +12,12 @@ import { makeToolResultItem, makeApprovalItem } from '../../domain/item.js'
import { buildBuiltinLocalTools } from './builtin-tools.js'
import { CapabilityRegistry } from './capability-registry.js'
import {
- applyPostToolHookResults,
- applyPreToolHookResults,
- runToolHooks,
- type ResolvedToolHook
-} from './tool-hooks.js'
+ runPostToolUseHooks,
+ runPreToolUseHooks,
+ type PostToolUseOutcome,
+ type PreToolUseOutcome,
+ type ResolvedHook
+} from '../../hooks/hook-engine.js'
import {
normalizeRateLimitedToolOutput
} from './tool-rate-limit.js'
@@ -25,6 +26,7 @@ import {
ReadTracker,
type ReadTrackerOptions
} from './read-tracker.js'
+import { sandboxBlockForTool, type SandboxBlock } from './sandbox-policy.js'
/**
* A single registered tool. Tools are pure functions that observe the
@@ -60,8 +62,8 @@ export type LocalToolHostOptions = {
registry?: CapabilityRegistry
/** Allow-list for `untrusted` policy. Tools outside the list always prompt. */
allowList?: string[]
- /** Optional PreToolUse/PostToolUse hooks. */
- hooks?: readonly ResolvedToolHook[]
+ /** Optional PreToolUse/PostToolUse hooks (lifecycle phases are ignored here). */
+ hooks?: readonly ResolvedHook[]
/** Runtime read-before-edit guard. Disabled by default for direct unit use. */
readTracker?: boolean | ReadTrackerOptions
}
@@ -84,7 +86,7 @@ export class LocalToolHost implements ToolHost {
readonly id = 'local'
private readonly registry: CapabilityRegistry
private readonly allowList: Set
- private readonly hooks: readonly ResolvedToolHook[]
+ private readonly hooks: readonly ResolvedHook[]
private readonly readTracker: ReadTracker
constructor(options: LocalToolHostOptions) {
@@ -114,15 +116,18 @@ export class LocalToolHost implements ToolHost {
if (tool.policy === 'never') {
throw new Error(`tool ${call.toolName} is disabled by policy`)
}
- let preHookResults
+ const sandboxBlock = sandboxBlockForTool(tool, context)
+ if (sandboxBlock) {
+ return {
+ item: this.errorToolResult(context, call, tool, sandboxBlock.message, sandboxBlock.code),
+ approved: false
+ }
+ }
+ let preHooks: PreToolUseOutcome
try {
- preHookResults = await runToolHooks({
- hooks: this.hooks,
- invocation: {
- phase: 'PreToolUse',
- call,
- context: hookContext(context)
- }
+ preHooks = await runPreToolUseHooks(this.hooks, {
+ call,
+ context: hookContext(context)
})
} catch (error) {
return {
@@ -130,14 +135,13 @@ export class LocalToolHost implements ToolHost {
approved: false
}
}
- const preHookDecision = applyPreToolHookResults(call, preHookResults)
- if (preHookDecision.denied) {
+ if (preHooks.denied) {
return {
- item: this.errorToolResult(context, preHookDecision.call, tool, preHookDecision.denied, 'hook_denied'),
+ item: this.errorToolResult(context, preHooks.call, tool, preHooks.denied, 'hook_denied'),
approved: false
}
}
- const activeCall = preHookDecision.call
+ const activeCall = preHooks.call
const readValidation = this.readTracker.validateBeforeTool({ context, call: activeCall })
if (!readValidation.ok) {
return {
@@ -145,19 +149,20 @@ export class LocalToolHost implements ToolHost {
approved: false
}
}
- if (this.isBlockedByRuntimePolicy(tool, activeCall, context)) {
+ const runtimeBlock = this.runtimePolicyBlock(tool, activeCall, context)
+ if (runtimeBlock) {
return {
item: this.errorToolResult(
context,
activeCall,
tool,
- `tool ${activeCall.toolName} is disabled by runtime approval policy`,
- 'approval_policy_blocked'
+ runtimeBlock.message,
+ runtimeBlock.code
),
approved: false
}
}
- const needsApproval = this.requiresApproval(tool, activeCall, context)
+ const needsApproval = !preHooks.autoApproved && this.requiresApproval(tool, activeCall, context)
if (needsApproval) {
const approvalId = `appr_${activeCall.callId}`
const approval: ApprovalRequest = createApprovalRequest({
@@ -183,31 +188,40 @@ export class LocalToolHost implements ToolHost {
if (context.abortSignal.aborted) {
throw new Error('tool call aborted while waiting for approval')
}
- const result = await tool.execute(activeCall.arguments, context, async (update) => {
- if (!onUpdate) return
- const partialItem = makeToolResultItem({
- id: `item_${activeCall.callId}`,
- turnId: context.turnId,
- threadId: context.threadId,
- callId: activeCall.callId,
- toolName: activeCall.toolName,
- toolKind: activeCall.toolKind ?? tool.toolKind,
- output: update.output,
- isError: update.isError,
- status: 'running'
+ let result: Awaited>
+ try {
+ result = await tool.execute(activeCall.arguments, context, async (update) => {
+ if (!onUpdate) return
+ const partialItem = makeToolResultItem({
+ id: `item_${activeCall.callId}`,
+ turnId: context.turnId,
+ threadId: context.threadId,
+ callId: activeCall.callId,
+ toolName: activeCall.toolName,
+ toolKind: activeCall.toolKind ?? tool.toolKind,
+ output: update.output,
+ isError: update.isError,
+ status: 'running'
+ })
+ await onUpdate(partialItem)
})
- await onUpdate(partialItem)
- })
- let postHookResults
+ } catch (error) {
+ // A tool blowing up (an MCP server returning a protocol error, a
+ // provider bug) is feedback for the model, not a reason to kill the
+ // whole turn. Only abort keeps propagating.
+ if (context.abortSignal.aborted) throw error
+ const message = error instanceof Error ? error.message : String(error)
+ return {
+ item: this.errorToolResult(context, activeCall, tool, message, 'tool_execution_failed'),
+ approved: true
+ }
+ }
+ let hookedResult: PostToolUseOutcome
try {
- postHookResults = await runToolHooks({
- hooks: this.hooks,
- invocation: {
- phase: 'PostToolUse',
- call: activeCall,
- context: hookContext(context),
- result
- }
+ hookedResult = await runPostToolUseHooks(this.hooks, {
+ call: activeCall,
+ context: hookContext(context),
+ result
})
} catch (error) {
return {
@@ -215,7 +229,6 @@ export class LocalToolHost implements ToolHost {
approved: true
}
}
- const hookedResult = applyPostToolHookResults(result, postHookResults)
const rateLimited = normalizeRateLimitedToolOutput(hookedResult.output)
const output = rateLimited.rateLimited ? rateLimited.output : hookedResult.output
const isError = hookedResult.isError || rateLimited.isError
@@ -242,14 +255,23 @@ export class LocalToolHost implements ToolHost {
this.readTracker.clear(threadId)
}
- private isBlockedByRuntimePolicy(
+ private runtimePolicyBlock( // returns the reason a call must be refused, or null when it may proceed
tool: LocalTool,
call: ToolCallLike,
context: ToolHostContext
- ): boolean {
- if (this.isInteractiveGuiGateTool(call.toolName)) return false
- if (context.approvalPolicy !== 'never') return false
- return tool.policy !== 'never'
+ ): SandboxBlock | { code: 'approval_policy_blocked'; message: string } | null {
+ const sandboxBlock = sandboxBlockForTool( // sandbox check uses the call's name and kind, falling back to the tool's kind
+ { name: call.toolName, toolKind: call.toolKind ?? tool.toolKind },
+ context
+ )
+ if (sandboxBlock) return sandboxBlock // a sandbox block takes precedence over the approval-policy checks below
+ if (this.isInteractiveGuiGateTool(call.toolName)) return null // these tool names are exempt from the approval-policy block
+ if (context.approvalPolicy !== 'never') return null // only the 'never' approval policy blocks calls here
+ if (tool.policy === 'never') return null
+ return {
+ code: 'approval_policy_blocked',
+ message: `tool ${call.toolName} is disabled by runtime approval policy`
+ }
}
private requiresApproval(tool: LocalTool, call: ToolCallLike, context: ToolHostContext): boolean {
@@ -318,12 +340,13 @@ export class LocalToolHost implements ToolHost {
function hookContext(
context: ToolHostContext
-): Pick {
+): Pick {
return {
threadId: context.threadId,
turnId: context.turnId,
workspace: context.workspace,
approvalPolicy: context.approvalPolicy,
+ ...(context.sandboxMode ? { sandboxMode: context.sandboxMode } : {}), // key is omitted entirely when sandboxMode is unset/falsy
...(context.threadMode ? { threadMode: context.threadMode } : {})
}
}
@@ -400,6 +423,9 @@ function createUserInputTool(name: string): LocalTool {
required: []
},
policy: 'auto',
+ // Only advertised when the turn can actually resolve structured
+ // input (IM bridges and headless runs omit `awaitUserInput`).
+ shouldAdvertise: (context) => typeof context.awaitUserInput === 'function',
execute: async (args, context) => {
if (!context.awaitUserInput) {
return {
diff --git a/kun/src/adapters/tool/mcp-tool-provider.ts b/kun/src/adapters/tool/mcp-tool-provider.ts
index 9e880ac7..9496637d 100644
--- a/kun/src/adapters/tool/mcp-tool-provider.ts
+++ b/kun/src/adapters/tool/mcp-tool-provider.ts
@@ -1,5 +1,6 @@
import { Client } from '@modelcontextprotocol/sdk/client/index.js'
import { createHash } from 'node:crypto'
+import { posix, win32 } from 'node:path'
import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js'
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'
@@ -77,6 +78,19 @@ export type McpToolProviderBuildResult = {
export type McpToolProviderOptions = {
clientFactory?: (serverId: string, server: McpServerConfig) => Promise
nowIso?: () => string
+ /**
+ * Upper bound for connect + initial tool listing per server during startup.
+ * A slow or hung server (e.g. an npx-based stdio server resolving packages)
+ * must not keep the whole runtime from reporting ready.
+ */
+ startupConnectTimeoutMs?: number // milliseconds; DEFAULT_MCP_STARTUP_CONNECT_TIMEOUT_MS applies when omitted
+}
+
+const DEFAULT_MCP_STARTUP_CONNECT_TIMEOUT_MS = 10_000 // 10 seconds per server
+
+export type McpStdioEnvironmentOptions = { // overrides for buildMcpStdioEnvironment
+ platform?: NodeJS.Platform // defaults to process.platform
+ baseEnv?: NodeJS.ProcessEnv // defaults to process.env
}
type McpConnectionState = {
@@ -128,36 +142,75 @@ export async function buildMcpToolProviders(
}
}
- for (const [serverId, server] of Object.entries(mcp.servers)) {
- if (!server.enabled) {
- diagnostics.push(serverDiagnostic({ serverId, server }, 'disabled', 0))
+ // Connect all servers in parallel — startup previously paid the sum of
+ // every server's connect + list latency, and a single hung server (e.g.
+ // npx resolving a package) blocked the runtime ready signal forever.
+ const startupTimeoutMs = options.startupConnectTimeoutMs ?? DEFAULT_MCP_STARTUP_CONNECT_TIMEOUT_MS
+ type ConnectOutcome =
+ | { serverId: string; server: McpServerConfig; status: 'disabled' }
+ | { serverId: string; server: McpServerConfig; status: 'error'; error: unknown }
+ | {
+ serverId: string
+ server: McpServerConfig
+ status: 'connected'
+ state: McpConnectionState
+ listed: McpToolDescriptor[]
+ }
+ const outcomes = await Promise.all(
+ Object.entries(mcp.servers).map(async ([serverId, server]): Promise => {
+ if (!server.enabled) {
+ return { serverId, server, status: 'disabled' }
+ }
+ const attempt = (async () => {
+ const client = await clientFactory(serverId, server)
+ const state: McpConnectionState = {
+ serverId,
+ server,
+ client,
+ clientFactory,
+ nowIso,
+ lastConnectedAt: nowIso()
+ }
+ const listed = await refreshMcpConnectionCatalog(state)
+ return { state, listed }
+ })()
+ try {
+ const result = await raceStartupTimeout(attempt, startupTimeoutMs, serverId)
+ return { serverId, server, status: 'connected', ...result }
+ } catch (error) {
+ return { serverId, server, status: 'error', error }
+ }
+ })
+ )
+
+ for (const outcome of outcomes) {
+ if (outcome.status === 'disabled') {
+ diagnostics.push(serverDiagnostic({ serverId: outcome.serverId, server: outcome.server }, 'disabled', 0))
continue
}
- try {
- const client = await clientFactory(serverId, server)
- const state: McpConnectionState = {
- serverId,
- server,
- client,
- clientFactory,
- nowIso,
- lastConnectedAt: nowIso()
- }
- connected.push(state)
- const listed = await refreshMcpConnectionCatalog(state)
- catalogState.records.push(...listed.map((tool) => createMcpSearchCatalogRecord(state, tool)))
- const tools = listed.map((tool) => createMcpLocalTool(state, tool))
- directProviders.push({
- id: `mcp:${serverId}`,
- kind: 'mcp',
- enabled: true,
- available: true,
- tools
- })
- diagnostics.push(serverDiagnostic(state, 'connected', tools.length))
- } catch (error) {
- diagnostics.push(serverDiagnostic({ serverId, server }, 'error', 0, errorMessage(error)))
+ if (outcome.status === 'error') {
+ diagnostics.push(
+ serverDiagnostic(
+ { serverId: outcome.serverId, server: outcome.server },
+ 'error',
+ 0,
+ formatMcpConnectionError(outcome.error, outcome.server)
+ )
+ )
+ continue
}
+ const { state, listed } = outcome
+ connected.push(state)
+ catalogState.records.push(...listed.map((tool) => createMcpSearchCatalogRecord(state, tool)))
+ const tools = listed.map((tool) => createMcpLocalTool(state, tool))
+ directProviders.push({
+ id: `mcp:${outcome.serverId}`,
+ kind: 'mcp',
+ enabled: true,
+ available: true,
+ tools
+ })
+ diagnostics.push(serverDiagnostic(state, 'connected', tools.length))
}
const connectedServers = diagnostics.filter((diagnostic) => diagnostic.status === 'connected').length
@@ -248,7 +301,7 @@ function createTransport(server: McpServerConfig): Transport {
return new StdioClientTransport({
command: server.command ?? '',
args: server.args,
- env: server.env,
+ env: buildMcpStdioEnvironment(server.env),
stderr: 'pipe'
})
case 'streamable-http':
@@ -356,11 +409,58 @@ async function callMcpToolWithReconnect(
} catch (error) {
state.lastError = redactSecretText(errorMessage(error))
if (signal?.aborted) throw error
+ // Deterministic server-side failures (validation errors, bad
+ // arguments) come back identically on a fresh connection; tearing
+ // down a healthy session for them just loses server state. Only
+ // transport-looking failures earn a reconnect + retry.
+ if (!looksLikeMcpTransportError(error)) throw error
const client = await reconnectMcpConnection(state)
return client.callTool(input, { signal, timeout })
}
}
+function looksLikeMcpTransportError(error: unknown): boolean { // heuristic: true when the error message mentions a transport-level failure
+ const message = errorMessage(error).toLowerCase() // matching below is case-insensitive
+ return (
+ message.includes('connect') || // also matches "connection", "disconnected", "reconnect"
+ message.includes('connection') ||
+ message.includes('transport') ||
+ message.includes('timed out') ||
+ message.includes('timeout') ||
+ message.includes('epipe') ||
+ message.includes('broken pipe') ||
+ message.includes('socket') ||
+ message.includes('stream closed') ||
+ message.includes('fetch failed') ||
+ message.includes('network')
+ )
+}
+
+async function raceStartupTimeout( // resolves with the attempt's result, or rejects if timeoutMs elapses first
+ attempt: Promise,
+ timeoutMs: number,
+ serverId: string
+): Promise {
+ let timer: ReturnType | undefined
+ try {
+ return await Promise.race([
+ attempt,
+ new Promise((_, reject) => { // never resolves; it can only reject when the timer fires
+ timer = setTimeout(
+ () => reject(new Error(`MCP server "${serverId}" did not connect within ${timeoutMs}ms during startup`)),
+ timeoutMs
+ )
+ })
+ ])
+ } catch (error) {
+ // A late successful connection would otherwise leak the child process.
+ void attempt.then((result) => result.state.client.close()).catch(() => undefined) // best effort: attempt rejections and close failures are ignored
+ throw error
+ } finally {
+ if (timer) clearTimeout(timer) // runs on success and failure alike so the timer never outlives the race
+ }
+}
+
async function reconnectMcpConnection(state: McpConnectionState): Promise {
await state.client.close().catch(() => undefined)
const client = await state.clientFactory(state.serverId, state.server)
@@ -423,3 +523,127 @@ function normalizePathForTrust(value: string): string {
function errorMessage(error: unknown): string {
return error instanceof Error ? error.message : String(error)
}
+
+export function buildMcpStdioEnvironment( // returns serverEnv plus a PATH entry extended with common command directories
+ serverEnv: Record = {},
+ options: McpStdioEnvironmentOptions = {}
+): Record {
+ const platform = options.platform ?? process.platform
+ const baseEnv = options.baseEnv ?? process.env
+ const pathKey = findPathKey(serverEnv) ?? findPathKey(baseEnv) ?? 'PATH' // reuse the existing spelling of the key; server config wins over base env
+ const configuredPath = readEnvPath(serverEnv)
+ const inheritedPath = readEnvPath(baseEnv)
+ const pathValue = mergePathEntries(
+ [configuredPath ?? inheritedPath ?? '', ...commonMcpCommandPathEntries(platform, baseEnv)], // a configured PATH replaces the inherited one; common dirs are appended after it
+ pathDelimiter(platform)
+ )
+ return { // note: only serverEnv is spread here, baseEnv variables are not copied
+ ...serverEnv,
+ ...(pathValue ? { [pathKey]: pathValue } : {}) // PATH is only written when the merged value is non-empty
+ }
+}
+
+export function formatMcpConnectionError(error: unknown, server: McpServerConfig): string { // error text for diagnostics, with a hint appended for missing stdio executables
+ const message = errorMessage(error)
+ if (server.transport !== 'stdio' || !isMissingExecutableError(error, message)) return message // anything else is reported unchanged
+ const command = missingExecutableCommand(error) ?? server.command ?? 'configured command' // prefer the path reported on the error object
+ const hint = isBareCommand(command)
+ ? missingBareCommandHint(command) // bare name: explain the PATH lookup failure
+ : `Could not find MCP command "${command}". Check that the configured executable path exists.`
+ return `${message}. ${hint}`
+}
+
+function commonMcpCommandPathEntries( // extra PATH directories to append for the given platform
+ platform: NodeJS.Platform,
+ env: NodeJS.ProcessEnv
+): string[] {
+ if (platform === 'darwin') {
+ return [
+ '/opt/homebrew/bin',
+ '/usr/local/bin',
+ '/opt/local/bin',
+ homePath(env, '.volta/bin'),
+ homePath(env, '.local/bin'),
+ homePath(env, '.bun/bin')
+ ].filter((entry): entry is string => Boolean(entry)) // drops the '' that homePath returns when HOME is unset
+ }
+ if (platform === 'linux') {
+ return [
+ '/home/linuxbrew/.linuxbrew/bin',
+ '/usr/local/bin',
+ '/usr/bin',
+ homePath(env, '.volta/bin'),
+ homePath(env, '.local/bin'),
+ homePath(env, '.bun/bin')
+ ].filter((entry): entry is string => Boolean(entry)) // drops the '' that homePath returns when HOME is unset
+ }
+ if (platform === 'win32') {
+ return [
+ env.APPDATA ? win32.join(env.APPDATA, 'npm') : '',
+ env.ProgramFiles ? win32.join(env.ProgramFiles, 'nodejs') : '',
+ env['ProgramFiles(x86)'] ? win32.join(env['ProgramFiles(x86)'], 'nodejs') : ''
+ ].filter((entry): entry is string => Boolean(entry)) // entries whose variable is unset become '' and are removed
+ }
+ return [] // any other platform gets no extra entries
+}
+
+function findPathKey(env: Record): string | undefined { // returns the env key spelled "path" in any letter case, or undefined
+ return Object.keys(env).find((key) => key.toLowerCase() === 'path') // first match wins if several spellings exist
+}
+
+function readEnvPath(env: Record): string | undefined { // reads the PATH value regardless of the key's letter case
+ const key = findPathKey(env)
+ const value = key ? env[key] : undefined
+ return value && value.trim() ? value : undefined // blank or whitespace-only values count as missing; the value itself is returned untrimmed
+}
+
+function mergePathEntries(values: string[], delimiter: string): string { // joins PATH-style lists, dropping blanks and duplicates; first occurrence wins
+ const seen = new Set()
+ const entries: string[] = []
+ for (const value of values) {
+ for (const entry of value.split(delimiter)) {
+ const trimmed = entry.trim()
+ if (!trimmed) continue
+ const key = delimiter === ';' ? trimmed.toLowerCase() : trimmed // fold case only for ';' (Windows) lists; POSIX directories that differ in case are distinct
+ if (seen.has(key)) continue
+ seen.add(key)
+ entries.push(trimmed) // keep the original spelling in the output
+ }
+ }
+ return entries.join(delimiter)
+}
+
+function pathDelimiter(platform: NodeJS.Platform): string { // PATH list separator for the given platform
+ return platform === 'win32' ? ';' : ':'
+}
+
+function homePath(env: NodeJS.ProcessEnv, relativePath: string): string { // joins relativePath onto env.HOME using POSIX separators
+ return env.HOME ? posix.join(env.HOME, relativePath) : '' // '' when HOME is unset; callers filter empty entries out
+}
+
+function isMissingExecutableError(error: unknown, message: string): boolean { // true when the failure is an ENOENT from spawning the command
+ const code = typeof error === 'object' && error !== null && 'code' in error
+ ? String((error as { code?: unknown }).code ?? '')
+ : ''
+ return code === 'ENOENT' || /\bspawn\s+\S+\s+ENOENT\b/i.test(message) // message fallback only matches commands without whitespace (\S+)
+}
+
+function missingExecutableCommand(error: unknown): string | undefined { // extracts the trimmed `path` property from an error object, if it is a non-blank string
+ if (!error || typeof error !== 'object') return undefined
+ const path = (error as { path?: unknown }).path
+ return typeof path === 'string' && path.trim() ? path.trim() : undefined
+}
+
+function isBareCommand(command: string): boolean { // true for a non-blank command containing neither '/' nor '\'
+ return Boolean(command.trim()) && !command.includes('/') && !command.includes('\\')
+}
+
+function missingBareCommandHint(command: string): string { // user-facing hint for a command that was not found on PATH
+ if (process.platform === 'win32') { // wording depends on the host's process.platform, not on a parameter
+ return `Could not find "${command}" on PATH while starting the MCP server. Make sure Node/npm is installed and available to Kun, or set the MCP command to an absolute path.`
+ }
+ if (process.platform === 'darwin') {
+ return `Could not find "${command}" on PATH while starting the MCP server. If Kun was launched from Finder or the desktop, make sure Node/npm is installed and available to GUI apps, or set the MCP command to an absolute path such as /opt/homebrew/bin/${command}.`
+ }
+ return `Could not find "${command}" on PATH while starting the MCP server. Make sure Node/npm is installed and available to Kun, or set the MCP command to an absolute path such as /usr/local/bin/${command}.`
+}
diff --git a/kun/src/adapters/tool/media-gen-tool-provider.ts b/kun/src/adapters/tool/media-gen-tool-provider.ts
new file mode 100644
index 00000000..99a244d6
--- /dev/null
+++ b/kun/src/adapters/tool/media-gen-tool-provider.ts
@@ -0,0 +1,951 @@
+import { randomBytes } from 'node:crypto'
+import { mkdir, readFile, writeFile } from 'node:fs/promises'
+import { isAbsolute, join, relative, resolve } from 'node:path'
+import type { KunCapabilitiesConfig } from '../../contracts/capabilities.js'
+import { detectImage } from '../../attachments/attachment-store.js'
+import type { ToolExecutionUpdate, ToolHostContext } from '../../ports/tool-host.js'
+import type { CapabilityToolProvider } from './capability-registry.js'
+import { ImageGenHttpError, describeNetworkError } from './image-gen-tool-provider.js'
+import { LocalToolHost } from './local-tool-host.js'
+
+const GENERATED_SPEECH_DIR = '.deepseekgui-audio' // workspace directory named in the generate_speech description
+const GENERATED_MUSIC_DIR = '.deepseekgui-music' // workspace directory named in the generate_music description
+const GENERATED_VIDEO_DIR = '.deepseekgui-videos' // workspace directory named in the generate_video description
+const MAX_REFERENCE_IMAGE_BYTES = 10 * 1024 * 1024 // 10 MiB
+const REFERENCE_MIME_TYPES = new Set(['image/png', 'image/jpeg', 'image/webp']) // presumably the accepted frame-image types — TODO confirm against collectFrameImage
+const AUDIO_FORMATS = new Set(['mp3', 'wav', 'flac', 'pcm', 'pcm16']) // advertised as the `format` enum of the speech and music tools
+const VIDEO_RESOLUTIONS = ['768P', '1080P'] as const // advertised as the `resolution` enum of generate_video
+
+export type GeneratedMedia = { data: Buffer; mimeType: string; extension: string } // raw bytes plus the MIME type and file extension that are handed to writeGeneratedMediaFile
+
+export type SpeechGenRequest = { // input handed to SpeechGenClient.generate
+ text: string // text to synthesize
+ model: string
+ voice?: string // provider voice id/name; each client picks its own default when absent
+ style?: string // optional voice style instruction
+ format: string // audio format after normalizeAudioFormat
+ timeoutMs: number
+ signal: AbortSignal // the tool call's abort signal
+}
+
+export type MusicGenRequest = { // input handed to MusicGenClient.generate
+ prompt?: string // style / mood / arrangement prompt
+ lyrics?: string // lyrics for sung music
+ instrumental?: boolean // true requests a track without vocals
+ lyricsOptimizer?: boolean // maps from the tool's lyrics_optimizer argument
+ referenceAudioUrl?: string // maps from the tool's reference_audio_url argument
+ model: string
+ format: string // audio format after normalizeAudioFormat
+ timeoutMs: number
+ signal: AbortSignal // the tool call's abort signal
+}
+
+export type VideoGenRequest = { // input handed to VideoGenClient.generate
+ prompt: string
+ model: string
+ duration: number // result of normalizeDuration(args.duration, config.defaultDuration)
+ resolution: string // per-call value or config.defaultResolution
+ firstFrameImage?: { mimeType: string; data: Buffer } // optional first-frame guidance image
+ lastFrameImage?: { mimeType: string; data: Buffer } // optional last-frame guidance image
+ timeoutMs: number
+ pollIntervalMs: number // taken from config.pollIntervalMs
+ signal: AbortSignal // the tool call's abort signal
+ onUpdate?: (update: ToolExecutionUpdate) => Promise | void // the tool's progress callback, forwarded to the client
+}
+
+export interface SpeechGenClient { // provider adapter used by the generate_speech tool
+ id: string // passed to telemetry(...) in tool results
+ generate(request: SpeechGenRequest): Promise
+}
+
+export interface MusicGenClient { // provider adapter used by the generate_music tool
+ id: string // passed to telemetry(...) in tool results
+ generate(request: MusicGenRequest): Promise
+}
+
+export interface VideoGenClient { // provider adapter used by the generate_video tool
+ id: string // passed to telemetry(...) in tool results
+ generate(request: VideoGenRequest): Promise
+}
+
+export type SpeechGenDiagnostic = { // status record emitted by buildSpeechGenToolProviders
+ id: 'speechGen'
+ enabled: boolean
+ available: boolean // false when required provider fields are missing
+ model?: string
+ reason?: string // set when the provider is enabled but unavailable
+}
+
+export type MusicGenDiagnostic = { // status record emitted by buildMusicGenToolProviders
+ id: 'musicGen'
+ enabled: boolean
+ available: boolean // false when required provider fields are missing
+ model?: string
+ reason?: string // set when the provider is enabled but unavailable
+}
+
+export type VideoGenDiagnostic = { // status record emitted by buildVideoGenToolProviders
+ id: 'videoGen'
+ enabled: boolean
+ available: boolean // false when required provider fields are missing
+ model?: string
+ reason?: string // set when the provider is enabled but unavailable
+}
+
+export type MediaGenToolProviderOptions = { // injection points shared by the three media builders
+ speechClient?: SpeechGenClient // used instead of createSpeechGenClient(config) when provided
+ musicClient?: MusicGenClient // used instead of createMusicGenClient(config) when provided
+ videoClient?: VideoGenClient // used instead of createVideoGenClient(config) when provided
+ nowIso?: () => string // forwarded to writeGeneratedMediaFile
+}
+
+export type SpeechGenToolProviderBuildResult = { // return shape of buildSpeechGenToolProviders
+ providers: CapabilityToolProvider[] // empty when the capability is disabled
+ diagnostics: SpeechGenDiagnostic[] // empty when the capability is disabled
+ available: boolean // true only when the generate_speech tool was registered
+}
+
+export type MusicGenToolProviderBuildResult = { // return shape of buildMusicGenToolProviders
+ providers: CapabilityToolProvider[] // empty when the capability is disabled
+ diagnostics: MusicGenDiagnostic[] // empty when the capability is disabled
+ available: boolean // true only when the generate_music tool was registered
+}
+
+export type VideoGenToolProviderBuildResult = { // return shape of buildVideoGenToolProviders
+ providers: CapabilityToolProvider[] // empty when the capability is disabled
+ diagnostics: VideoGenDiagnostic[] // empty when the capability is disabled
+ available: boolean // true only when the generate_video tool was registered
+}
+
+export function buildSpeechGenToolProviders( // builds the generate_speech tool provider and its diagnostics from the speechGen config
+ config: KunCapabilitiesConfig['speechGen'] | undefined,
+ options: MediaGenToolProviderOptions = {}
+): SpeechGenToolProviderBuildResult {
+ if (!config?.enabled) return { providers: [], diagnostics: [], available: false } // disabled or absent config: nothing registered, nothing reported
+ const missing = missingProviderFields(config)
+ if (missing.length > 0) { // enabled but incomplete: report an unavailable provider with a reason and no tools
+ const reason = `speech generation provider is not configured (missing ${missing.join(', ')})`
+ return {
+ providers: [{ id: 'speechGen', kind: 'audio', enabled: true, available: false, reason, tools: [] }],
+ diagnostics: [{ id: 'speechGen', enabled: true, available: false, model: config.model, reason }],
+ available: false
+ }
+ }
+
+ const client = options.speechClient ?? createSpeechGenClient(config) // an injected client takes precedence over the config-derived one
+ const model = config.model! // presumably guaranteed by the missingProviderFields check above — TODO confirm
+
+ const tool = LocalToolHost.defineTool({
+ name: 'generate_speech',
+ description: [
+ 'Generate spoken audio from text using the configured text-to-speech provider.',
+ `The generated audio is saved under ${GENERATED_SPEECH_DIR}/ in the workspace and returned as a generated file.`,
+ 'Use voice for a provider voice id/name and style for Xiaomi MiMo voice style instructions when needed.'
+ ].join(' '),
+ inputSchema: {
+ type: 'object',
+ properties: {
+ text: { type: 'string', description: 'Text to synthesize into speech' },
+ voice: { type: 'string', description: 'Optional provider voice id/name' },
+ style: { type: 'string', description: 'Optional voice style instruction for providers that support it' },
+ format: { type: 'string', enum: [...AUDIO_FORMATS] }
+ },
+ required: ['text'],
+ additionalProperties: false
+ },
+ policy: 'untrusted',
+ execute: async (args, context) => {
+ const startedAt = Date.now() // start time handed to telemetry(...)
+ const text = pickString(args.text)
+ if (!text) return toolError('invalid_text', 'text is required')
+ const format = normalizeAudioFormat(pickString(args.format) || config.format) // per-call format wins over the configured default
+ const voice = pickString(args.voice) || config.voice // per-call voice wins over the configured default
+ const style = pickString(args.style)
+ try {
+ const media = await client.generate({
+ text,
+ model,
+ ...(voice ? { voice } : {}), // optional keys are omitted rather than sent as undefined
+ ...(style ? { style } : {}),
+ format,
+ timeoutMs: config.timeoutMs,
+ signal: context.abortSignal
+ })
+ const file = await writeGeneratedMediaFile({
+ context,
+ data: media.data,
+ mimeType: media.mimeType,
+ extension: media.extension,
+ dir: GENERATED_SPEECH_DIR,
+ prefix: 'speech',
+ nowIso: options.nowIso
+ })
+ return {
+ output: {
+ files: [file],
+ model,
+ voice,
+ format,
+ telemetry: telemetry(startedAt, client.id)
+ }
+ }
+ } catch (error) { // generation and file-write failures are returned as tool errors, not thrown
+ return toolError('generation_failed', providerErrorMessage(error), telemetry(startedAt, client.id))
+ }
+ }
+ })
+
+ return {
+ providers: [{ id: 'speechGen', kind: 'audio', enabled: true, available: true, tools: [tool] }],
+ diagnostics: [{ id: 'speechGen', enabled: true, available: true, model }],
+ available: true
+ }
+}
+
+export function buildMusicGenToolProviders( // builds the generate_music tool provider and its diagnostics from the musicGen config
+ config: KunCapabilitiesConfig['musicGen'] | undefined,
+ options: MediaGenToolProviderOptions = {}
+): MusicGenToolProviderBuildResult {
+ if (!config?.enabled) return { providers: [], diagnostics: [], available: false } // disabled or absent config: nothing registered, nothing reported
+ const missing = missingProviderFields(config)
+ if (missing.length > 0) { // enabled but incomplete: report an unavailable provider with a reason and no tools
+ const reason = `music generation provider is not configured (missing ${missing.join(', ')})`
+ return {
+ providers: [{ id: 'musicGen', kind: 'audio', enabled: true, available: false, reason, tools: [] }],
+ diagnostics: [{ id: 'musicGen', enabled: true, available: false, model: config.model, reason }],
+ available: false
+ }
+ }
+
+ const client = options.musicClient ?? createMusicGenClient(config) // an injected client takes precedence over the config-derived one
+ const model = config.model! // presumably guaranteed by the missingProviderFields check above — TODO confirm
+
+ const tool = LocalToolHost.defineTool({
+ name: 'generate_music',
+ description: [
+ 'Generate a song or instrumental audio using the configured music provider.',
+ `The generated audio is saved under ${GENERATED_MUSIC_DIR}/ in the workspace and returned as a generated file.`,
+ 'Provide prompt for style/intention, lyrics for sung music, or instrumental=true for instrumental tracks.'
+ ].join(' '),
+ inputSchema: {
+ type: 'object',
+ properties: {
+ prompt: { type: 'string', description: 'Musical style, mood, arrangement, or generation prompt' },
+ lyrics: { type: 'string', description: 'Optional lyrics for sung music' },
+ instrumental: { type: 'boolean', description: 'Generate instrumental music without vocals' },
+ lyrics_optimizer: { type: 'boolean', description: 'Ask provider to generate or improve lyrics' },
+ reference_audio_url: { type: 'string', description: 'Optional public URL for cover/reference audio' },
+ format: { type: 'string', enum: [...AUDIO_FORMATS] }
+ },
+ additionalProperties: false
+ },
+ policy: 'untrusted',
+ execute: async (args, context) => {
+ const startedAt = Date.now() // start time handed to telemetry(...)
+ const prompt = pickString(args.prompt)
+ const lyrics = pickString(args.lyrics)
+ const instrumental = pickBoolean(args.instrumental)
+ const lyricsOptimizer = pickBoolean(args.lyrics_optimizer)
+ if (!prompt && !lyrics && instrumental !== true) { // the schema has no required fields, so at least one usable input is enforced here
+ return toolError('invalid_music_request', 'provide prompt, lyrics, or instrumental=true')
+ }
+ const format = normalizeAudioFormat(pickString(args.format) || config.format) // per-call format wins over the configured default
+ try {
+ const media = await client.generate({
+ ...(prompt ? { prompt } : {}), // optional keys are omitted rather than sent as undefined
+ ...(lyrics ? { lyrics } : {}),
+ ...(instrumental !== undefined ? { instrumental } : {}), // an explicit false is still forwarded
+ ...(lyricsOptimizer !== undefined ? { lyricsOptimizer } : {}),
+ ...(pickString(args.reference_audio_url) ? { referenceAudioUrl: pickString(args.reference_audio_url) } : {}),
+ model,
+ format,
+ timeoutMs: config.timeoutMs,
+ signal: context.abortSignal
+ })
+ const file = await writeGeneratedMediaFile({
+ context,
+ data: media.data,
+ mimeType: media.mimeType,
+ extension: media.extension,
+ dir: GENERATED_MUSIC_DIR,
+ prefix: 'music',
+ nowIso: options.nowIso
+ })
+ return {
+ output: {
+ files: [file],
+ model,
+ format,
+ telemetry: telemetry(startedAt, client.id)
+ }
+ }
+ } catch (error) { // generation and file-write failures are returned as tool errors, not thrown
+ return toolError('generation_failed', providerErrorMessage(error), telemetry(startedAt, client.id))
+ }
+ }
+ })
+
+ return {
+ providers: [{ id: 'musicGen', kind: 'audio', enabled: true, available: true, tools: [tool] }],
+ diagnostics: [{ id: 'musicGen', enabled: true, available: true, model }],
+ available: true
+ }
+}
+
+export function buildVideoGenToolProviders( // builds the generate_video tool provider and its diagnostics from the videoGen config
+ config: KunCapabilitiesConfig['videoGen'] | undefined,
+ options: MediaGenToolProviderOptions = {}
+): VideoGenToolProviderBuildResult {
+ if (!config?.enabled) return { providers: [], diagnostics: [], available: false } // disabled or absent config: nothing registered, nothing reported
+ const missing = missingProviderFields(config)
+ if (missing.length > 0) { // enabled but incomplete: report an unavailable provider with a reason and no tools
+ const reason = `video generation provider is not configured (missing ${missing.join(', ')})`
+ return {
+ providers: [{ id: 'videoGen', kind: 'video', enabled: true, available: false, reason, tools: [] }],
+ diagnostics: [{ id: 'videoGen', enabled: true, available: false, model: config.model, reason }],
+ available: false
+ }
+ }
+
+ const client = options.videoClient ?? createVideoGenClient(config) // an injected client takes precedence over the config-derived one
+ const model = config.model! // presumably guaranteed by the missingProviderFields check above — TODO confirm
+
+ const tool = LocalToolHost.defineTool({
+ name: 'generate_video',
+ description: [
+ 'Generate a video from a text prompt using the configured video provider.',
+ 'Optionally pass workspace-relative first_frame_image_path and last_frame_image_path for image-to-video guidance.',
+ `The generated video is saved under ${GENERATED_VIDEO_DIR}/ in the workspace and returned as a generated file.`
+ ].join(' '),
+ inputSchema: {
+ type: 'object',
+ properties: {
+ prompt: { type: 'string', description: 'Detailed video generation prompt' },
+ duration: { type: 'integer', minimum: 1, maximum: 30 },
+ resolution: { type: 'string', enum: VIDEO_RESOLUTIONS },
+ first_frame_image_path: { type: 'string', description: 'Workspace-relative png/jpeg/webp first frame' },
+ last_frame_image_path: { type: 'string', description: 'Workspace-relative png/jpeg/webp last frame' }
+ },
+ required: ['prompt'],
+ additionalProperties: false
+ },
+ policy: 'untrusted',
+ execute: async (args, context, onUpdate) => {
+ const startedAt = Date.now() // start time handed to telemetry(...)
+ const prompt = pickString(args.prompt)
+ if (!prompt) return toolError('invalid_prompt', 'prompt is required')
+ const firstFrame = await collectFrameImage(args.first_frame_image_path, context, 'first_frame_image_path')
+ if ('error' in firstFrame) return firstFrame.error // frame problems short-circuit before the provider is contacted
+ const lastFrame = await collectFrameImage(args.last_frame_image_path, context, 'last_frame_image_path')
+ if ('error' in lastFrame) return lastFrame.error
+ const duration = normalizeDuration(args.duration, config.defaultDuration)
+ const resolution = pickString(args.resolution) || config.defaultResolution // per-call resolution wins over the configured default
+ try {
+ const media = await client.generate({
+ prompt,
+ model,
+ duration,
+ resolution,
+ ...(firstFrame.image ? { firstFrameImage: firstFrame.image } : {}), // frames are only forwarded when one was supplied
+ ...(lastFrame.image ? { lastFrameImage: lastFrame.image } : {}),
+ timeoutMs: config.timeoutMs,
+ pollIntervalMs: config.pollIntervalMs,
+ signal: context.abortSignal,
+ onUpdate // lets the client report progress through the tool's update callback
+ })
+ const file = await writeGeneratedMediaFile({
+ context,
+ data: media.data,
+ mimeType: media.mimeType,
+ extension: media.extension,
+ dir: GENERATED_VIDEO_DIR,
+ prefix: 'video',
+ nowIso: options.nowIso
+ })
+ return {
+ output: {
+ files: [file],
+ model,
+ duration,
+ resolution,
+ telemetry: telemetry(startedAt, client.id)
+ }
+ }
+ } catch (error) { // generation and file-write failures are returned as tool errors, not thrown
+ return toolError('generation_failed', providerErrorMessage(error), telemetry(startedAt, client.id))
+ }
+ }
+ })
+
+ return {
+ providers: [{ id: 'videoGen', kind: 'video', enabled: true, available: true, tools: [tool] }],
+ diagnostics: [{ id: 'videoGen', enabled: true, available: true, model }],
+ available: true
+ }
+}
+
+export function createSpeechGenClient(config: { // picks the speech client implementation from config.protocol
+ protocol?: string
+ baseUrl?: string
+ apiKey?: string
+}): SpeechGenClient {
+ if (config.protocol === 'minimax-t2a') return new MiniMaxSpeechClient(config.baseUrl!, config.apiKey!) // baseUrl/apiKey are asserted non-null, not validated here
+ if (config.protocol === 'mimo-tts') return new MimoSpeechClient(config.baseUrl!, config.apiKey!)
+ return new OpenAiCompatSpeechClient(config.baseUrl!, config.apiKey!) // any other or missing protocol falls back to the OpenAI-compatible client
+}
+
+export function createMusicGenClient(config: { // creates the music client; config.protocol is not consulted
+ protocol?: string
+ baseUrl?: string
+ apiKey?: string
+}): MusicGenClient {
+ return new MiniMaxMusicClient(config.baseUrl!, config.apiKey!) // baseUrl/apiKey are asserted non-null, not validated here
+}
+
+export function createVideoGenClient(config: { // creates the video client; config.protocol is not consulted
+ protocol?: string
+ baseUrl?: string
+ apiKey?: string
+}): VideoGenClient {
+ return new MiniMaxVideoClient(config.baseUrl!, config.apiKey!) // baseUrl/apiKey are asserted non-null, not validated here
+}
+
+export class OpenAiCompatSpeechClient implements SpeechGenClient { // speech client that POSTs to <baseUrl>/v1/audio/speech and returns the raw response body
+ readonly id = 'openai-speech'
+ private readonly endpointUrl: string
+
+ constructor(
+ baseUrl: string,
+ private readonly apiKey: string
+ ) {
+ this.endpointUrl = apiUrl(baseUrl, '/v1/audio/speech') // endpoint is resolved once at construction
+ }
+
+ async generate(request: SpeechGenRequest): Promise {
+ const response = await requestResponse(this.endpointUrl, {
+ method: 'POST',
+ headers: {
+ Authorization: `Bearer ${this.apiKey}`,
+ 'Content-Type': 'application/json'
+ },
+ body: JSON.stringify({
+ model: request.model,
+ input: request.text,
+ voice: request.voice || 'alloy', // default voice when none is supplied; request.style is not sent by this client
+ response_format: request.format
+ }),
+ signal: withTimeout(request.signal, request.timeoutMs)
+ }, request)
+ if (!response.ok) throw new ImageGenHttpError(response.status, await response.text()) // reuses the image-gen HTTP error type for non-2xx replies
+ const mimeType = response.headers.get('content-type')?.split(';')[0] || audioMimeType(request.format) // header value without parameters, else derived from the requested format
+ return {
+ data: Buffer.from(await response.arrayBuffer()),
+ mimeType,
+ extension: audioExtension(request.format) // extension follows the requested format, not the response header
+ }
+ }
+}
+
+/**
+ * Speech client for the MiniMax `/v1/t2a_v2` endpoint.
+ * Audio is requested as a hex string and decoded locally.
+ */
+export class MiniMaxSpeechClient implements SpeechGenClient {
+  readonly id = 'minimax-t2a'
+  private readonly endpointUrl: string
+
+  constructor(
+    baseUrl: string,
+    private readonly apiKey: string
+  ) {
+    this.endpointUrl = apiUrl(baseUrl, '/v1/t2a_v2')
+  }
+
+  /**
+   * Requests synthesized speech and decodes the hex payload into bytes.
+   *
+   * @throws Error when `base_resp` reports a non-zero status, when no audio
+   *   is returned, or when the audio is not valid hex.
+   */
+  // NOTE(review): the result type is spelled structurally because the original
+  // type argument is not visible in this chunk; swap in the named type if one exists.
+  async generate(request: SpeechGenRequest): Promise<{ data: Buffer; mimeType: string; extension: string }> {
+    const voiceId = request.voice || 'male-qn-qingse'
+    const payload = await requestJson<MiniMaxAudioPayload>(this.endpointUrl, {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`,
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({
+        model: request.model,
+        text: request.text,
+        output_format: 'hex',
+        voice_setting: {
+          voice_id: voiceId,
+          speed: 1,
+          vol: 1,
+          pitch: 0
+        },
+        audio_setting: {
+          format: request.format,
+          sample_rate: request.format === 'mp3' ? 32_000 : 44_100,
+          bitrate: 128_000,
+          channel: 1
+        }
+      }),
+      signal: withTimeout(request.signal, request.timeoutMs)
+    }, request)
+    assertMiniMaxOk(payload.base_resp, 'MiniMax speech provider')
+    const audio = payload.data?.audio
+    if (!audio) throw new Error('MiniMax speech provider returned no audio data')
+    return {
+      data: bufferFromHex(audio),
+      mimeType: audioMimeType(request.format),
+      extension: audioExtension(request.format)
+    }
+  }
+}
+
+/**
+ * Speech client for the MiMo TTS protocol, which is carried over a
+ * `/v1/chat/completions` request and answers with base64 audio.
+ */
+export class MimoSpeechClient implements SpeechGenClient {
+  readonly id = 'mimo-tts'
+  private readonly endpointUrl: string
+
+  constructor(
+    baseUrl: string,
+    private readonly apiKey: string
+  ) {
+    this.endpointUrl = apiUrl(baseUrl, '/v1/chat/completions')
+  }
+
+  /**
+   * Sends the text as an assistant message (preceded by an optional user
+   * message carrying the style) and decodes the returned base64 audio.
+   *
+   * @throws Error when the response carries no audio data.
+   */
+  // NOTE(review): the result type is spelled structurally because the original
+  // type argument is not visible in this chunk; swap in the named type if one exists.
+  async generate(request: SpeechGenRequest): Promise<{ data: Buffer; mimeType: string; extension: string }> {
+    const messages = [
+      ...(request.style ? [{ role: 'user', content: request.style }] : []),
+      { role: 'assistant', content: request.text }
+    ]
+    const payload = await requestJson<MimoSpeechPayload>(this.endpointUrl, {
+      method: 'POST',
+      headers: {
+        // The key is sent under both header names.
+        Authorization: `Bearer ${this.apiKey}`,
+        'api-key': this.apiKey,
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({
+        model: request.model,
+        messages,
+        audio: {
+          format: request.format,
+          ...(request.voice ? { voice: request.voice } : {})
+        }
+      }),
+      signal: withTimeout(request.signal, request.timeoutMs)
+    }, request)
+    const audio = payload.choices?.[0]?.message?.audio?.data
+    if (!audio) throw new Error('MiMo speech provider returned no audio data')
+    return {
+      data: Buffer.from(audio, 'base64'),
+      mimeType: audioMimeType(request.format),
+      extension: audioExtension(request.format)
+    }
+  }
+}
+
+/**
+ * Music client for the MiniMax `/v1/music_generation` endpoint.
+ * Audio is requested as a hex string and decoded locally.
+ */
+export class MiniMaxMusicClient implements MusicGenClient {
+  readonly id = 'minimax-music'
+  private readonly endpointUrl: string
+
+  constructor(
+    baseUrl: string,
+    private readonly apiKey: string
+  ) {
+    this.endpointUrl = apiUrl(baseUrl, '/v1/music_generation')
+  }
+
+  /**
+   * Requests a generated track and decodes the hex payload into bytes.
+   *
+   * @throws Error when `base_resp` reports a non-zero status, when no audio
+   *   is returned, or when the audio is not valid hex.
+   */
+  // NOTE(review): the result type is spelled structurally because the original
+  // type argument is not visible in this chunk; swap in the named type if one exists.
+  async generate(request: MusicGenRequest): Promise<{ data: Buffer; mimeType: string; extension: string }> {
+    const payload = await requestJson<MiniMaxAudioPayload>(this.endpointUrl, {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`,
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({
+        model: request.model,
+        ...(request.prompt ? { prompt: request.prompt } : {}),
+        ...(request.lyrics ? { lyrics: request.lyrics } : {}),
+        output_format: 'hex',
+        audio_setting: {
+          format: request.format,
+          sample_rate: 44_100,
+          bitrate: 256_000
+        },
+        // Unless the caller decides, enable the optimizer only when there are
+        // no lyrics and the track was not explicitly requested as instrumental.
+        lyrics_optimizer: request.lyricsOptimizer ?? (!request.lyrics && request.instrumental !== true),
+        ...(request.instrumental !== undefined ? { is_instrumental: request.instrumental } : {}),
+        ...(request.referenceAudioUrl ? { audio_url: request.referenceAudioUrl } : {})
+      }),
+      signal: withTimeout(request.signal, request.timeoutMs)
+    }, request)
+    assertMiniMaxOk(payload.base_resp, 'MiniMax music provider')
+    const audio = payload.data?.audio
+    if (!audio) throw new Error('MiniMax music provider returned no audio data')
+    return {
+      data: bufferFromHex(audio),
+      mimeType: audioMimeType(request.format),
+      extension: audioExtension(request.format)
+    }
+  }
+}
+
+/**
+ * MiniMax video client: submits a generation task, polls it until it reaches
+ * a terminal status, resolves the resulting file id to a download URL and
+ * downloads the video.
+ */
+export class MiniMaxVideoClient implements VideoGenClient {
+  readonly id = 'minimax-video'
+  private readonly rootUrl: string
+
+  constructor(
+    baseUrl: string,
+    private readonly apiKey: string
+  ) {
+    this.rootUrl = minimaxRootUrl(baseUrl)
+  }
+
+  /**
+   * Runs one full generation. Progress is reported through `request.onUpdate`
+   * after submission and after every poll.
+   *
+   * @throws Error when the provider reports a failure status, omits
+   *   `task_id`/`file_id`, or the task does not finish within `timeoutMs`.
+   * @throws ImageGenHttpError when an HTTP call answers with a non-2xx status.
+   */
+  // NOTE(review): the result type is spelled structurally because the original
+  // type argument is not visible in this chunk; swap in the named type if one exists.
+  async generate(request: VideoGenRequest): Promise<{ data: Buffer; mimeType: string; extension: string }> {
+    // One combined signal bounds every network call of this run.
+    const signal = withTimeout(request.signal, request.timeoutMs)
+    const createPayload = await requestJson<MiniMaxVideoCreatePayload>(`${this.rootUrl}/v1/video_generation`, {
+      method: 'POST',
+      headers: this.headers(),
+      body: JSON.stringify({
+        model: request.model,
+        prompt: request.prompt,
+        duration: request.duration,
+        resolution: request.resolution,
+        ...(request.firstFrameImage
+          ? { first_frame_image: dataUri(request.firstFrameImage.mimeType, request.firstFrameImage.data) }
+          : {}),
+        ...(request.lastFrameImage
+          ? { last_frame_image: dataUri(request.lastFrameImage.mimeType, request.lastFrameImage.data) }
+          : {})
+      }),
+      signal
+    }, request)
+    assertMiniMaxOk(createPayload.base_resp, 'MiniMax video provider')
+    const taskId = createPayload.task_id
+    if (!taskId) throw new Error('MiniMax video provider returned no task_id')
+    await request.onUpdate?.({
+      output: { status: 'submitted', taskId, provider: this.id }
+    })
+
+    const deadline = Date.now() + request.timeoutMs
+    let lastStatus = 'submitted'
+    while (Date.now() < deadline) {
+      await delay(request.pollIntervalMs, signal)
+      const queryUrl = new URL(`${this.rootUrl}/v1/query/video_generation`)
+      queryUrl.searchParams.set('task_id', taskId)
+      const queryPayload = await requestJson<MiniMaxVideoQueryPayload>(queryUrl.toString(), {
+        method: 'GET',
+        headers: this.headers(),
+        signal
+      }, request)
+      assertMiniMaxOk(queryPayload.base_resp, 'MiniMax video provider')
+      // Keep the previous status when the provider omits one.
+      lastStatus = queryPayload.status || lastStatus
+      await request.onUpdate?.({
+        output: { status: lastStatus, taskId, provider: this.id }
+      })
+      if (isFailureStatus(lastStatus)) {
+        throw new Error(`MiniMax video generation failed with status ${lastStatus}`)
+      }
+      if (!isSuccessStatus(lastStatus)) continue
+      const fileId = queryPayload.file_id
+      if (!fileId) throw new Error('MiniMax video provider finished without file_id')
+      // Hand over the combined signal so the lookup shares this run's timeout
+      // budget instead of starting a fresh `timeoutMs` window.
+      const downloadUrl = await this.retrieveDownloadUrl(fileId, { timeoutMs: request.timeoutMs, signal })
+      const response = await requestResponse(downloadUrl, { method: 'GET', signal }, request)
+      if (!response.ok) throw new ImageGenHttpError(response.status, await response.text())
+      const mimeType = response.headers.get('content-type')?.split(';')[0] || 'video/mp4'
+      return {
+        data: Buffer.from(await response.arrayBuffer()),
+        mimeType,
+        extension: videoExtension(mimeType)
+      }
+    }
+    throw new Error(`MiniMax video generation timed out after ${request.timeoutMs}ms (last status: ${lastStatus})`)
+  }
+
+  /** Resolves a finished task's `file_id` to its download URL. */
+  private async retrieveDownloadUrl(fileId: string, request: { timeoutMs: number; signal: AbortSignal }): Promise<string> {
+    const retrieveUrl = new URL(`${this.rootUrl}/v1/files/retrieve`)
+    retrieveUrl.searchParams.set('file_id', fileId)
+    const payload = await requestJson<MiniMaxFileRetrievePayload>(retrieveUrl.toString(), {
+      method: 'GET',
+      headers: this.headers(),
+      signal: withTimeout(request.signal, request.timeoutMs)
+    }, request)
+    assertMiniMaxOk(payload.base_resp, 'MiniMax video provider')
+    const downloadUrl = payload.file?.download_url
+    if (!downloadUrl) throw new Error('MiniMax video provider returned no download_url')
+    return downloadUrl
+  }
+
+  /** Headers shared by every MiniMax API call made by this client. */
+  private headers(): Record<string, string> {
+    return {
+      Authorization: `Bearer ${this.apiKey}`,
+      'Content-Type': 'application/json'
+    }
+  }
+}
+
+// Response with `data.audio`; the MiniMax speech and music clients in this
+// file read that field and decode it as hex.
+type MiniMaxAudioPayload = {
+ data?: { audio?: string }
+ base_resp?: MiniMaxBaseResponse
+}
+
+// Response to creating a video task; `task_id` is what gets polled afterwards.
+type MiniMaxVideoCreatePayload = {
+ task_id?: string
+ base_resp?: MiniMaxBaseResponse
+}
+
+// Response to a video task status query; `file_id` is read once the status
+// counts as successful.
+type MiniMaxVideoQueryPayload = {
+ status?: string
+ file_id?: string
+ base_resp?: MiniMaxBaseResponse
+}
+
+// Response to a file lookup; `file.download_url` is fetched to get the video.
+type MiniMaxFileRetrievePayload = {
+ file?: { download_url?: string }
+ base_resp?: MiniMaxBaseResponse
+}
+
+// Status envelope checked by assertMiniMaxOk: a numeric `status_code` other
+// than 0 is treated as a failure.
+type MiniMaxBaseResponse = {
+ status_code?: number
+ status_msg?: string
+}
+
+// Chat-completions style response; the MiMo client reads
+// `choices[0].message.audio.data` and decodes it as base64.
+type MimoSpeechPayload = {
+ choices?: Array<{
+ message?: {
+ audio?: {
+ data?: string
+ }
+ }
+ }>
+}
+
+/**
+ * Lists which of `baseUrl`, `apiKey` and `model` are absent or empty,
+ * always in that order.
+ */
+function missingProviderFields(config: { baseUrl?: string; apiKey?: string; model?: string }): string[] {
+  const missing: string[] = []
+  if (!config.baseUrl) missing.push('baseUrl')
+  if (!config.apiKey) missing.push('apiKey')
+  if (!config.model) missing.push('model')
+  return missing
+}
+
+/**
+ * Writes generated media into `<workspace>/<dir>` under a name of the form
+ * `<prefix>-<14-digit timestamp>-<4 hex chars>.<extension>`, creating the
+ * directory when needed.
+ *
+ * @returns the workspace-relative path, the absolute path, the MIME type
+ *   passed in and the number of bytes written.
+ */
+async function writeGeneratedMediaFile(input: {
+  context: ToolHostContext
+  data: Buffer
+  mimeType: string
+  extension: string
+  dir: string
+  prefix: string
+  nowIso?: () => string
+}): Promise<{
+  relativePath: string
+  absolutePath: string
+  mimeType: string
+  byteSize: number
+}> {
+  const { context, data, dir, extension, mimeType, prefix } = input
+  const isoNow = input.nowIso?.() ?? new Date().toISOString()
+  // Keep digits only and cut to YYYYMMDDhhmmss.
+  const stamp = isoNow.replace(/\D/g, '').slice(0, 14)
+  const suffix = randomBytes(2).toString('hex')
+  const fileName = `${prefix}-${stamp}-${suffix}.${extension}`
+  const absolutePath = join(context.workspace, dir, fileName)
+  await mkdir(join(context.workspace, dir), { recursive: true })
+  await writeFile(absolutePath, data)
+  return {
+    relativePath: `${dir}/${fileName}`,
+    absolutePath,
+    mimeType,
+    byteSize: data.byteLength
+  }
+}
+
+type FrameImageResult = { image?: { mimeType: string; data: Buffer } }
+type FrameImageError = { error: { output: unknown; isError: true } }
+
+/**
+ * Loads an optional frame image referenced by a workspace path.
+ *
+ * @param value raw tool argument; anything that is not a non-empty string
+ *   means "no image" and yields `{}`.
+ * @param context supplies the workspace root the path is resolved against.
+ * @param fieldName argument name used in error messages.
+ * @returns `{ image }` on success, `{}` when no path was given, or
+ *   `{ error }` with an `invalid_reference_path` tool error when the path
+ *   leaves the workspace, cannot be read, is too large, or is not an
+ *   accepted image type.
+ */
+async function collectFrameImage(
+  value: unknown,
+  context: ToolHostContext,
+  fieldName: string
+): Promise<FrameImageResult | FrameImageError> {
+  const rawPath = pickString(value)
+  if (!rawPath) return {}
+  const resolved = resolve(context.workspace, rawPath)
+  const rel = relative(context.workspace, resolved)
+  // Only a leading `..` path segment escapes the workspace; a plain
+  // startsWith('..') would also reject legitimate names such as `..cache/a.png`.
+  const escapesWorkspace = /^\.\.(?:[\\/]|$)/.test(rel) || isAbsolute(rel)
+  if (escapesWorkspace) {
+    return { error: toolError('invalid_reference_path', `${fieldName} must be inside the workspace: ${rawPath}`) }
+  }
+  let data: Buffer
+  try {
+    data = await readFile(resolved)
+  } catch {
+    return { error: toolError('invalid_reference_path', `${fieldName} not found: ${rawPath}`) }
+  }
+  if (data.byteLength > MAX_REFERENCE_IMAGE_BYTES) {
+    return { error: toolError('invalid_reference_path', `${fieldName} exceeds ${MAX_REFERENCE_IMAGE_BYTES} byte limit: ${rawPath}`) }
+  }
+  // Trust the file's bytes rather than its extension.
+  const detected = detectImage(data)
+  if (!detected || !REFERENCE_MIME_TYPES.has(detected.mimeType)) {
+    return { error: toolError('invalid_reference_path', `${fieldName} must be png, jpeg, or webp: ${rawPath}`) }
+  }
+  return { image: { mimeType: detected.mimeType, data } }
+}
+
+/**
+ * Performs a request and parses the response body as JSON.
+ *
+ * The body is only cast to `T`, not validated.
+ *
+ * @throws ImageGenHttpError when the status is not 2xx (carries the body text).
+ * @throws Error when the body is not valid JSON; the message names the URL
+ *   without its query string.
+ */
+async function requestJson<T>(
+  url: string,
+  init: RequestInit,
+  request: { timeoutMs: number; signal: AbortSignal }
+): Promise<T> {
+  const response = await requestResponse(url, init, request)
+  const text = await response.text()
+  if (!response.ok) throw new ImageGenHttpError(response.status, text)
+  try {
+    return JSON.parse(text) as T
+  } catch {
+    throw new Error(`provider returned invalid JSON from ${url.split('?')[0]}`)
+  }
+}
+
+/**
+ * Thin wrapper around `fetch` that converts a rejected fetch into the
+ * descriptive error built by `mediaFetchFailure`. Non-2xx responses are
+ * returned as-is for the caller to handle.
+ */
+async function requestResponse(
+  url: string,
+  init: RequestInit,
+  request: { timeoutMs: number; signal: AbortSignal }
+): Promise<Response> {
+  try {
+    return await fetch(url, init)
+  } catch (error) {
+    throw mediaFetchFailure(url, error, request)
+  }
+}
+
+/**
+ * Converts a rejected fetch into an Error whose message distinguishes a
+ * timeout, a cancellation and any other network failure. The URL is reported
+ * without its query string and the original error is kept as `cause`.
+ */
+function mediaFetchFailure(
+  url: string,
+  error: unknown,
+  request: { timeoutMs: number }
+): Error {
+  const [target] = url.split('?')
+  const abortKind = error instanceof DOMException ? error.name : undefined
+  if (abortKind === 'TimeoutError') {
+    return new Error(`media request to ${target} timed out after ${request.timeoutMs}ms`, { cause: error })
+  }
+  if (abortKind === 'AbortError') {
+    return new Error(`media request to ${target} was canceled`, { cause: error })
+  }
+  return new Error(`media request to ${target} failed: ${describeNetworkError(error)}`, { cause: error })
+}
+
+/**
+ * Joins a configured base URL with a `/v1/...` endpoint path without
+ * duplicating what the base already contains.
+ *
+ * - empty base: the path alone
+ * - base already ending in the path (with or without `/v1`): the base
+ * - base ending in `/v1`: base plus the path without its `/v1` prefix
+ * - otherwise: base plus the full path
+ *
+ * Suffix checks are case-insensitive; trailing slashes on the base are dropped.
+ */
+function apiUrl(baseUrl: string, v1Path: string): string {
+  const base = baseUrl.trim().replace(/\/+$/, '')
+  const fullPath = v1Path.startsWith('/') ? v1Path : `/${v1Path}`
+  if (!base) return fullPath
+  const shortPath = fullPath.replace(/^\/v1\//, '/')
+  const baseLower = base.toLowerCase()
+  const alreadyComplete = [fullPath, shortPath].some((candidate) => baseLower.endsWith(candidate.toLowerCase()))
+  if (alreadyComplete) return base
+  return baseLower.endsWith('/v1') ? `${base}${shortPath}` : `${base}${fullPath}`
+}
+
+/**
+ * Reduces a configured MiniMax base URL to the API root, i.e. the part in
+ * front of `/v1`, so that callers can append `/v1/...` endpoint paths.
+ *
+ * Accepts a bare root, a root ending in `/v1`, or a full create/query
+ * video endpoint URL. Matching is case-insensitive and trailing slashes
+ * are ignored. An empty or blank input yields `''`.
+ */
+function minimaxRootUrl(baseUrl: string): string {
+  const normalized = baseUrl.trim().replace(/\/+$/, '')
+  if (!normalized) return ''
+  const lower = normalized.toLowerCase()
+  // Longest suffix first: `/video_generation` is itself a suffix of the query
+  // endpoint, so testing it earlier would leave `/v1/query` attached to the root.
+  const endpointSuffixes = [
+    '/v1/query/video_generation',
+    '/v1/video_generation',
+    '/query/video_generation',
+    '/video_generation'
+  ]
+  for (const suffix of endpointSuffixes) {
+    if (lower.endsWith(suffix)) {
+      return normalized.slice(0, -suffix.length).replace(/\/+$/, '')
+    }
+  }
+  if (lower.endsWith('/v1')) return normalized.slice(0, -3).replace(/\/+$/, '')
+  return normalized
+}
+
+/**
+ * Throws when a MiniMax `base_resp` carries a numeric, non-zero status code.
+ * A missing envelope or a missing/non-numeric code is treated as success.
+ */
+function assertMiniMaxOk(baseResp: MiniMaxBaseResponse | undefined, label: string): void {
+  const code = baseResp?.status_code
+  if (typeof code !== 'number' || code === 0) return
+  const detail = baseResp?.status_msg ?? 'unknown error'
+  throw new Error(`${label} failed (${code}): ${detail}`)
+}
+
+/**
+ * Decodes a hex string into bytes, ignoring any whitespace in it.
+ *
+ * @throws Error when the cleaned string is empty, has odd length, or
+ *   contains a non-hex character.
+ */
+function bufferFromHex(value: string): Buffer {
+  const hex = value.replace(/\s+/g, '')
+  const wellFormed = hex.length > 0 && hex.length % 2 === 0 && /^[0-9a-f]+$/i.test(hex)
+  if (!wellFormed) {
+    throw new Error('provider returned invalid hex audio data')
+  }
+  return Buffer.from(hex, 'hex')
+}
+
+/**
+ * Returns a signal that aborts as soon as either `signal` aborts or
+ * `timeoutMs` milliseconds have elapsed.
+ */
+function withTimeout(signal: AbortSignal, timeoutMs: number): AbortSignal {
+  const timeoutSignal = AbortSignal.timeout(timeoutMs)
+  return AbortSignal.any([signal, timeoutSignal])
+}
+
+/** Encodes bytes as a base64 `data:` URI with the given MIME type. */
+function dataUri(mimeType: string, data: Buffer): string {
+  const encoded = data.toString('base64')
+  return ['data:', mimeType, ';base64,', encoded].join('')
+}
+
+/**
+ * Trims and lower-cases an audio format name; anything missing, empty or
+ * not listed in AUDIO_FORMATS becomes 'mp3'.
+ */
+function normalizeAudioFormat(value: string | undefined): string {
+  const candidate = value?.trim().toLowerCase()
+  if (!candidate) return 'mp3'
+  return AUDIO_FORMATS.has(candidate) ? candidate : 'mp3'
+}
+
+/**
+ * Maps an audio format name to a MIME type. wav, flac and pcm/pcm16 have
+ * dedicated types; every other normalized format maps to 'audio/mpeg'.
+ */
+function audioMimeType(format: string): string {
+  const normalized = normalizeAudioFormat(format)
+  if (normalized === 'wav') return 'audio/wav'
+  if (normalized === 'flac') return 'audio/flac'
+  if (normalized === 'pcm' || normalized === 'pcm16') return 'audio/L16'
+  return 'audio/mpeg'
+}
+
+/**
+ * File extension for an audio format: the normalized format name itself,
+ * except that 'pcm16' is written with the 'pcm' extension.
+ */
+function audioExtension(format: string): string {
+  const extension = normalizeAudioFormat(format)
+  if (extension === 'pcm16') return 'pcm'
+  return extension
+}
+
+/**
+ * Picks a file extension from a video MIME type: 'webm' and 'mov' when the
+ * type mentions webm or quicktime, 'mp4' for everything else.
+ */
+function videoExtension(mimeType: string): string {
+  const byMarker = [
+    ['webm', 'webm'],
+    ['quicktime', 'mov']
+  ] as const
+  for (const [marker, extension] of byMarker) {
+    if (mimeType.includes(marker)) return extension
+  }
+  return 'mp4'
+}
+
+/**
+ * Rounds a finite numeric duration (or uses `fallback` when `value` is not
+ * a finite number) and clamps the result to the range 1..30.
+ */
+function normalizeDuration(value: unknown, fallback: number): number {
+  const seconds = typeof value === 'number' && Number.isFinite(value) ? Math.round(value) : fallback
+  if (seconds < 1) return 1
+  if (seconds > 30) return 30
+  return seconds
+}
+
+/** True when a task status (trimmed, case-insensitive) denotes success. */
+function isSuccessStatus(status: string): boolean {
+  switch (status.trim().toLowerCase()) {
+    case 'success':
+    case 'succeeded':
+    case 'completed':
+    case 'complete':
+      return true
+    default:
+      return false
+  }
+}
+
+/** True when a task status (trimmed, case-insensitive) denotes failure or cancellation. */
+function isFailureStatus(status: string): boolean {
+  switch (status.trim().toLowerCase()) {
+    case 'fail':
+    case 'failed':
+    case 'error':
+    case 'canceled':
+    case 'cancelled':
+      return true
+    default:
+      return false
+  }
+}
+
+/**
+ * Resolves after `ms` milliseconds, or rejects with an `AbortError`
+ * DOMException as soon as `signal` aborts (immediately when it is already
+ * aborted).
+ */
+function delay(ms: number, signal: AbortSignal): Promise<void> {
+  if (signal.aborted) return Promise.reject(new DOMException('Aborted', 'AbortError'))
+  return new Promise<void>((resolveDelay, rejectDelay) => {
+    const onAbort = (): void => {
+      clearTimeout(timer)
+      rejectDelay(new DOMException('Aborted', 'AbortError'))
+    }
+    const timer = setTimeout(() => {
+      // Detach the listener once the wait is over; the same signal is reused
+      // for every poll, so leaving it attached would accumulate one per call.
+      signal.removeEventListener('abort', onAbort)
+      resolveDelay()
+    }, ms)
+    signal.addEventListener('abort', onAbort, { once: true })
+  })
+}
+
+/**
+ * Builds the telemetry record attached to tool results: the provider id
+ * and the milliseconds elapsed since `startedAt` (a `Date.now()` value).
+ */
+function telemetry(startedAt: number, provider: string): Record<string, unknown> {
+  return { provider, durationMs: Date.now() - startedAt }
+}
+
+/**
+ * Builds a failed tool result carrying `{ error: { code, message } }` and,
+ * when provided, a `telemetry` record next to it.
+ */
+function toolError(code: string, message: string, toolTelemetry?: Record<string, unknown>): { output: unknown; isError: true } {
+  return {
+    output: {
+      error: { code, message },
+      ...(toolTelemetry ? { telemetry: toolTelemetry } : {})
+    },
+    isError: true
+  }
+}
+
+/** Returns the trimmed string when `value` is a non-blank string, else undefined. */
+function pickString(value: unknown): string | undefined {
+  if (typeof value !== 'string') return undefined
+  const trimmed = value.trim()
+  return trimmed ? trimmed : undefined
+}
+
+/** Returns `value` when it is a real boolean, else undefined (no coercion). */
+function pickBoolean(value: unknown): boolean | undefined {
+  if (value === true || value === false) return value
+  return undefined
+}
+
+/** Extracts a printable message: `error.message` for Errors, `String(error)` otherwise. */
+function providerErrorMessage(error: unknown): string {
+  if (error instanceof Error) return error.message
+  return String(error)
+}
diff --git a/kun/src/adapters/tool/output-accumulator.ts b/kun/src/adapters/tool/output-accumulator.ts
index b8f0cf2b..8d3c0e8a 100644
--- a/kun/src/adapters/tool/output-accumulator.ts
+++ b/kun/src/adapters/tool/output-accumulator.ts
@@ -176,19 +176,21 @@ export class OutputAccumulator {
}
snapshot(options: { persistIfTruncated?: boolean } = {}): OutputAccumulatorSnapshot {
- const tailTruncation = truncateTail(this.getSnapshotText(), {
+ const pendingPreview = this.pendingDecodePreview()
+ const snapshotText = this.getSnapshotText(pendingPreview)
+ const totalDecodedBytes = this.totalDecodedBytes + byteLength(pendingPreview)
+ const totalLines = this.totalLinesAfterPreview(pendingPreview)
+ const tailTruncation = truncateTail(snapshotText, {
maxLines: this.maxLines,
maxBytes: this.maxBytes
})
- const truncated = this.totalLines > this.maxLines || this.totalDecodedBytes > this.maxBytes
+ const truncated = totalLines > this.maxLines || totalDecodedBytes > this.maxBytes
const truncation: OutputAccumulatorTruncation = {
...tailTruncation,
truncated,
- truncatedBy: truncated
- ? (tailTruncation.truncatedBy ?? (this.totalDecodedBytes > this.maxBytes ? 'bytes' : 'lines'))
- : null,
- totalLines: this.totalLines,
- totalBytes: this.totalDecodedBytes,
+ truncatedBy: truncated ? (tailTruncation.truncatedBy ?? (totalDecodedBytes > this.maxBytes ? 'bytes' : 'lines')) : null,
+ totalLines,
+ totalBytes: totalDecodedBytes,
maxLines: this.maxLines,
maxBytes: this.maxBytes
}
@@ -281,10 +283,34 @@ export class OutputAccumulator {
this.tailBytes = byteLength(this.tailText)
}
- private getSnapshotText(): string {
- if (this.tailStartsAtLineBoundary) return this.tailText
- const firstNewline = this.tailText.indexOf('\n')
- return firstNewline === -1 ? this.tailText : this.tailText.slice(firstNewline + 1)
+  /**
+   * Text used for a snapshot: the retained tail plus an optional preview of
+   * not-yet-decoded bytes. When the tail does not start on a line boundary,
+   * everything up to and including the first newline is dropped.
+   */
+  private getSnapshotText(pendingPreview = ''): string {
+    const combined = this.tailText + pendingPreview
+    if (!this.tailStartsAtLineBoundary) {
+      const cut = combined.indexOf('\n')
+      if (cut !== -1) return combined.slice(cut + 1)
+    }
+    return combined
+  }
+
+  /**
+   * Decodes the bytes still held in `decodeBuffer` for preview purposes.
+   * Returns '' when a decoder is already set or nothing is buffered.
+   * UTF-16LE is used when the buffer starts with its BOM or looks like
+   * UTF-16LE; otherwise the bytes are decoded as UTF-8.
+   */
+  private pendingDecodePreview(): string {
+    const pending = this.decodeBuffer
+    if (this.decoder || pending.length === 0) return ''
+    const utf16 = startsWithUtf16LeBom(pending) || looksLikeUtf16Le(pending)
+    if (!utf16) return new TextDecoder('utf-8').decode(pending)
+    return new TextDecoder('utf-16le').decode(stripKnownBom(pending, 'utf-16le'))
+  }
+
+  /**
+   * Line count as it would be if `pendingPreview` were appended to the
+   * output. With no preview the stored total is returned unchanged.
+   */
+  private totalLinesAfterPreview(pendingPreview: string): number {
+    if (!pendingPreview) return this.totalLines
+    const lastBreak = pendingPreview.lastIndexOf('\n')
+    if (lastBreak === -1) {
+      // No newline in the preview: it extends or opens a single line.
+      return this.completedLines + (this.hasOpenLine || pendingPreview.length > 0 ? 1 : 0)
+    }
+    let breaks = 0
+    for (const ch of pendingPreview) {
+      if (ch === '\n') breaks += 1
+    }
+    // Text after the final newline counts as one more (open) line.
+    const trailing = pendingPreview.slice(lastBreak + 1)
+    return this.completedLines + breaks + (trailing.length > 0 ? 1 : 0)
+  }
private shouldUseTempFile(): boolean {
diff --git a/kun/src/adapters/tool/sandbox-policy.ts b/kun/src/adapters/tool/sandbox-policy.ts
new file mode 100644
index 00000000..4ac69f1b
--- /dev/null
+++ b/kun/src/adapters/tool/sandbox-policy.ts
@@ -0,0 +1,117 @@
+import { isAbsolute, relative, resolve, sep } from 'node:path'
+import {
+ DEFAULT_SANDBOX_MODE,
+ SandboxModeSchema,
+ type SandboxMode
+} from '../../contracts/policy.js'
+import type { ToolHostContext } from '../../ports/tool-host.js'
+import type { LocalTool } from './local-tool-host.js'
+import { workspaceRoot } from './builtin-tool-utils.js'
+
+// Describes why the sandbox refused a tool or a write: a code identifying
+// the kind of refusal plus a human-readable message.
+export type SandboxBlock = {
+ code: 'sandbox_read_only' | 'sandbox_command_blocked' | 'sandbox_write_blocked'
+ message: string
+}
+
+/**
+ * Returns the sandbox mode in effect for a context. A missing context or a
+ * `sandboxMode` that fails schema validation falls back to
+ * DEFAULT_SANDBOX_MODE.
+ */
+export function effectiveSandboxMode(
+  context?: Pick<ToolHostContext, 'sandboxMode'>
+): SandboxMode {
+  const parsed = SandboxModeSchema.safeParse(context?.sandboxMode)
+  return parsed.success ? parsed.data : DEFAULT_SANDBOX_MODE
+}
+
+/**
+ * Whether a tool should be advertised under the context's sandbox mode.
+ * Without a context every tool is advertised; otherwise a tool is advertised
+ * exactly when `sandboxBlockForTool` does not block it.
+ */
+export function isToolAdvertisedInSandbox(
+  tool: Pick<LocalTool, 'name' | 'toolKind'>,
+  context?: Pick<ToolHostContext, 'sandboxMode'>
+): boolean {
+  if (!context) return true
+  return sandboxBlockForTool(tool, context) === null
+}
+
+/**
+ * Decides whether the sandbox blocks a tool outright.
+ *
+ * - `danger-full-access` blocks nothing.
+ * - `user_input` / `request_user_input` are never blocked.
+ * - `file_change` tools are allowed only in `workspace-write`.
+ * - `command_execution` tools are blocked in every mode except
+ *   `danger-full-access`.
+ * - Every other tool kind is allowed.
+ *
+ * @returns the block reason, or `null` when the tool may run.
+ */
+export function sandboxBlockForTool(
+  tool: Pick<LocalTool, 'name' | 'toolKind'>,
+  context: Pick<ToolHostContext, 'sandboxMode'>
+): SandboxBlock | null {
+  const mode = effectiveSandboxMode(context)
+  if (mode === 'danger-full-access') return null
+  if (isInteractiveGuiGateTool(tool.name)) return null
+
+  if (tool.toolKind === 'file_change') {
+    if (mode === 'workspace-write') return null
+    return {
+      code: mode === 'read-only' ? 'sandbox_read_only' : 'sandbox_write_blocked',
+      message:
+        mode === 'read-only'
+          ? `tool ${tool.name} is blocked by the read-only sandbox`
+          : `tool ${tool.name} is blocked because ${mode} does not allow in-process file mutation`
+    }
+  }
+
+  if (tool.toolKind === 'command_execution') {
+    return {
+      code: 'sandbox_command_blocked',
+      message:
+        mode === 'read-only'
+          ? `tool ${tool.name} is blocked by the read-only sandbox`
+          : `tool ${tool.name} is blocked because ${mode} cannot sandbox host shell commands`
+    }
+  }
+
+  return null
+}
+
+/**
+ * Decides whether a path may be written under the context's sandbox mode.
+ *
+ * `danger-full-access` allows everything; `read-only` and `external-sandbox`
+ * allow nothing; otherwise the path must be the workspace root or lie
+ * inside it. Relative paths are resolved against the workspace root.
+ *
+ * @returns `{ ok: true }`, or `{ ok: false, block }` with the reason.
+ */
+export function canWritePath(
+  absolutePath: string,
+  context: Pick<ToolHostContext, 'sandboxMode' | 'workspace'>
+): { ok: true } | { ok: false; block: SandboxBlock } {
+  const mode = effectiveSandboxMode(context)
+  if (mode === 'danger-full-access') return { ok: true }
+  if (mode === 'read-only') {
+    return {
+      ok: false,
+      block: {
+        code: 'sandbox_read_only',
+        message: `writing is blocked by the read-only sandbox: ${absolutePath}`
+      }
+    }
+  }
+  if (mode === 'external-sandbox') {
+    return {
+      ok: false,
+      block: {
+        code: 'sandbox_write_blocked',
+        message: `writing is blocked because external-sandbox is not enforced by in-process file tools: ${absolutePath}`
+      }
+    }
+  }
+
+  const root = workspaceRoot(context.workspace)
+  const resolvedPath = isAbsolute(absolutePath) ? resolve(absolutePath) : resolve(root, absolutePath)
+  // NOTE(review): this containment check is purely lexical; whether symlinks
+  // are resolved depends on workspaceRoot and the callers — confirm.
+  if (isPathInsideOrEqual(root, resolvedPath)) return { ok: true }
+  return {
+    ok: false,
+    block: {
+      code: 'sandbox_write_blocked',
+      message: `writing is limited to the workspace sandbox: ${absolutePath}`
+    }
+  }
+}
+
+/**
+ * Throwing variant of `canWritePath`.
+ *
+ * @throws Error carrying the block message when the write is not allowed.
+ */
+export function assertCanWritePath(
+  absolutePath: string,
+  context: Pick<ToolHostContext, 'sandboxMode' | 'workspace'>
+): void {
+  const decision = canWritePath(absolutePath, context)
+  if (!decision.ok) throw new Error(decision.block.message)
+}
+
+/**
+ * True when `candidate` is `root` itself or lies beneath it, judged on the
+ * resolved paths. Names that merely begin with two dots (e.g. `..cache`)
+ * still count as inside.
+ */
+function isPathInsideOrEqual(root: string, candidate: string): boolean {
+  const rel = relative(resolve(root), resolve(candidate))
+  if (rel === '') return true
+  if (rel === '..' || rel.startsWith(`..${sep}`)) return false
+  return !isAbsolute(rel)
+}
+
+/** True for the tools that ask the user for input, which the sandbox never blocks. */
+function isInteractiveGuiGateTool(toolName: string): boolean {
+  switch (toolName) {
+    case 'user_input':
+    case 'request_user_input':
+      return true
+    default:
+      return false
+  }
+}
diff --git a/kun/src/adapters/tool/tool-hooks.ts b/kun/src/adapters/tool/tool-hooks.ts
deleted file mode 100644
index f8198a81..00000000
--- a/kun/src/adapters/tool/tool-hooks.ts
+++ /dev/null
@@ -1,162 +0,0 @@
-import { spawn } from 'node:child_process'
-import type { ToolCallLike, ToolHostContext } from '../../ports/tool-host.js'
-import { terminateSpawnTree } from './builtin-tool-utils.js'
-
-export type ToolHookPhase = 'PreToolUse' | 'PostToolUse'
-
-export type ToolHookInvocation = {
- phase: ToolHookPhase
- call: ToolCallLike
- context: Pick
- result?: {
- output: unknown
- isError?: boolean
- }
-}
-
-export type ToolHookResult = {
- decision?: 'allow' | 'deny'
- message?: string
- arguments?: Record
- output?: unknown
- isError?: boolean
-}
-
-export type ResolvedToolHook =
- | {
- phase: ToolHookPhase
- toolNames?: readonly string[]
- timeoutMs?: number
- run: (invocation: ToolHookInvocation) => Promise | ToolHookResult | void
- }
- | {
- phase: ToolHookPhase
- toolNames?: readonly string[]
- timeoutMs?: number
- command: string
- cwd?: string
- }
-
-export async function runToolHooks(input: {
- hooks: readonly ResolvedToolHook[]
- invocation: ToolHookInvocation
-}): Promise {
- const matching = input.hooks.filter((hook) => hook.phase === input.invocation.phase && hookMatchesTool(hook, input.invocation.call.toolName))
- const results: ToolHookResult[] = []
- for (const hook of matching) {
- const result = 'run' in hook
- ? await runFunctionHook(hook, input.invocation)
- : await runCommandHook(hook, input.invocation)
- if (result) results.push(result)
- }
- return results
-}
-
-export function applyPreToolHookResults(
- call: ToolCallLike,
- results: readonly ToolHookResult[]
-): { call: ToolCallLike; denied?: string } {
- let next = call
- for (const result of results) {
- if (result.decision === 'deny') {
- return { call: next, denied: result.message || 'tool call denied by PreToolUse hook' }
- }
- if (result.arguments && typeof result.arguments === 'object') {
- next = { ...next, arguments: result.arguments }
- }
- }
- return { call: next }
-}
-
-export function applyPostToolHookResults(
- result: { output: unknown; isError?: boolean },
- results: readonly ToolHookResult[]
-): { output: unknown; isError?: boolean } {
- let next = result
- for (const hookResult of results) {
- if ('output' in hookResult) {
- next = {
- output: hookResult.output,
- isError: hookResult.isError ?? next.isError
- }
- } else if (hookResult.isError !== undefined) {
- next = { ...next, isError: hookResult.isError }
- }
- }
- return next
-}
-
-function hookMatchesTool(hook: Pick, toolName: string): boolean {
- if (!hook.toolNames || hook.toolNames.length === 0) return true
- return hook.toolNames.includes(toolName)
-}
-
-async function runFunctionHook(
- hook: Extract,
- invocation: ToolHookInvocation
-): Promise {
- return withTimeout(
- Promise.resolve(hook.run(invocation)),
- hook.timeoutMs ?? 5_000,
- `${hook.phase} hook timed out`
- )
-}
-
-async function runCommandHook(
- hook: Extract,
- invocation: ToolHookInvocation
-): Promise {
- const payload = JSON.stringify(invocation)
- const child = spawn(hook.command, {
- cwd: hook.cwd || invocation.context.workspace || undefined,
- shell: true,
- stdio: ['pipe', 'pipe', 'pipe']
- })
- child.stdin.end(payload)
- let stdout = ''
- let stderr = ''
- child.stdout.on('data', (chunk) => {
- stdout += String(chunk)
- })
- child.stderr.on('data', (chunk) => {
- stderr += String(chunk)
- })
- const exitCode = await withTimeout(
- new Promise((resolve) => {
- child.on('close', (code) => resolve(code ?? 0))
- }),
- hook.timeoutMs ?? 5_000,
- `${hook.phase} command hook timed out`
- ).catch((error) => {
- terminateSpawnTree(child)
- throw error
- })
- if (exitCode !== 0) {
- return {
- decision: hook.phase === 'PreToolUse' ? 'deny' : undefined,
- isError: hook.phase === 'PostToolUse' ? true : undefined,
- message: stderr.trim() || `${hook.phase} command hook exited with ${exitCode}`
- }
- }
- const text = stdout.trim()
- if (!text) return undefined
- try {
- return JSON.parse(text) as ToolHookResult
- } catch {
- return { message: text }
- }
-}
-
-async function withTimeout(promise: Promise, timeoutMs: number, message: string): Promise {
- let timer: ReturnType | undefined
- try {
- return await Promise.race([
- promise,
- new Promise((_resolve, reject) => {
- timer = setTimeout(() => reject(new Error(message)), Math.max(1, timeoutMs))
- })
- ])
- } finally {
- if (timer) clearTimeout(timer)
- }
-}
diff --git a/kun/src/adapters/tool/web-tool-provider.ts b/kun/src/adapters/tool/web-tool-provider.ts
index 2472dc75..831fb69c 100644
--- a/kun/src/adapters/tool/web-tool-provider.ts
+++ b/kun/src/adapters/tool/web-tool-provider.ts
@@ -6,6 +6,9 @@ import { LocalToolHost } from './local-tool-host.js'
const DEFAULT_WEB_TIMEOUT_MS = 15_000
const DEFAULT_WEB_MAX_BYTES = 1_000_000
+// Models sometimes pass tiny max_bytes budgets (2000 was common in the
+// wild); below this floor the extracted text is too small to be useful.
+const MIN_WEB_FETCH_BYTES = 4_096
const DEFAULT_SEARCH_LIMIT = 5
const MAX_SEARCH_LIMIT = 10
@@ -110,7 +113,13 @@ function createFetchTool(config: WebCapabilityConfig, provider: WebProvider) {
const policy = validateUrlPolicy(rawUrl, config)
if (!policy.ok) return toolError('policy_blocked', policy.reason, telemetry({ startedAt, policy: 'blocked', url: rawUrl }))
if (!provider.fetch) return toolError('provider_unavailable', 'web fetch provider is unavailable')
- const maxBytes = boundedInt(args.max_bytes, DEFAULT_WEB_MAX_BYTES, 1, DEFAULT_WEB_MAX_BYTES)
+ const maxBytesCap = config.maxFetchBytes ?? DEFAULT_WEB_MAX_BYTES
+ const maxBytes = boundedInt(
+ args.max_bytes,
+ maxBytesCap,
+ Math.min(MIN_WEB_FETCH_BYTES, maxBytesCap),
+ maxBytesCap
+ )
const timeoutMs = boundedInt(args.timeout_ms, DEFAULT_WEB_TIMEOUT_MS, 1, DEFAULT_WEB_TIMEOUT_MS)
try {
const result = await provider.fetch({
@@ -212,11 +221,9 @@ class FetchWebProvider implements WebProvider {
const response = await fetch(request.url, { signal: controller.signal })
if (!response.ok) throw new Error(`HTTP ${response.status}`)
- // Fast-fail if content-length is known and exceeds limit
- const contentLength = response.headers.get('content-length')
- if (contentLength && Number(contentLength) > request.maxBytes) {
- throw new Error(`content exceeds ${request.maxBytes} byte limit`)
- }
+ // Oversized pages truncate at maxBytes via the streaming read below.
+ // Hard-failing on the declared content-length made most real pages
+ // unfetchable whenever the model passed a small byte budget.
// Stream response body with size limit
const reader = response.body?.getReader()
diff --git a/kun/src/attachments/attachment-store.ts b/kun/src/attachments/attachment-store.ts
index 99fb3bc3..3543da8e 100644
--- a/kun/src/attachments/attachment-store.ts
+++ b/kun/src/attachments/attachment-store.ts
@@ -14,6 +14,7 @@ export interface AttachmentStore {
name: string
data: Buffer
mimeType?: string
+ localFilePath?: string
textFallback?: AttachmentTextFallback
threadId?: string
workspace?: string
@@ -40,6 +41,7 @@ export class FileAttachmentStore implements AttachmentStore {
name: string
data: Buffer
mimeType?: string
+ localFilePath?: string
textFallback?: AttachmentTextFallback
threadId?: string
workspace?: string
@@ -64,6 +66,7 @@ export class FileAttachmentStore implements AttachmentStore {
if (existing) {
const next = mergeScope({
...existing,
+ ...(input.localFilePath ? { localFilePath: input.localFilePath } : {}),
...(input.textFallback ? { textFallback: input.textFallback } : {}),
updatedAt: now
}, input)
@@ -79,6 +82,7 @@ export class FileAttachmentStore implements AttachmentStore {
hash,
...(image.width ? { width: image.width } : {}),
...(image.height ? { height: image.height } : {}),
+ ...(input.localFilePath ? { localFilePath: input.localFilePath } : {}),
...(input.textFallback ? { textFallback: input.textFallback } : {}),
threadIds: [],
workspaces: [],
@@ -179,7 +183,7 @@ function validateTextFallback(fallback: AttachmentTextFallback, config: Attachme
}
}
-function detectImage(buffer: Buffer): { mimeType: string; width?: number; height?: number } | null {
+export function detectImage(buffer: Buffer): { mimeType: string; width?: number; height?: number } | null {
if (buffer.length >= 24 && buffer[0] === 0x89 && buffer[1] === 0x50 && buffer[2] === 0x4e && buffer[3] === 0x47) {
return { mimeType: 'image/png', width: buffer.readUInt32BE(16), height: buffer.readUInt32BE(20) }
}
diff --git a/kun/src/cli/agent-cli.ts b/kun/src/cli/agent-cli.ts
index a53a2b8f..54b0b11d 100644
--- a/kun/src/cli/agent-cli.ts
+++ b/kun/src/cli/agent-cli.ts
@@ -339,6 +339,7 @@ function buildExecContext(options: ServeOptions, workspace: string): ToolHostCon
memoryPolicy: { enabled: false },
delegationPolicy: { enabled: false },
approvalPolicy: options.approvalPolicy,
+ sandboxMode: options.sandboxMode,
abortSignal: new AbortController().signal,
awaitApproval: async () => (options.approvalPolicy === 'auto' ? 'allow' : 'deny')
}
diff --git a/kun/src/cli/cli-options.ts b/kun/src/cli/cli-options.ts
index ff6518fb..368f9653 100644
--- a/kun/src/cli/cli-options.ts
+++ b/kun/src/cli/cli-options.ts
@@ -23,6 +23,7 @@ import {
MODEL_ENDPOINT_FORMATS,
normalizeModelEndpointFormat
} from '../contracts/model-endpoint-format.js'
+import { HooksConfigSchema } from '../hooks/hook-config.js'
export const DEFAULT_SERVE_PORT = 8899
export const DEFAULT_SERVE_MODEL = DEFAULT_KUN_MODEL
@@ -54,7 +55,8 @@ export const ServeOptionsSchema = z.object({
models: ModelConfigSchema.optional(),
contextCompaction: ContextCompactionConfigSchema.optional(),
runtime: RuntimeTuningConfigSchema.optional(),
- capabilities: KunCapabilitiesConfig.default(DEFAULT_KUN_CAPABILITIES_CONFIG)
+ capabilities: KunCapabilitiesConfig.default(DEFAULT_KUN_CAPABILITIES_CONFIG),
+ hooks: HooksConfigSchema.optional()
})
export type ServeOptions = z.infer
diff --git a/kun/src/cli/serve-entry.ts b/kun/src/cli/serve-entry.ts
index 7994ff98..4f1a0588 100644
--- a/kun/src/cli/serve-entry.ts
+++ b/kun/src/cli/serve-entry.ts
@@ -6,10 +6,40 @@ import {
runAgentCommand,
splitKunCliCommand
} from './agent-cli.js'
-import { startKunServe } from '../server/runtime-factory.js'
+import { startKunServe, type KunServeHandle } from '../server/runtime-factory.js'
export const KUN_READY_PREFIX = 'KUN_READY '
+/**
+ * Serve mode runs unattended under the GUI. An uncaught error must not
+ * leave a half-dead process: report it on stderr (the GUI captures the
+ * tail), attempt a bounded graceful close, then exit non-zero so the
+ * GUI supervisor can restart us.
+ *
+ * @param getHandle Returns the live serve handle, or `null` while no
+ *   handle exists yet; with no handle the process exits immediately.
+ */
+function installServeCrashHandlers(getHandle: () => KunServeHandle | null): void {
+  let crashing = false
+  const crash = (kind: string, error: unknown): void => {
+    // Only the first fatal error drives shutdown; later ones are ignored.
+    if (crashing) return
+    crashing = true
+    // Record the failure status before anything asynchronous happens. The
+    // deadline timer below is unref'd, so if `close()` never settles and the
+    // event loop drains, Node exits on its own; without this it would exit 0.
+    process.exitCode = ServeExitCode.runtime
+    const detail = error instanceof Error ? (error.stack ?? error.message) : String(error)
+    process.stderr.write(`kun serve: ${kind}: ${detail}\n`)
+    const finish = (): void => process.exit(ServeExitCode.runtime)
+    const handle = getHandle()
+    if (!handle) {
+      finish()
+      return
+    }
+    // Bound the graceful close: exit after 3s even if `close()` is still pending.
+    const deadline = setTimeout(finish, 3000)
+    deadline.unref()
+    void handle
+      .close()
+      // A failing close must not mask the original crash; exit regardless.
+      .catch(() => undefined)
+      .finally(finish)
+  }
+  process.on('uncaughtException', (error) => crash('uncaughtException', error))
+  process.on('unhandledRejection', (reason) => crash('unhandledRejection', reason))
+}
+
/**
* Serve-mode command. Kept separate from the dispatcher so GUI startup
* still has the exact same KUN_READY handshake behavior.
@@ -27,13 +57,16 @@ async function serveMain(argv: readonly string[]): Promise {
}
return parsed.exitCode
}
- const handle = await startKunServe(parsed.options)
- const info = handle.runtime.info()
+ let handle: KunServeHandle | null = null
+ installServeCrashHandlers(() => handle)
+ const server = await startKunServe(parsed.options)
+ handle = server
+ const info = server.runtime.info()
const startupInfo = {
service: 'kun',
mode: 'serve',
- host: handle.host,
- port: handle.port,
+ host: server.host,
+ port: server.port,
configPath: info.configPath,
dataDir: info.dataDir,
model: info.model,
@@ -42,13 +75,13 @@ async function serveMain(argv: readonly string[]): Promise {
insecure: info.insecure,
startedAt: info.startedAt,
pid: info.pid,
- message: `kun runtime listening on http://${handle.host}:${handle.port}`
+ message: `kun runtime listening on http://${server.host}:${server.port}`
}
process.stdout.write(`${KUN_READY_PREFIX}${JSON.stringify(startupInfo)}\n`)
process.stdout.write(JSON.stringify(startupInfo, null, 2) + '\n')
await new Promise((resolve) => {
const stop = () => {
- void handle.close().finally(resolve)
+ void server.close().finally(resolve)
}
process.once('SIGTERM', stop)
process.once('SIGINT', stop)
diff --git a/kun/src/cli/serve.ts b/kun/src/cli/serve.ts
index 707ee364..754f9fd4 100644
--- a/kun/src/cli/serve.ts
+++ b/kun/src/cli/serve.ts
@@ -136,7 +136,8 @@ export function parseServeOptions(
models: loadedConfig?.config.models,
contextCompaction: loadedConfig?.config.contextCompaction,
runtime: loadedConfig?.config.runtime,
- capabilities: loadedConfig?.config.capabilities ?? DEFAULT_SERVE_OPTIONS.capabilities
+ capabilities: loadedConfig?.config.capabilities ?? DEFAULT_SERVE_OPTIONS.capabilities,
+ hooks: loadedConfig?.config.hooks
}
return ServeOptionsSchema.parse(merged)
}
diff --git a/kun/src/config/kun-config.ts b/kun/src/config/kun-config.ts
index 7181e612..72a64ffa 100644
--- a/kun/src/config/kun-config.ts
+++ b/kun/src/config/kun-config.ts
@@ -12,13 +12,15 @@ import {
DEFAULT_KUN_CAPABILITIES_CONFIG,
KunCapabilitiesConfig,
ModelInputModality,
- ModelMessagePartSupport
+ ModelMessagePartSupport,
+ ModelReasoningCapabilityMetadata
} from '../contracts/capabilities.js'
import {
DEFAULT_MODEL_ENDPOINT_FORMAT,
MODEL_ENDPOINT_FORMATS,
normalizeModelEndpointFormat
} from '../contracts/model-endpoint-format.js'
+import { HooksConfigSchema } from '../hooks/hook-config.js'
export const KUN_CONFIG_FILENAME = 'config.json'
export const DEFAULT_KUN_MODEL = 'deepseek-v4-pro'
@@ -59,7 +61,8 @@ export const ModelContextProfileConfigSchema = z
inputModalities: z.array(ModelInputModality).optional(),
outputModalities: z.array(ModelInputModality).optional(),
supportsToolCalling: z.boolean().optional(),
- messageParts: z.array(ModelMessagePartSupport).optional()
+ messageParts: z.array(ModelMessagePartSupport).optional(),
+ reasoning: ModelReasoningCapabilityMetadata.optional()
})
.strict()
.superRefine((profile, ctx) => {
@@ -193,7 +196,8 @@ export const KunConfigSchema = z
models: ModelConfigSchema.optional(),
contextCompaction: ContextCompactionConfigSchema.optional(),
runtime: RuntimeTuningConfigSchema.optional(),
- capabilities: KunCapabilitiesConfig.default(DEFAULT_KUN_CAPABILITIES_CONFIG)
+ capabilities: KunCapabilitiesConfig.default(DEFAULT_KUN_CAPABILITIES_CONFIG),
+ hooks: HooksConfigSchema.optional()
})
.strict()
diff --git a/kun/src/contracts/attachments.ts b/kun/src/contracts/attachments.ts
index 98bab076..b253a002 100644
--- a/kun/src/contracts/attachments.ts
+++ b/kun/src/contracts/attachments.ts
@@ -18,6 +18,7 @@ export const AttachmentMetadata = z.object({
hash: z.string().min(1),
width: z.number().int().positive().optional(),
height: z.number().int().positive().optional(),
+ localFilePath: z.string().min(1).optional(),
textFallback: AttachmentTextFallback.optional(),
threadIds: z.array(z.string().min(1)).default([]),
workspaces: z.array(z.string().min(1)).default([]),
@@ -30,6 +31,7 @@ export const AttachmentUploadRequest = z.object({
name: z.string().min(1),
mimeType: z.string().min(1).optional(),
dataBase64: z.string().min(1),
+ localFilePath: z.string().min(1).optional(),
textFallback: AttachmentTextFallback.optional(),
threadId: z.string().min(1).optional(),
workspace: z.string().min(1).optional()
diff --git a/kun/src/contracts/capabilities.ts b/kun/src/contracts/capabilities.ts
index cd7d7326..4fa32aa1 100644
--- a/kun/src/contracts/capabilities.ts
+++ b/kun/src/contracts/capabilities.ts
@@ -21,6 +21,27 @@ export type ModelInputModality = z.infer
export const ModelMessagePartSupport = z.enum(['text', 'image_url', 'input_image'])
export type ModelMessagePartSupport = z.infer
+/** Reasoning effort levels accepted in model capability metadata. */
+export const ModelReasoningEffort = z.enum(['auto', 'off', 'low', 'medium', 'high', 'max'])
+export type ModelReasoningEffort = z.infer<typeof ModelReasoningEffort>
+
+/** Identifiers accepted for the `requestProtocol` field of reasoning metadata. */
+export const ModelReasoningRequestProtocol = z.enum([
+  'none',
+  'deepseek-chat-completions',
+  'mimo-chat-completions',
+  'openai-responses',
+  'anthropic-thinking'
+])
+export type ModelReasoningRequestProtocol = z.infer<typeof ModelReasoningRequestProtocol>
+
+/**
+ * Reasoning section of a model capability profile. Unknown keys are
+ * rejected (`strict`) and at least one supported effort is required.
+ *
+ * NOTE(review): nothing here checks that `defaultEffort` is a member of
+ * `supportedEfforts`; confirm whether a consumer enforces that.
+ */
+export const ModelReasoningCapabilityMetadata = z
+  .object({
+    supportedEfforts: z.array(ModelReasoningEffort).min(1),
+    defaultEffort: ModelReasoningEffort,
+    requestProtocol: ModelReasoningRequestProtocol
+  })
+  .strict()
+export type ModelReasoningCapabilityMetadata = z.infer<typeof ModelReasoningCapabilityMetadata>
+
export const ModelCapabilityMetadata = z
.object({
id: z.string().min(1),
@@ -28,7 +49,8 @@ export const ModelCapabilityMetadata = z
outputModalities: z.array(ModelInputModality).min(1),
supportsToolCalling: z.boolean(),
contextWindowTokens: z.number().int().positive().optional(),
- messageParts: z.array(ModelMessagePartSupport).min(1)
+ messageParts: z.array(ModelMessagePartSupport).min(1),
+ reasoning: ModelReasoningCapabilityMetadata.optional()
})
.strict()
export type ModelCapabilityMetadata = z.infer
@@ -146,7 +168,9 @@ export const WebCapabilityConfig = CapabilityToggleConfig.extend({
searchEnabled: z.boolean().default(false),
provider: z.string().min(1).optional(),
allowDomains: z.array(z.string().min(1)).default([]),
- denyDomains: z.array(z.string().min(1)).default([])
+ denyDomains: z.array(z.string().min(1)).default([]),
+ /** Upper bound for web_fetch body bytes; fetched pages truncate here. */
+ maxFetchBytes: z.number().int().positive().default(1_000_000)
}).strict()
export type WebCapabilityConfig = z.infer
@@ -186,6 +210,62 @@ export const MemoryCapabilityConfig = CapabilityToggleConfig.extend({
}).strict()
export type MemoryCapabilityConfig = z.infer
+/** Protocol identifiers accepted for image generation. */
+export const ImageGenerationProtocol = z.enum(['openai-images', 'minimax-image'])
+export type ImageGenerationProtocol = z.infer<typeof ImageGenerationProtocol>
+
+/**
+ * Image generation capability settings. Extends the shared toggle config;
+ * defaults: `openai-images` protocol, 180s timeout, 4 reference images
+ * (hard cap 8). Unknown keys are rejected.
+ */
+export const ImageGenCapabilityConfig = CapabilityToggleConfig.extend({
+  protocol: ImageGenerationProtocol.default('openai-images'),
+  baseUrl: z.string().min(1).optional(),
+  apiKey: z.string().min(1).optional(),
+  model: z.string().min(1).optional(),
+  defaultSize: z.string().min(1).optional(),
+  timeoutMs: z.number().int().positive().default(180_000),
+  maxReferenceImages: z.number().int().positive().max(8).default(4)
+}).strict()
+export type ImageGenCapabilityConfig = z.infer<typeof ImageGenCapabilityConfig>
+
+/** Protocol identifiers accepted for text-to-speech. */
+export const TextToSpeechProtocol = z.enum(['openai-speech', 'minimax-t2a', 'mimo-tts'])
+export type TextToSpeechProtocol = z.infer<typeof TextToSpeechProtocol>
+
+/**
+ * Speech generation capability settings. Defaults: `openai-speech`
+ * protocol, `mp3` format, 120s timeout. Unknown keys are rejected.
+ */
+export const SpeechGenCapabilityConfig = CapabilityToggleConfig.extend({
+  protocol: TextToSpeechProtocol.default('openai-speech'),
+  baseUrl: z.string().min(1).optional(),
+  apiKey: z.string().min(1).optional(),
+  model: z.string().min(1).optional(),
+  voice: z.string().min(1).optional(),
+  format: z.string().min(1).default('mp3'),
+  timeoutMs: z.number().int().positive().default(120_000)
+}).strict()
+export type SpeechGenCapabilityConfig = z.infer<typeof SpeechGenCapabilityConfig>
+
+/** Protocol identifiers accepted for music generation. */
+export const MusicGenerationProtocol = z.enum(['minimax-music'])
+export type MusicGenerationProtocol = z.infer<typeof MusicGenerationProtocol>
+
+/**
+ * Music generation capability settings. Defaults: `minimax-music`
+ * protocol, `mp3` format, 300s timeout. Unknown keys are rejected.
+ */
+export const MusicGenCapabilityConfig = CapabilityToggleConfig.extend({
+  protocol: MusicGenerationProtocol.default('minimax-music'),
+  baseUrl: z.string().min(1).optional(),
+  apiKey: z.string().min(1).optional(),
+  model: z.string().min(1).optional(),
+  format: z.string().min(1).default('mp3'),
+  timeoutMs: z.number().int().positive().default(300_000)
+}).strict()
+export type MusicGenCapabilityConfig = z.infer<typeof MusicGenCapabilityConfig>
+
+/** Protocol identifiers accepted for video generation. */
+export const VideoGenerationProtocol = z.enum(['minimax-video'])
+export type VideoGenerationProtocol = z.infer<typeof VideoGenerationProtocol>
+
+/**
+ * Video generation capability settings. Defaults: `minimax-video`
+ * protocol, duration 6, resolution `1080P`, 900s timeout, 10s poll
+ * interval. Unknown keys are rejected.
+ */
+export const VideoGenCapabilityConfig = CapabilityToggleConfig.extend({
+  protocol: VideoGenerationProtocol.default('minimax-video'),
+  baseUrl: z.string().min(1).optional(),
+  apiKey: z.string().min(1).optional(),
+  model: z.string().min(1).optional(),
+  defaultDuration: z.number().int().positive().default(6),
+  defaultResolution: z.string().min(1).default('1080P'),
+  timeoutMs: z.number().int().positive().default(900_000),
+  pollIntervalMs: z.number().int().positive().default(10_000)
+}).strict()
+export type VideoGenCapabilityConfig = z.infer<typeof VideoGenCapabilityConfig>
+
export const KunCapabilitiesConfig = z
.object({
mcp: McpCapabilityConfig.default(() => McpCapabilityConfig.parse({})),
@@ -193,7 +273,11 @@ export const KunCapabilitiesConfig = z
skills: SkillsCapabilityConfig.default(() => SkillsCapabilityConfig.parse({})),
subagents: SubagentsCapabilityConfig.default(() => SubagentsCapabilityConfig.parse({})),
attachments: AttachmentsCapabilityConfig.default(() => AttachmentsCapabilityConfig.parse({})),
- memory: MemoryCapabilityConfig.default(() => MemoryCapabilityConfig.parse({}))
+ memory: MemoryCapabilityConfig.default(() => MemoryCapabilityConfig.parse({})),
+ imageGen: ImageGenCapabilityConfig.default(() => ImageGenCapabilityConfig.parse({})),
+ speechGen: SpeechGenCapabilityConfig.default(() => SpeechGenCapabilityConfig.parse({})),
+ musicGen: MusicGenCapabilityConfig.default(() => MusicGenCapabilityConfig.parse({})),
+ videoGen: VideoGenCapabilityConfig.default(() => VideoGenCapabilityConfig.parse({}))
})
.strict()
export type KunCapabilitiesConfig = z.infer
@@ -250,6 +334,18 @@ export const RuntimeCapabilityManifest = z
memory: RuntimeCapabilityState.extend({
scopes: z.array(z.enum(['user', 'workspace', 'project'])),
maxInjectedRecords: z.number().int().positive()
+ }).strict(),
+ imageGen: RuntimeCapabilityState.extend({
+ model: z.string().optional()
+ }).strict(),
+ speechGen: RuntimeCapabilityState.extend({
+ model: z.string().optional()
+ }).strict(),
+ musicGen: RuntimeCapabilityState.extend({
+ model: z.string().optional()
+ }).strict(),
+ videoGen: RuntimeCapabilityState.extend({
+ model: z.string().optional()
}).strict()
})
.strict()
@@ -292,6 +388,22 @@ export function buildRuntimeCapabilityManifest(input: {
available?: boolean
reason?: string
}
+ imageGen?: {
+ available?: boolean
+ reason?: string
+ }
+ speechGen?: {
+ available?: boolean
+ reason?: string
+ }
+ musicGen?: {
+ available?: boolean
+ reason?: string
+ }
+ videoGen?: {
+ available?: boolean
+ reason?: string
+ }
}): RuntimeCapabilityManifest {
const config = KunCapabilitiesConfig.parse(input.config ?? {})
const configuredMcpServers = input.mcp?.configuredServers ?? Object.keys(config.mcp.servers).length
@@ -380,6 +492,42 @@ export function buildRuntimeCapabilityManifest(input: {
),
scopes: config.memory.scopes,
maxInjectedRecords: config.memory.maxInjectedRecords
+ },
+ imageGen: {
+ ...providerCapabilityState(
+ config.imageGen.enabled,
+ 'image generation is disabled by config',
+ input.imageGen?.available === true,
+ input.imageGen?.reason ?? 'image generation provider is not configured'
+ ),
+ ...(config.imageGen.model ? { model: config.imageGen.model } : {})
+ },
+ speechGen: {
+ ...providerCapabilityState(
+ config.speechGen.enabled,
+ 'speech generation is disabled by config',
+ input.speechGen?.available === true,
+ input.speechGen?.reason ?? 'speech generation provider is not configured'
+ ),
+ ...(config.speechGen.model ? { model: config.speechGen.model } : {})
+ },
+ musicGen: {
+ ...providerCapabilityState(
+ config.musicGen.enabled,
+ 'music generation is disabled by config',
+ input.musicGen?.available === true,
+ input.musicGen?.reason ?? 'music generation provider is not configured'
+ ),
+ ...(config.musicGen.model ? { model: config.musicGen.model } : {})
+ },
+ videoGen: {
+ ...providerCapabilityState(
+ config.videoGen.enabled,
+ 'video generation is disabled by config',
+ input.videoGen?.available === true,
+ input.videoGen?.reason ?? 'video generation provider is not configured'
+ ),
+ ...(config.videoGen.model ? { model: config.videoGen.model } : {})
}
})
}
diff --git a/kun/src/contracts/errors.ts b/kun/src/contracts/errors.ts
index 9a1f3777..89307146 100644
--- a/kun/src/contracts/errors.ts
+++ b/kun/src/contracts/errors.ts
@@ -3,7 +3,7 @@ import { z } from 'zod'
/**
* Structured API error codes returned by every Kun HTTP/SSE endpoint.
*
- * The error contract mirrors what DeepSeek-GUI diagnostics can render:
+ * The error contract mirrors what Kun diagnostics can render:
* the renderer needs a stable `code` to drive UI state and a human-readable
* `message` to surface in toasts. `details` carries optional, JSON-encodable
* per-endpoint information (for example a Zod issue list).
diff --git a/kun/src/contracts/events.ts b/kun/src/contracts/events.ts
index 0d31c52f..736e1a11 100644
--- a/kun/src/contracts/events.ts
+++ b/kun/src/contracts/events.ts
@@ -3,6 +3,7 @@ import { TurnItem } from './items.js'
import { ThreadGoalSchema, ThreadTodoListSchema } from './threads.js'
import { UsageSnapshotSchema } from './usage.js'
import { RuntimeErrorSeverity } from './errors.js'
+import { ApprovalPolicySchema, SandboxModeSchema } from './policy.js'
/**
* Persisted runtime events. Every event has a per-thread `seq` so the
@@ -118,6 +119,8 @@ export const ApprovalEvent = RuntimeEventBase.extend({
approvalId: z.string().min(1),
toolName: z.string().min(1),
status: z.enum(['pending', 'allowed', 'denied', 'expired']),
+ approvalPolicy: ApprovalPolicySchema.optional(),
+ sandboxMode: SandboxModeSchema.optional(),
summary: z.string().optional()
})
export type ApprovalEvent = z.infer
diff --git a/kun/src/contracts/threads.ts b/kun/src/contracts/threads.ts
index a37a8d11..0d01ecd1 100644
--- a/kun/src/contracts/threads.ts
+++ b/kun/src/contracts/threads.ts
@@ -123,6 +123,8 @@ export const ThreadSummarySchema = ThreadSchema.pick({
model: true,
mode: true,
status: true,
+ approvalPolicy: true,
+ sandboxMode: true,
costBudgetUsd: true,
costBudgetWarningSent: true,
relation: true,
diff --git a/kun/src/contracts/turns.ts b/kun/src/contracts/turns.ts
index d893d0dc..0a73d3a1 100644
--- a/kun/src/contracts/turns.ts
+++ b/kun/src/contracts/turns.ts
@@ -1,7 +1,7 @@
import { z } from 'zod'
import { TurnItem } from './items.js'
import { isGuiPlanRelativePath } from '../shared/gui-plan.js'
-import { ApprovalPolicySchema } from './policy.js'
+import { ApprovalPolicySchema, SandboxModeSchema } from './policy.js'
/**
* Mode enum, inlined here (instead of importing `ThreadMode` from
@@ -77,6 +77,12 @@ export const TurnSchema = z.object({
* otherwise agent thread, or a Build turn that runs as agent).
*/
mode: TurnModeSchema.optional(),
+ /**
+ * True when no interactive user is attached to this turn (IM bridges,
+ * headless runs). Kun hides `user_input`/`request_user_input` and
+ * rejects calls to them instead of blocking on a GUI answer.
+ */
+ disableUserInput: z.boolean().optional(),
error: z.string().optional()
})
export type Turn = z.infer
@@ -87,6 +93,7 @@ export const StartTurnRequest = z.object({
model: z.string().optional(),
reasoningEffort: TurnReasoningEffortSchema.optional(),
approvalPolicy: ApprovalPolicySchema.optional(),
+ sandboxMode: SandboxModeSchema.optional(),
/**
* Optional per-turn mode. Overrides the thread mode for this turn so
* the GUI can toggle Plan/agent without recreating the thread. In Plan
@@ -107,7 +114,13 @@ export const StartTurnRequest = z.object({
* `create_plan` tool for the turn and writes only to the reserved
* path advertised in the context.
*/
- guiPlan: GuiPlanContextSchema.optional()
+ guiPlan: GuiPlanContextSchema.optional(),
+ /**
+ * True when the caller cannot relay structured input prompts to a
+ * user (IM bridges such as WeChat/Feishu, headless runs). The turn
+ * runs without the `user_input`/`request_user_input` tools.
+ */
+ disableUserInput: z.boolean().optional()
})
export type StartTurnRequest = z.input
diff --git a/kun/src/contracts/usage.ts b/kun/src/contracts/usage.ts
index 064ec0e9..f9bd78b8 100644
--- a/kun/src/contracts/usage.ts
+++ b/kun/src/contracts/usage.ts
@@ -19,6 +19,12 @@ export const UsageSnapshotSchema = z.object({
turns: z.number().int().nonnegative(),
costUsd: z.number().nonnegative().optional(),
costCny: z.number().nonnegative().optional(),
+ /**
+ * @deprecated Savings are reported in tokens only (cache hits via
+ * `cacheHitTokens`, compression via `tokenEconomySavingsTokens`).
+ * The money fields remain parseable for persisted threads recorded
+ * by older runtimes but are no longer populated.
+ */
cacheSavingsUsd: z.number().nonnegative().optional(),
cacheSavingsCny: z.number().nonnegative().optional(),
tokenEconomySavingsTokens: z.number().int().nonnegative().optional(),
@@ -121,9 +127,5 @@ export const emptyUsageSnapshot = (): UsageSnapshot => ({
cacheMissTokens: 0,
cacheHitRate: null,
turns: 0,
- cacheSavingsUsd: 0,
- cacheSavingsCny: 0,
- tokenEconomySavingsTokens: 0,
- tokenEconomySavingsUsd: 0,
- tokenEconomySavingsCny: 0
+ tokenEconomySavingsTokens: 0
})
diff --git a/kun/src/domain/thread.ts b/kun/src/domain/thread.ts
index e07667a7..9a79454c 100644
--- a/kun/src/domain/thread.ts
+++ b/kun/src/domain/thread.ts
@@ -77,7 +77,7 @@ export function toThreadSummary(
thread: ThreadEntity
): Pick<
ThreadEntity,
- 'id' | 'title' | 'workspace' | 'model' | 'mode' | 'status' | 'createdAt' | 'updatedAt'
+ 'id' | 'title' | 'workspace' | 'model' | 'mode' | 'status' | 'approvalPolicy' | 'sandboxMode' | 'createdAt' | 'updatedAt'
| 'costBudgetUsd' | 'costBudgetWarningSent'
| 'relation' | 'parentThreadId'
| 'forkedFromThreadId' | 'forkedFromTitle' | 'forkedAt' | 'forkedFromMessageCount' | 'forkedFromTurnCount'
@@ -90,6 +90,8 @@ export function toThreadSummary(
model: thread.model,
mode: thread.mode,
status: thread.status,
+ approvalPolicy: thread.approvalPolicy,
+ sandboxMode: thread.sandboxMode,
...(thread.costBudgetUsd !== undefined ? { costBudgetUsd: thread.costBudgetUsd } : {}),
...(thread.costBudgetWarningSent !== undefined ? { costBudgetWarningSent: thread.costBudgetWarningSent } : {}),
relation: thread.relation ?? 'primary',
diff --git a/kun/src/domain/turn.ts b/kun/src/domain/turn.ts
index 586c74f7..41ddec21 100644
--- a/kun/src/domain/turn.ts
+++ b/kun/src/domain/turn.ts
@@ -13,6 +13,7 @@ export function createTurnRecord(input: {
attachmentIds?: string[]
guiPlan?: GuiPlanContextJson
mode?: ThreadMode
+ disableUserInput?: boolean
createdAt?: string
status?: TurnStatus
}): TurnEntity {
@@ -32,6 +33,7 @@ export function createTurnRecord(input: {
...(reasoningEffort ? { reasoningEffort } : {}),
...(input.guiPlan ? { guiPlan: input.guiPlan } : {}),
...(input.mode ? { mode: input.mode } : {}),
+ ...(input.disableUserInput ? { disableUserInput: true } : {}),
createdAt: input.createdAt ?? new Date().toISOString()
}
}
diff --git a/kun/src/hooks/hook-config.ts b/kun/src/hooks/hook-config.ts
new file mode 100644
index 00000000..ca4e141b
--- /dev/null
+++ b/kun/src/hooks/hook-config.ts
@@ -0,0 +1,39 @@
+import { z } from 'zod'
+import { HOOK_PHASES, type ResolvedHook } from './hook-engine.js'
+
+/**
+ * Command hook entry as written in `config.json` under the top-level
+ * `hooks` key. Only command hooks are configurable from JSON; function
+ * hooks are reserved for embedders that assemble the runtime in code.
+ * Unknown keys are rejected (`strict`).
+ */
+export const HookCommandConfigSchema = z
+  .object({
+    phase: z.enum(HOOK_PHASES),
+    /** Glob matched against the tool name (`*` wildcard, `|` alternation). Tool phases only. */
+    matcher: z.string().min(1).optional(),
+    /** Exact tool-name list; matches when either this or `matcher` matches. Tool phases only. */
+    toolNames: z.array(z.string().min(1)).optional(),
+    /** Shell command. Receives the invocation as JSON on stdin. */
+    command: z.string().min(1),
+    /** Working directory; defaults to the active workspace. */
+    cwd: z.string().min(1).optional(),
+    /** Per-hook timeout in milliseconds; must be a positive integer when set. */
+    timeoutMs: z.number().int().positive().optional()
+  })
+  .strict()
+
+/** The `hooks` config value: an ordered list of command hook entries. */
+export const HooksConfigSchema = z.array(HookCommandConfigSchema)
+
+export type HookCommandConfig = z.infer<typeof HookCommandConfigSchema>
+export type HooksConfig = z.infer<typeof HooksConfigSchema>
+
+/**
+ * Turn validated `hooks` config entries into runnable command hooks,
+ * preserving their order. Optional fields are copied only when set, so
+ * the resulting objects never carry `undefined` properties.
+ *
+ * @param config Parsed hook list, or `undefined` when none is configured.
+ * @returns A new array of command hooks (empty when `config` is absent).
+ */
+export function resolveConfiguredHooks(config: HooksConfig | undefined): ResolvedHook[] {
+  if (!config) return []
+  return config.map(({ phase, matcher, toolNames, timeoutMs, command, cwd }) => ({
+    phase,
+    ...(matcher ? { matcher } : {}),
+    ...(toolNames ? { toolNames } : {}),
+    ...(timeoutMs ? { timeoutMs } : {}),
+    command,
+    ...(cwd ? { cwd } : {})
+  }))
+}
diff --git a/kun/src/hooks/hook-engine.ts b/kun/src/hooks/hook-engine.ts
new file mode 100644
index 00000000..48d4cebd
--- /dev/null
+++ b/kun/src/hooks/hook-engine.ts
@@ -0,0 +1,387 @@
+import { spawn } from 'node:child_process'
+import type { ToolCallLike, ToolHostContext } from '../ports/tool-host.js'
+import { terminateSpawnTree } from '../adapters/tool/builtin-tool-utils.js'
+
+/**
+ * Hook phases. Tool phases run inside the tool host around every tool
+ * call; lifecycle phases run inside the agent loop. `UserPromptSubmit`
+ * may deny the turn or inject extra context; `TurnStart`, `TurnEnd`,
+ * and `PreCompact` are observe-only.
+ */
+export const HOOK_PHASES = [
+ 'PreToolUse',
+ 'PostToolUse',
+ 'UserPromptSubmit',
+ 'TurnStart',
+ 'TurnEnd',
+ 'PreCompact'
+] as const
+
+/** Union of the phase names listed in `HOOK_PHASES`. */
+export type HookPhase = (typeof HOOK_PHASES)[number]
+
+/** Subset of the tool host context that is handed to PreToolUse/PostToolUse hooks. */
+export type ToolHookContext = Pick<
+ ToolHostContext,
+ 'threadId' | 'turnId' | 'workspace' | 'threadMode' | 'approvalPolicy' | 'sandboxMode'
+>
+
+/** Tool result as seen (and possibly rewritten) by PostToolUse hooks. */
+export type ToolHookResultPayload = {
+ output: unknown
+ isError?: boolean
+}
+
+/**
+ * Payload passed to a hook, discriminated by `phase`. Tool phases carry
+ * the call and its context; lifecycle phases carry thread/turn ids plus
+ * phase-specific fields.
+ */
+export type HookInvocation =
+ | { phase: 'PreToolUse'; call: ToolCallLike; context: ToolHookContext }
+ | { phase: 'PostToolUse'; call: ToolCallLike; context: ToolHookContext; result: ToolHookResultPayload }
+ | { phase: 'UserPromptSubmit'; threadId: string; turnId: string; prompt: string; workspace?: string }
+ | { phase: 'TurnStart'; threadId: string; turnId: string; prompt: string; workspace?: string }
+ | {
+ phase: 'TurnEnd'
+ threadId: string
+ turnId: string
+ status: 'completed' | 'failed' | 'aborted'
+ error?: string
+ workspace?: string
+ }
+ | { phase: 'PreCompact'; threadId: string; turnId: string; reason: string; mode?: string; workspace?: string }
+
+/**
+ * What a hook may return. Every field is optional; which fields are
+ * honored depends on the phase the hook ran in (see per-field notes).
+ */
+export type HookResult = {
+  /**
+   * `deny` blocks the action (tool call or turn) with `message` as the
+   * reason. `allow` on PreToolUse additionally skips approval prompting
+   * for this call. Later hooks can still deny an earlier allow.
+   */
+  decision?: 'allow' | 'deny'
+  message?: string
+  /**
+   * PreToolUse only: replaces the tool arguments for subsequent hooks and execution.
+   * NOTE(review): the type arguments were lost in extraction; `string -> unknown`
+   * is assumed here. Confirm against `ToolCallLike['arguments']`.
+   */
+  arguments?: Record<string, unknown>
+  /** PostToolUse only: replaces the tool output for subsequent hooks and the model. */
+  output?: unknown
+  isError?: boolean
+  /** UserPromptSubmit only: extra context appended to the turn as a persisted message. */
+  additionalContext?: string
+}
+
+/**
+ * A hook ready to run. Function hooks (`run`) are for embedders that
+ * assemble the runtime programmatically. Command hooks (`command`) are
+ * what `config.json` resolves to: the invocation is written to stdin as
+ * JSON and the result is read from stdout (see `runCommandHook`).
+ * The two variants are told apart by the presence of `run`.
+ */
+export type ResolvedHook =
+  | {
+      phase: HookPhase
+      /** Glob pattern matched against the tool name: `*` wildcard, `|` alternation. */
+      matcher?: string
+      /** Exact tool-name allow-list. Matches when either this or `matcher` matches. */
+      toolNames?: readonly string[]
+      timeoutMs?: number
+      /** May answer synchronously or asynchronously; returning nothing means "no opinion". */
+      run: (invocation: HookInvocation) => Promise<HookResult | void> | HookResult | void
+    }
+  | {
+      phase: HookPhase
+      matcher?: string
+      toolNames?: readonly string[]
+      timeoutMs?: number
+      command: string
+      cwd?: string
+    }
+
+/**
+ * Timeout in milliseconds applied to a function hook that sets no
+ * `timeoutMs` (see `executeHook`).
+ */
+export const DEFAULT_HOOK_TIMEOUT_MS = 60_000
+
+/** Exit code a command hook uses to block the action (deny / mark error). */
+export const HOOK_BLOCKING_EXIT_CODE = 2
+
+/** Result of running the PreToolUse chain for one tool call. */
+export type PreToolUseOutcome = {
+ call: ToolCallLike
+ denied?: string
+ /** True when a hook returned `decision: 'allow'` and nothing denied: skips approval. */
+ autoApproved: boolean
+ warnings: string[]
+}
+
+/** Result of running the PostToolUse chain: the final output, error flag and warnings. */
+export type PostToolUseOutcome = {
+ output: unknown
+ isError?: boolean
+ warnings: string[]
+}
+
+/** Result of running UserPromptSubmit hooks: optional denial reason, collected context, warnings. */
+export type UserPromptSubmitOutcome = {
+ denied?: string
+ additionalContext: string[]
+ warnings: string[]
+}
+
+/** Result of running observe-only hooks: only warnings are reported. */
+export type ObserverOutcome = {
+ warnings: string[]
+}
+
+/** Report whether at least one hook is registered for `phase`; `false` when `hooks` is absent. */
+export function hasHooksForPhase(hooks: readonly ResolvedHook[] | undefined, phase: HookPhase): boolean {
+  if (!hooks) return false
+  for (const candidate of hooks) {
+    if (candidate.phase === phase) return true
+  }
+  return false
+}
+
+/**
+ * Run PreToolUse hooks in order. Argument rewrites chain: each hook
+ * sees the call as rewritten by the hooks before it. A deny stops the
+ * chain. Hook crashes and timeouts propagate to the caller, which
+ * contains them as a `hook_failed` tool error.
+ *
+ * Hooks are selected once, against the tool name of the incoming call.
+ * A rewrite is applied only when `arguments` is a non-null, non-array
+ * object; any other value is ignored and the previous arguments stay.
+ *
+ * @param hooks All resolved hooks (any phase); `undefined` means none.
+ * @param input The tool call and the context exposed to hooks.
+ * @returns The (possibly rewritten) call, a denial reason if a hook
+ *   denied, whether a hook auto-approved the call, and any warnings.
+ */
+export async function runPreToolUseHooks(
+  hooks: readonly ResolvedHook[] | undefined,
+  input: { call: ToolCallLike; context: ToolHookContext }
+): Promise<PreToolUseOutcome> {
+  let call = input.call
+  let autoApproved = false
+  const warnings: string[] = []
+  for (const hook of hooksForTool(hooks, 'PreToolUse', call.toolName)) {
+    const outcome = await executeHook(hook, { phase: 'PreToolUse', call, context: input.context })
+    if (outcome.warning) warnings.push(outcome.warning)
+    const result = outcome.result
+    if (!result) continue
+    if (result.decision === 'deny') {
+      // A deny is final: it also cancels any earlier `allow`.
+      return {
+        call,
+        denied: result.message || 'tool call denied by PreToolUse hook',
+        autoApproved: false,
+        warnings
+      }
+    }
+    if (result.decision === 'allow') autoApproved = true
+    // Command hooks return parsed JSON, so guard the shape at runtime:
+    // `typeof [] === 'object'`, and an array is not a valid argument map.
+    const rewritten = result.arguments
+    if (rewritten && typeof rewritten === 'object' && !Array.isArray(rewritten)) {
+      call = { ...call, arguments: rewritten }
+    }
+  }
+  return { call, autoApproved, warnings }
+}
+
+/**
+ * Run PostToolUse hooks in order. Output rewrites chain: each hook sees
+ * the result as rewritten by the hooks before it.
+ *
+ * A hook result that has an `output` key replaces the output (keeping
+ * the current `isError` unless the hook sets one); a result with only
+ * `isError` updates the flag and leaves the output untouched. Errors
+ * thrown by `executeHook` are not caught here.
+ *
+ * @param hooks All resolved hooks (any phase); `undefined` means none.
+ * @param input The tool call, hook context, and the tool's result.
+ * @returns The final output and error flag plus any warnings.
+ */
+export async function runPostToolUseHooks(
+  hooks: readonly ResolvedHook[] | undefined,
+  input: { call: ToolCallLike; context: ToolHookContext; result: ToolHookResultPayload }
+): Promise<PostToolUseOutcome> {
+  let current = input.result
+  const warnings: string[] = []
+  for (const hook of hooksForTool(hooks, 'PostToolUse', input.call.toolName)) {
+    const outcome = await executeHook(hook, {
+      phase: 'PostToolUse',
+      call: input.call,
+      context: input.context,
+      result: current
+    })
+    if (outcome.warning) warnings.push(outcome.warning)
+    const result = outcome.result
+    if (!result) continue
+    // Key presence, not truthiness: a hook may rewrite the output to `null`, `''` or `0`.
+    if ('output' in result) {
+      current = { output: result.output, isError: result.isError ?? current.isError }
+    } else if (result.isError !== undefined) {
+      current = { ...current, isError: result.isError }
+    }
+  }
+  return { output: current.output, isError: current.isError, warnings }
+}
+
+/**
+ * Run UserPromptSubmit hooks. A deny fails the turn before the first
+ * model call. `additionalContext` strings are collected for the loop to
+ * persist as extra turn context. Hook crashes fail open with a warning:
+ * a broken gate must not lock the user out of their own agent.
+ *
+ * A hook `message` without usable (non-blank) `additionalContext` is
+ * reported as a warning.
+ *
+ * @param hooks All resolved hooks (any phase); `undefined` means none.
+ * @param input Thread/turn ids, the submitted prompt and optional workspace.
+ * @returns Denial reason if a hook denied, the trimmed context strings
+ *   collected so far, and any warnings.
+ */
+export async function runUserPromptSubmitHooks(
+  hooks: readonly ResolvedHook[] | undefined,
+  input: { threadId: string; turnId: string; prompt: string; workspace?: string }
+): Promise<UserPromptSubmitOutcome> {
+  const additionalContext: string[] = []
+  const warnings: string[] = []
+  for (const hook of hooksForPhase(hooks, 'UserPromptSubmit')) {
+    let outcome: HookExecutionOutcome
+    try {
+      outcome = await executeHook(hook, { phase: 'UserPromptSubmit', ...input })
+    } catch (error) {
+      // Fail open: record the failure and keep evaluating the remaining hooks.
+      warnings.push(`UserPromptSubmit hook failed: ${errorMessage(error)}`)
+      continue
+    }
+    if (outcome.warning) warnings.push(outcome.warning)
+    const result = outcome.result
+    if (!result) continue
+    if (result.decision === 'deny') {
+      return {
+        denied: result.message || 'turn denied by UserPromptSubmit hook',
+        additionalContext,
+        warnings
+      }
+    }
+    const context = result.additionalContext?.trim()
+    const message = result.message?.trim()
+    if (context) {
+      additionalContext.push(context)
+    } else if (message) {
+      // Judge on the trimmed context so a blank `additionalContext`
+      // cannot silently swallow the hook's message.
+      warnings.push(message)
+    }
+  }
+  return { additionalContext, warnings }
+}
+
+/**
+ * Run observe-only hooks (TurnStart, TurnEnd, PreCompact). Results are
+ * ignored except messages; crashes and timeouts become warnings.
+ *
+ * For each hook, an execution warning takes precedence over the hook's
+ * own `message`; at most one of the two is reported.
+ *
+ * @param hooks All resolved hooks (any phase); `undefined` means none.
+ * @param invocation The lifecycle invocation to deliver to matching hooks.
+ * @returns The collected warnings.
+ */
+export async function runObserverHooks(
+  hooks: readonly ResolvedHook[] | undefined,
+  // NOTE(review): type arguments were lost in extraction; reconstructed from the
+  // observe-only phases named above. Confirm against the upstream file.
+  invocation: Extract<HookInvocation, { phase: 'TurnStart' | 'TurnEnd' | 'PreCompact' }>
+): Promise<ObserverOutcome> {
+  const warnings: string[] = []
+  for (const hook of hooksForPhase(hooks, invocation.phase)) {
+    try {
+      const outcome = await executeHook(hook, invocation)
+      if (outcome.warning) warnings.push(outcome.warning)
+      else if (outcome.result?.message?.trim()) warnings.push(outcome.result.message.trim())
+    } catch (error) {
+      // Observers must never break the turn: downgrade failures to warnings.
+      warnings.push(`${invocation.phase} hook failed: ${errorMessage(error)}`)
+    }
+  }
+  return { warnings }
+}
+
+/** Select the hooks registered for `phase`, in registration order; always returns a new array. */
+function hooksForPhase(hooks: readonly ResolvedHook[] | undefined, phase: HookPhase): ResolvedHook[] {
+  const selected: ResolvedHook[] = []
+  for (const candidate of hooks ?? []) {
+    if (candidate.phase === phase) selected.push(candidate)
+  }
+  return selected
+}
+
+/** Select the hooks for `phase` whose matcher or tool-name list accepts `toolName`. */
+function hooksForTool(
+  hooks: readonly ResolvedHook[] | undefined,
+  phase: HookPhase,
+  toolName: string
+): ResolvedHook[] {
+  const phaseHooks = hooksForPhase(hooks, phase)
+  return phaseHooks.filter((candidate) => hookMatchesTool(candidate, toolName))
+}
+
+/**
+ * Decide whether a hook applies to `toolName`.
+ *
+ * A hook with neither a non-empty `toolNames` list nor a `matcher` applies to
+ * every tool. Otherwise it applies when the name is listed explicitly or the
+ * glob matcher accepts it.
+ */
+export function hookMatchesTool(
+  hook: Pick,
+  toolName: string
+): boolean {
+  const names = hook.toolNames ?? []
+  const matcher = hook.matcher
+  if (names.length === 0 && !matcher) return true
+  if (names.includes(toolName)) return true
+  return matcher ? compileMatcher(matcher).test(toolName) : false
+}
+
+/** Compiled matchers keyed by their source pattern, so each pattern is compiled once. */
+const matcherCache = new Map()
+
+/**
+ * Turn a tool-name glob into an anchored regular expression.
+ *
+ * `|` separates alternatives (surrounding whitespace is ignored and empty
+ * alternatives are dropped), `*` matches any run of characters, and every
+ * other regex metacharacter is matched literally.
+ */
+function compileMatcher(pattern: string): RegExp {
+  const known = matcherCache.get(pattern)
+  if (known) return known
+  const branches: string[] = []
+  for (const piece of pattern.split('|')) {
+    const glob = piece.trim()
+    if (!glob) continue
+    branches.push(glob.replace(/[.+?^${}()[\]\\]/g, '\\$&').replaceAll('*', '.*'))
+  }
+  // `$.` can never match, so a pattern without a usable alternative matches nothing.
+  const body = branches.length > 0 ? branches.join('|') : '$.'
+  const compiled = new RegExp(`^(?:${body})$`)
+  matcherCache.set(pattern, compiled)
+  return compiled
+}
+
+/**
+ * What running a single hook produced. Both fields are optional: a hook that
+ * printed nothing (or returned nothing) yields an empty outcome.
+ */
+type HookExecutionOutcome = {
+ /** Result reported by the hook itself, when it produced one. */
+ result?: HookResult
+ /** Non-blocking diagnostic (command hook exited non-zero without blocking). */
+ warning?: string
+}
+
+/**
+ * Run one hook and normalize what it produced.
+ *
+ * Hooks carrying a `run` callback execute in-process under the hook timeout
+ * (`timeoutMs`, falling back to `DEFAULT_HOOK_TIMEOUT_MS`); a falsy result is
+ * reported as an empty outcome. Every other hook is run as a shell command.
+ * Timeouts and callback failures reject the returned promise.
+ */
+async function executeHook(hook: ResolvedHook, invocation: HookInvocation): Promise {
+  if (!('run' in hook)) return runCommandHook(hook, invocation)
+  const pending = Promise.resolve(hook.run(invocation))
+  const budgetMs = hook.timeoutMs ?? DEFAULT_HOOK_TIMEOUT_MS
+  const result = await withTimeout(pending, budgetMs, `${hook.phase} hook timed out`)
+  if (!result) return {}
+  return { result }
+}
+
+/**
+ * Command hook protocol:
+ * - The invocation is written to stdin as a single JSON document. A hook
+ *   that never reads stdin is fine; the resulting pipe error is ignored.
+ * - Exit 0: stdout is parsed as a JSON `HookResult` (a JSON object). Plain
+ *   (non-JSON) stdout, and JSON that is not an object, becomes
+ *   `additionalContext` for UserPromptSubmit and `message` for every other
+ *   phase. Empty stdout or a JSON `null` yields no result.
+ * - Exit 2: blocks. PreToolUse/UserPromptSubmit deny, PostToolUse marks
+ *   the result as an error; stderr is the reason.
+ * - Any other exit code, or termination by a signal: non-blocking warning
+ *   with stderr attached.
+ * - Timeout kills the spawned process tree and propagates as an error.
+ * - A spawn failure (for example a `cwd` that does not exist) propagates as
+ *   an error instead of surfacing as an unhandled `'error'` event.
+ */
+async function runCommandHook(
+  hook: Extract,
+  invocation: HookInvocation
+): Promise {
+  const payload = JSON.stringify(invocation)
+  const child = spawn(hook.command, {
+    cwd: hook.cwd || workspaceOf(invocation) || undefined,
+    shell: true,
+    stdio: ['pipe', 'pipe', 'pipe']
+  })
+  // A hook may exit before consuming stdin. Without a listener the EPIPE
+  // would be an unhandled stream error and take the whole process down.
+  child.stdin.on('error', () => {
+    /* best-effort write: the hook is not required to read its input */
+  })
+  child.stdin.end(payload)
+  // Decode at the stream level so a multi-byte UTF-8 character split across
+  // two chunks is reassembled instead of being corrupted.
+  child.stdout.setEncoding('utf8')
+  child.stderr.setEncoding('utf8')
+  let stdout = ''
+  let stderr = ''
+  child.stdout.on('data', (chunk) => {
+    stdout += String(chunk)
+  })
+  child.stderr.on('data', (chunk) => {
+    stderr += String(chunk)
+  })
+  let spawnFailed = false
+  const exit = await withTimeout(
+    new Promise<{ code: number | null; signal: string | null }>((resolve, reject) => {
+      child.on('error', (error) => {
+        spawnFailed = true
+        reject(error)
+      })
+      child.on('close', (code, signal) => resolve({ code, signal }))
+    }),
+    hook.timeoutMs ?? DEFAULT_HOOK_TIMEOUT_MS,
+    `${hook.phase} command hook timed out`
+  ).catch((error) => {
+    // Only a timeout leaves a live process behind that needs to be killed.
+    if (!spawnFailed) terminateSpawnTree(child)
+    throw error
+  })
+  const exitCode = exit.code
+  if (exitCode === null) {
+    // Killed by a signal: its (possibly partial) stdout must not be trusted.
+    return {
+      warning:
+        stderr.trim() ||
+        `${hook.phase} command hook terminated by signal ${exit.signal ?? 'unknown'}`
+    }
+  }
+  if (exitCode === HOOK_BLOCKING_EXIT_CODE) {
+    const reason = stderr.trim() || `${hook.phase} command hook blocked (exit ${exitCode})`
+    if (invocation.phase === 'PostToolUse') {
+      return { result: { isError: true, message: reason } }
+    }
+    return { result: { decision: 'deny', message: reason } }
+  }
+  if (exitCode !== 0) {
+    return {
+      warning: stderr.trim() || `${hook.phase} command hook exited with ${exitCode}`
+    }
+  }
+  const text = stdout.trim()
+  if (!text) return {}
+  try {
+    const parsed: unknown = JSON.parse(text)
+    if (parsed === null) return {}
+    if (typeof parsed === 'object' && !Array.isArray(parsed)) {
+      return { result: parsed as HookResult }
+    }
+    // Valid JSON but not an object (number, string, array…): treat as plain text.
+  } catch {
+    // Not JSON at all: treat as plain text.
+  }
+  if (invocation.phase === 'UserPromptSubmit') {
+    return { result: { additionalContext: text } }
+  }
+  return { result: { message: text } }
+}
+
+/**
+ * Workspace directory carried by an invocation. Tool phases nest it under
+ * `context`; every other phase exposes it directly on the invocation.
+ */
+function workspaceOf(invocation: HookInvocation): string | undefined {
+  switch (invocation.phase) {
+    case 'PreToolUse':
+    case 'PostToolUse':
+      return invocation.context.workspace
+    default:
+      return invocation.workspace
+  }
+}
+
+/** Human-readable text for a caught value: an `Error`'s message, otherwise its string form. */
+function errorMessage(error: unknown): string {
+  if (error instanceof Error) return error.message
+  return String(error)
+}
+
+/**
+ * Settle with `promise`, or reject with `new Error(message)` once `timeoutMs`
+ * (at least 1 ms) has elapsed, whichever happens first. The timer is always
+ * cleared, so a promise that settles in time leaves nothing pending.
+ */
+async function withTimeout(promise: Promise, timeoutMs: number, message: string): Promise {
+  let timer: ReturnType | undefined
+  const deadline = new Promise((_resolve, reject) => {
+    const delayMs = Math.max(1, timeoutMs)
+    timer = setTimeout(() => reject(new Error(message)), delayMs)
+  })
+  try {
+    return await Promise.race([promise, deadline])
+  } finally {
+    if (timer) clearTimeout(timer)
+  }
+}
diff --git a/kun/src/hooks/index.ts b/kun/src/hooks/index.ts
new file mode 100644
index 00000000..f522e5ca
--- /dev/null
+++ b/kun/src/hooks/index.ts
@@ -0,0 +1,27 @@
+/**
+ * Barrel for the hooks module: re-exports the hook engine (phase runners,
+ * matching helper, constants and their types) and the hook configuration
+ * schemas with their resolver.
+ */
+export {
+ DEFAULT_HOOK_TIMEOUT_MS,
+ HOOK_BLOCKING_EXIT_CODE,
+ HOOK_PHASES,
+ hasHooksForPhase,
+ hookMatchesTool,
+ runObserverHooks,
+ runPostToolUseHooks,
+ runPreToolUseHooks,
+ runUserPromptSubmitHooks,
+ type HookInvocation,
+ type HookPhase,
+ type HookResult,
+ type ObserverOutcome,
+ type PostToolUseOutcome,
+ type PreToolUseOutcome,
+ type ResolvedHook,
+ type ToolHookContext,
+ type UserPromptSubmitOutcome
+} from './hook-engine.js'
+export {
+ HookCommandConfigSchema,
+ HooksConfigSchema,
+ resolveConfiguredHooks,
+ type HookCommandConfig,
+ type HooksConfig
+} from './hook-config.js'
diff --git a/kun/src/index.ts b/kun/src/index.ts
index c4b052e8..f80808fa 100644
--- a/kun/src/index.ts
+++ b/kun/src/index.ts
@@ -1,7 +1,7 @@
/**
* Kun public surface.
*
- * The package exposes a small set of named entrypoints that the DeepSeek-GUI
+ * The package exposes a small set of named entrypoints that the Kun
* main process and CLI use. The submodules contain the actual implementation
* and additional re-exports.
*/
@@ -13,6 +13,7 @@ export * from './ports/index.js'
export * from './adapters/index.js'
export * from './attachments/index.js'
export * from './services/index.js'
+export * from './hooks/index.js'
export * from './loop/index.js'
export * from './memory/index.js'
export * from './cache/index.js'
diff --git a/kun/src/loop/agent-loop.test.ts b/kun/src/loop/agent-loop.test.ts
new file mode 100644
index 00000000..3079c6ea
--- /dev/null
+++ b/kun/src/loop/agent-loop.test.ts
@@ -0,0 +1,128 @@
+import { describe, expect, it } from 'vitest'
+import { resolvePlanModeToolSpecs } from './agent-loop.js'
+import type { ModelToolSpec } from '../ports/model-client.js'
+
+/** Build a minimal tool spec; `create_plan`, `write` and `edit` are tagged as file changes. */
+function spec(name: string): ModelToolSpec {
+  const changesFiles = ['create_plan', 'write', 'edit'].includes(name)
+  return {
+    name,
+    description: `Tool: ${name}`,
+    toolKind: changesFiles ? 'file_change' : 'tool_call',
+    inputSchema: { type: 'object', properties: {} }
+  }
+}
+
+/** Full fixture catalog, in the order the specs are handed to the resolver. */
+const ALL_TOOLS: ModelToolSpec[] = [
+  'read',
+  'write',
+  'edit',
+  'ls',
+  'find',
+  'grep',
+  'bash',
+  'web_search',
+  'web_fetch',
+  'create_plan'
+].map((name) => spec(name))
+
+/** Read-only tool names passed explicitly by the tests that do not rely on the default set. */
+const READ_ONLY_TOOLS = new Set([
+  'read',
+  'ls',
+  'find',
+  'grep',
+  'web_search',
+  'web_fetch'
+])
+
+describe('resolvePlanModeToolSpecs', () => {
+  type PlanOptions = Parameters<typeof resolvePlanModeToolSpecs>[1]
+
+  /** Names of the tools offered to the model for the given options. */
+  const offeredNames = (options: PlanOptions, tools: ModelToolSpec[] = ALL_TOOLS): string[] =>
+    resolvePlanModeToolSpecs(tools, options).map((tool) => tool.name)
+
+  /** Options for a plan turn that has not produced its plan yet. */
+  const owingPlan = (stepIndex: number): PlanOptions => ({
+    planTurnActive: true,
+    createPlanSatisfied: false,
+    stepIndex
+  })
+
+  it('step 0: read-only tools + create_plan only', () => {
+    const names = offeredNames({ ...owingPlan(0), readOnlyToolNames: READ_ONLY_TOOLS })
+    for (const allowed of ['read', 'ls', 'find', 'grep', 'web_search', 'web_fetch', 'create_plan']) {
+      expect(names).toContain(allowed)
+    }
+    for (const withheld of ['write', 'edit', 'bash']) {
+      expect(names).not.toContain(withheld)
+    }
+  })
+
+  it('step > 0: only create_plan', () => {
+    const names = offeredNames({ ...owingPlan(1), readOnlyToolNames: READ_ONLY_TOOLS })
+    expect(names).toEqual(['create_plan'])
+  })
+
+  it('plan satisfied: returns all tools unchanged (pass-through)', () => {
+    const result = resolvePlanModeToolSpecs(ALL_TOOLS, {
+      ...owingPlan(0),
+      createPlanSatisfied: true,
+      readOnlyToolNames: READ_ONLY_TOOLS
+    })
+    // Identity, not just equality: the input array itself is handed back.
+    expect(result).toBe(ALL_TOOLS)
+  })
+
+  it('not plan-active: returns all tools unchanged (pass-through)', () => {
+    const result = resolvePlanModeToolSpecs(ALL_TOOLS, {
+      ...owingPlan(0),
+      planTurnActive: false,
+      readOnlyToolNames: READ_ONLY_TOOLS
+    })
+    expect(result).toBe(ALL_TOOLS)
+  })
+
+  it('uses PLAN_READ_ONLY_TOOL_NAMES default when readOnlyToolNames omitted', () => {
+    const names = offeredNames(owingPlan(0))
+    // The built-in read-only set leaves bash out.
+    expect(names).not.toContain('bash')
+    expect(names).toContain('create_plan')
+    expect(names).toContain('read')
+  })
+
+  it('uses CREATE_PLAN_TOOL_NAME default when planToolName omitted', () => {
+    const names = offeredNames(owingPlan(1))
+    expect(names).toEqual(['create_plan'])
+  })
+
+  it('custom readOnlyToolNames and planToolName', () => {
+    const customTools: ModelToolSpec[] = ['custom-read', 'custom-plan', 'write', 'bash'].map(
+      (name) => spec(name)
+    )
+    const names = offeredNames(
+      {
+        ...owingPlan(0),
+        readOnlyToolNames: new Set(['custom-read']),
+        planToolName: 'custom-plan'
+      },
+      customTools
+    )
+    expect(names).toContain('custom-read')
+    expect(names).toContain('custom-plan')
+    expect(names).not.toContain('write')
+    expect(names).not.toContain('bash')
+  })
+})
diff --git a/kun/src/loop/agent-loop.ts b/kun/src/loop/agent-loop.ts
index 394ee218..aa784f83 100644
--- a/kun/src/loop/agent-loop.ts
+++ b/kun/src/loop/agent-loop.ts
@@ -8,7 +8,7 @@ import type {
ToolProviderKind
} from '../ports/tool-host.js'
import type { ModelCapabilityMetadata } from '../contracts/capabilities.js'
-import { DEFAULT_APPROVAL_POLICY } from '../contracts/policy.js'
+import { DEFAULT_APPROVAL_POLICY, DEFAULT_SANDBOX_MODE } from '../contracts/policy.js'
import type { ThreadStore } from '../ports/thread-store.js'
import type { SessionStore } from '../ports/session-store.js'
import type { ApprovalGate } from '../ports/approval-gate.js'
@@ -17,6 +17,7 @@ import type { UsageService } from '../services/usage-service.js'
import type { TurnService } from '../services/turn-service.js'
import type { RuntimeEventRecorder } from '../services/runtime-event-recorder.js'
import type { PipelineStage } from '../contracts/events.js'
+import type { RuntimeErrorSeverity } from '../contracts/errors.js'
import type { IdGenerator } from '../ports/id-generator.js'
import type { ImmutablePrefix } from '../cache/immutable-prefix.js'
import { ContextCompactor } from './context-compactor.js'
@@ -50,6 +51,12 @@ import type { SkillRuntime } from '../skills/skill-runtime.js'
import type { AttachmentContent, AttachmentStore } from '../attachments/attachment-store.js'
import type { ModelInputAttachment, ModelTextAttachmentFallback } from '../ports/model-client.js'
import type { MemoryStore } from '../memory/memory-store.js'
+import {
+ hasHooksForPhase,
+ runObserverHooks,
+ runUserPromptSubmitHooks,
+ type ResolvedHook
+} from '../hooks/hook-engine.js'
import {
applyTokenEconomyToRequest,
normalizeTokenEconomyConfig,
@@ -57,7 +64,6 @@ import {
} from './token-economy.js'
import { applyRequestHistoryHygiene } from './request-history-hygiene.js'
import { estimateModelRequestInputTokens } from './model-request-estimator.js'
-import { estimateDeepseekInputTokenCost } from '../adapters/model/deepseek-pricing.js'
import {
recentAutoRouterContext,
resolveAutoModelRoute,
@@ -73,10 +79,26 @@ import { shellRuntimeInstruction } from '../adapters/tool/builtin-tool-utils.js'
const PARALLEL_READ_ONLY_TOOL_NAMES = new Set(['read', 'grep', 'find', 'ls'])
const MAX_PARALLEL_TOOL_CALLS = 3
+const MAX_TURN_MODEL_STEPS = 64
+const MAX_TOOL_CATALOG_SNAPSHOTS = 256
const DEFAULT_COMPACTION_SUMMARY_TIMEOUT_MS = 15_000
const DEFAULT_COMPACTION_SUMMARY_MAX_TOKENS = 1_200
const DEFAULT_COMPACTION_SUMMARY_INPUT_MAX_BYTES = 96 * 1024
+/**
+ * Failure details remembered for a turn via `rememberTurnFailure` and spread
+ * into the `finishTurn` call when the loop ends with status `failed`.
+ */
+type TurnFailure = {
+ /** Failure message; entries whose message is blank are not remembered. */
+ error: string
+ /** Error code, when the failing model chunk carried one. */
+ code?: string
+ // NOTE(review): nothing in this chunk populates `details` — confirm who sets it.
+ details?: unknown
+ severity?: RuntimeErrorSeverity
+}
+
+/**
+ * Model-client facts attached to the `pre_send` / `post_send` pipeline stage
+ * details. Built by `modelClientDiagnostics()`; the config-derived fields
+ * are present only when the client exposes a truthy value for them.
+ */
+type ModelClientDiagnostics = {
+ provider?: string
+ /** Base URL after passing through `sanitizeProviderBaseUrl`. */
+ providerBaseUrl?: string
+ endpointFormat?: string
+ /** Model named in the client config (distinct from the per-request model). */
+ configuredModel?: string
+}
+
const PIPELINE_STAGE_LABELS: Record = {
setup: 'Setup',
pre_start: 'Pre-Start',
@@ -116,14 +138,55 @@ type ToolCatalogDrift =
*/
export const PLAN_MODE_INSTRUCTION = [
'You are in Plan mode.',
- 'Investigate the task first using read-only tools and commands: prefer `read`, `grep`, `find`, `ls`, and safe read-only shell commands appropriate for the host platform via `bash` to gather the facts you need.',
- 'Do NOT modify project files, apply edits, or run mutating commands in this mode.',
+ 'Investigate the task first using read-only tools: prefer `read`, `grep`, `find`, and `ls` to gather the facts you need.',
+ 'Do NOT modify project files, apply edits, run shell commands, or run mutating commands in this mode.',
'When you understand the task well enough, call the `create_plan` tool to save a complete implementation plan as Markdown.',
'Use `operation: "draft"` for the first plan, and `operation: "refine"` when revising an existing plan; you may call `create_plan` multiple times as the plan evolves.',
'Write concrete, actionable steps (summary, implementation steps, tests, risks) rather than vague intentions.',
'After saving, give the user a short summary of the plan and what to review.'
].join('\n')
+/**
+ * Default read-only tools offered during the investigation phase of a
+ * Plan-mode turn (step 0, before `create_plan` has been called). Covers the
+ * tools named in PLAN_MODE_INSTRUCTION (`read`, `grep`, `find`, `ls`) and
+ * additionally `web_search` and `web_fetch`. `bash` is intentionally
+ * excluded — it can execute arbitrary commands and its policy is
+ * `on-request` which auto-approves under `approvalPolicy: auto`.
+ */
+const PLAN_READ_ONLY_TOOL_NAMES = new Set([
+ 'read',
+ 'ls',
+ 'find',
+ 'grep',
+ 'web_search',
+ 'web_fetch'
+])
+
+/**
+ * Pick the tools the model may see on one step of a turn, applying the
+ * Plan-mode restrictions. Kept pure so it can be tested without an agent loop.
+ *
+ * - Outside a plan turn, or once `create_plan` has succeeded, the input array
+ *   is returned as-is (same reference).
+ * - On step 0 the model may investigate: read-only tools plus the plan tool.
+ * - On any later step only the plan tool remains.
+ *
+ * `readOnlyToolNames` defaults to `PLAN_READ_ONLY_TOOL_NAMES` and
+ * `planToolName` to `CREATE_PLAN_TOOL_NAME`.
+ */
+export function resolvePlanModeToolSpecs(
+  toolSpecs: ModelToolSpec[],
+  options: {
+    planTurnActive: boolean
+    createPlanSatisfied: boolean
+    stepIndex: number
+    readOnlyToolNames?: ReadonlySet
+    planToolName?: string
+  }
+): ModelToolSpec[] {
+  const restricted = options.planTurnActive && !options.createPlanSatisfied
+  if (!restricted) return toolSpecs
+  const readOnly = options.readOnlyToolNames ?? PLAN_READ_ONLY_TOOL_NAMES
+  const planTool = options.planToolName ?? CREATE_PLAN_TOOL_NAME
+  const investigating = options.stepIndex === 0
+  return toolSpecs.filter(
+    (tool) => tool.name === planTool || (investigating && readOnly.has(tool.name))
+  )
+}
+
function goalContinuationInstruction(goal: ThreadGoal | undefined): string | null {
if (!goal || goal.status !== 'active') return null
const tokenBudget = goal.tokenBudget == null ? 'none' : String(goal.tokenBudget)
@@ -162,6 +225,63 @@ function goalContinuationInstruction(goal: ThreadGoal | undefined): string | nul
].join('\n')
}
+/** Character-bigram Dice similarity at or above which two normalized no-tool replies count as a repeat. */
+const GOAL_NO_TOOL_REPEAT_SIMILARITY = 0.85
+/** Normalized replies shorter than this are compared for exact equality only. */
+const GOAL_NO_TOOL_REPEAT_MIN_LENGTH = 12
+// NOTE(review): presumably caps the number of recovery prompts per turn; its use is not visible here — confirm.
+const GOAL_NO_TOOL_REPEAT_MAX_RECOVERY_STEPS = 3
+
+/**
+ * Build the extra instruction shown when goal continuation keeps producing
+ * near-identical replies without tool calls. `recoveryStep` is the number of
+ * such repeats reported to the model.
+ */
+function goalNoToolRecoveryInstruction(recoveryStep: number): string {
+  const heading = 'Goal continuation recovery:'
+  const bullets = [
+    `- The active goal continuation has produced near-identical no-tool replies ${recoveryStep} time(s).`,
+    '- Do not repeat the same status update, promise, or summary again.',
+    `- If the objective is actually achieved, call ${UPDATE_GOAL_TOOL_NAME} with status "complete" after verifying the current state.`,
+    `- If the strict blocked audit is satisfied, call ${UPDATE_GOAL_TOOL_NAME} with status "blocked".`,
+    '- Otherwise, continue with new substantive work or call an available tool to make concrete progress.'
+  ]
+  return [heading, ...bullets].join('\n')
+}
+
+/**
+ * Detect a goal-continuation reply that merely restates the previous one.
+ *
+ * Goal continuation re-prompts the model whenever it stops without calling a
+ * tool, so "I will do X next" filler can loop indefinitely. Such filler tends
+ * to differ between rounds in punctuation, casing, or word order, which an
+ * exact comparison misses. Both texts are therefore normalized first; equal
+ * normalized texts are a repeat, and otherwise texts that are long enough
+ * are compared by character-bigram similarity.
+ */
+function isRepeatedNoToolAssistantText(previous: string | undefined, current: string): boolean {
+  if (previous === undefined) return false
+  const before = normalizeNoToolAssistantText(previous)
+  const after = normalizeNoToolAssistantText(current)
+  if (before === after) return true
+  // Similarity on very short texts is too noisy to act on.
+  const shortest = Math.min(before.length, after.length)
+  if (shortest < GOAL_NO_TOOL_REPEAT_MIN_LENGTH) return false
+  return charBigramDiceSimilarity(before, after) >= GOAL_NO_TOOL_REPEAT_SIMILARITY
+}
+
+/** Lower-case the text and strip all whitespace, punctuation and symbol characters. */
+function normalizeNoToolAssistantText(text: string): string {
+  const lowered = text.toLowerCase()
+  return lowered.replace(/[\s\p{P}\p{S}]+/gu, '')
+}
+
+/**
+ * Sørensen–Dice coefficient over the character bigrams of `a` and `b`:
+ * twice the number of shared bigrams (counted with multiplicity) divided by
+ * the total number of bigrams in both strings. The result lies in [0, 1].
+ *
+ * When neither string has a bigram (both shorter than two characters) there
+ * is nothing to compare, so the result is 1 for identical strings and 0
+ * otherwise rather than the `NaN` a 0/0 division would produce.
+ */
+function charBigramDiceSimilarity(a: string, b: string): number {
+  // A string of length n has max(0, n - 1) bigrams; clamping keeps the
+  // denominator from going to zero or negative for very short inputs.
+  const totalBigrams = Math.max(0, a.length - 1) + Math.max(0, b.length - 1)
+  if (totalBigrams === 0) return a === b ? 1 : 0
+  const bigramsA = charBigramCounts(a)
+  const bigramsB = charBigramCounts(b)
+  let shared = 0
+  for (const [bigram, countA] of bigramsA) {
+    const countB = bigramsB.get(bigram)
+    if (countB) shared += Math.min(countA, countB)
+  }
+  return (2 * shared) / totalBigrams
+}
+
+/** Count every overlapping two-character window of `text` (UTF-16 code units). */
+function charBigramCounts(text: string): Map {
+  const counts = new Map()
+  for (let index = 0; index < text.length - 1; index += 1) {
+    const bigram = text.slice(index, index + 2)
+    counts.set(bigram, (counts.get(bigram) ?? 0) + 1)
+  }
+  return counts
+}
+
function todoContinuationInstruction(todos: ThreadTodoList | undefined): string | null {
const items = todos?.items ?? []
if (items.length === 0) return null
@@ -207,6 +327,19 @@ function latestUserMessageText(items: readonly TurnItem[], turnId: string): stri
return ''
}
+/**
+ * Instruction added to turns that have no interactive user behind them
+ * (IM bridges, headless runs). Those turns also have the user-input tools
+ * withheld from the catalog; this text stops the model from promising a GUI
+ * dialog that nobody is there to answer.
+ */
+function userInputUnavailableInstruction(): string {
+  const situation =
+    'Interactive user input is unavailable for this turn: the user is on a remote channel (IM) and cannot answer GUI prompts.'
+  const guidance =
+    'Do not ask for structured input or wait for confirmation. If information is missing, state your assumption and continue, or finish your reply with the question so the user can answer in their next message.'
+  return `${situation} ${guidance}`
+}
+
function allowedToolNamesWithGuiStateTools(
allowedToolNames: readonly string[] | undefined,
activeGoal: boolean
@@ -249,6 +382,11 @@ export type AgentLoopOptions = {
toolArgumentRepair?: {
maxStringBytes?: number
}
+ /**
+ * Lifecycle hooks (UserPromptSubmit, TurnStart, TurnEnd, PreCompact).
+ * Tool phases are handled by the tool host; the loop ignores them.
+ */
+ hooks?: readonly ResolvedHook[]
/**
* Optional fallback GUI plan context for embedders that run the loop
* without persisted turn metadata. Normal serve mode reads GUI plan
@@ -288,6 +426,9 @@ export class AgentLoop {
private readonly promptTokenPressure = new Map()
private readonly toolStormBreakers = new Map()
private readonly toolCatalogSnapshots = new Map()
+ private readonly lastNoToolTextByTurn = new Map()
+ private readonly goalNoToolRecoveryStepsByTurn = new Map()
+ private readonly turnFailures = new Map()
constructor(opts: AgentLoopOptions) {
this.opts = opts
@@ -309,6 +450,8 @@ export class AgentLoop {
return 'aborted'
}
let goalTimer: GoalElapsedTimer | null = null
+ let finalStatus: 'completed' | 'failed' | 'aborted' | undefined
+ let finalError: string | undefined
try {
goalTimer = await this.startGoalElapsedTimer(threadId)
await this.recordPipelineStage(threadId, turnId, 'setup')
@@ -316,10 +459,44 @@ export class AgentLoop {
this.toolStormBreakers.set(turnId, new ToolStormBreaker(this.opts.toolStorm))
}
await this.recordPipelineStage(threadId, turnId, 'pre_start')
+ const denial = await this.runTurnStartLifecycleHooks(threadId, turnId)
+ if (denial) {
+ await this.opts.events.record({
+ kind: 'error',
+ threadId,
+ turnId,
+ message: denial,
+ code: 'hook_denied',
+ severity: 'error'
+ })
+ await this.opts.turns.applyItem(
+ threadId,
+ makeErrorItem({
+ id: this.opts.ids.next('item_error'),
+ turnId,
+ threadId,
+ message: denial,
+ code: 'hook_denied',
+ severity: 'error'
+ })
+ )
+ await this.opts.turns.finishTurn({ threadId, turnId, status: 'failed', error: denial })
+ finalStatus = 'failed'
+ finalError = denial
+ return 'failed'
+ }
await this.drainSteering(threadId, turnId, signal)
await this.recordPipelineStage(threadId, turnId, 'post_start')
const status = await this.loop(threadId, turnId, signal)
- await this.opts.turns.finishTurn({ threadId, turnId, status })
+ const failure = status === 'failed' ? this.turnFailures.get(turnId) : undefined
+ await this.opts.turns.finishTurn({
+ threadId,
+ turnId,
+ status,
+ ...(failure ?? {})
+ })
+ finalStatus = status
+ finalError = failure?.error
return status
} catch (error) {
const raw = error instanceof Error ? error.message : String(error)
@@ -343,11 +520,101 @@ export class AgentLoop {
stack ? `stack=${stack}` : ''
].filter(Boolean).join(' ')
await this.failTurn(threadId, turnId, message)
+ finalStatus = 'failed'
+ finalError = message
return 'failed'
} finally {
await this.finishGoalElapsedTimer(threadId, goalTimer)
this.autoModelRoutes.delete(autoModelRouteKey(threadId, turnId))
this.toolStormBreakers.delete(turnId)
+ this.lastNoToolTextByTurn.delete(turnId)
+ this.goalNoToolRecoveryStepsByTurn.delete(turnId)
+ this.turnFailures.delete(turnId)
+ await this.runTurnEndHooks(threadId, turnId, finalStatus ?? 'failed', finalError)
+ }
+ }
+
+ /**
+ * Run the start-of-turn hooks: TurnStart (observe-only) first, then
+ * UserPromptSubmit. Warnings from both phases are recorded as
+ * `hook_warning` events.
+ *
+ * Returns the denial message when a UserPromptSubmit hook blocks the
+ * turn, otherwise `undefined`. Accepted `additionalContext` is persisted
+ * as an extra user message so replays and the prompt cache see a stable
+ * history.
+ */
+ private async runTurnStartLifecycleHooks(threadId: string, turnId: string): Promise {
+ const hooks = this.opts.hooks
+ const hasStart = hasHooksForPhase(hooks, 'TurnStart')
+ const hasSubmit = hasHooksForPhase(hooks, 'UserPromptSubmit')
+ // Nothing registered: skip the turn/thread lookups entirely.
+ if (!hasStart && !hasSubmit) return undefined
+ const turn = await this.opts.turns.getTurn(threadId, turnId)
+ const thread = await this.opts.threadStore.get(threadId)
+ // Shared by both phases; `workspace` is included only when the thread has one.
+ const payload = {
+ threadId,
+ turnId,
+ prompt: turn?.prompt ?? '',
+ ...(thread?.workspace ? { workspace: thread.workspace } : {})
+ }
+ if (hasStart) {
+ const started = await runObserverHooks(hooks, { phase: 'TurnStart', ...payload })
+ await this.recordHookWarnings(threadId, turnId, started.warnings)
+ }
+ if (!hasSubmit) return undefined
+ const submit = await runUserPromptSubmitHooks(hooks, payload)
+ // Warnings are recorded before the denial is returned so they are not lost.
+ await this.recordHookWarnings(threadId, turnId, submit.warnings)
+ if (submit.denied) return submit.denied
+ if (submit.additionalContext.length > 0) {
+ const now = this.opts.nowIso()
+ const item: TurnItem = {
+ id: this.opts.ids.next('item_hook'),
+ turnId,
+ threadId,
+ role: 'user',
+ status: 'completed',
+ createdAt: now,
+ finishedAt: now,
+ kind: 'user_message',
+ // NOTE(review): the leading newline and trailing "\n " suggest wrapper
+ // markup around the context may have been lost from this template — confirm.
+ text: `\n${submit.additionalContext.join('\n\n')}\n `
+ }
+ await this.opts.turns.applyItem(threadId, item)
+ }
+ return undefined
+ }
+
+ /**
+  * Notify TurnEnd hooks of the turn's final status (and error, when there is
+  * one) and record whatever warnings they produce. Called from turn cleanup,
+  * so any failure here is deliberately swallowed.
+  */
+ private async runTurnEndHooks(
+   threadId: string,
+   turnId: string,
+   status: 'completed' | 'failed' | 'aborted',
+   error?: string
+ ): Promise {
+   const hooks = this.opts.hooks
+   if (!hasHooksForPhase(hooks, 'TurnEnd')) return
+   try {
+     const { warnings } = await runObserverHooks(hooks, {
+       phase: 'TurnEnd',
+       threadId,
+       turnId,
+       status,
+       ...(error ? { error } : {})
+     })
+     await this.recordHookWarnings(threadId, turnId, warnings)
+   } catch {
+     // Observe-only: a TurnEnd hook must never break turn cleanup.
+   }
+ }
+
+ /** Record each hook warning, in order, as a warning-severity `hook_warning` error event. */
+ private async recordHookWarnings(
+   threadId: string,
+   turnId: string,
+   warnings: readonly string[]
+ ): Promise {
+   const { events } = this.opts
+   for (const warning of warnings) {
+     await events.record({
+       kind: 'error',
+       threadId,
+       turnId,
+       message: warning,
+       code: 'hook_warning',
+       severity: 'warning'
+     })
+   }
+ }
@@ -355,6 +622,27 @@ export class AgentLoop {
await this.opts.turns.finishTurn({ threadId, turnId, status: 'failed', error: message })
}
+ /** Remember the failure for `turnId`, replacing any earlier one; failures with a blank message are ignored. */
+ private rememberTurnFailure(turnId: string, failure: TurnFailure): void {
+   const hasMessage = failure.error.trim().length > 0
+   if (hasMessage) this.turnFailures.set(turnId, failure)
+ }
+
+ /**
+  * Describe the configured model client for pipeline diagnostics. `provider`
+  * is always included; the base URL (sanitized), endpoint format and
+  * configured model are added only when the client's optional `config`
+  * carries a truthy value for them.
+  */
+ private modelClientDiagnostics(): ModelClientDiagnostics {
+   const client = this.opts.model as ModelClient & {
+     config?: {
+       baseUrl?: string
+       endpointFormat?: string
+       model?: string
+     }
+   }
+   const config = client.config
+   const diagnostics: ModelClientDiagnostics = { provider: client.provider }
+   if (config?.baseUrl) diagnostics.providerBaseUrl = sanitizeProviderBaseUrl(config.baseUrl)
+   if (config?.endpointFormat) diagnostics.endpointFormat = config.endpointFormat
+   if (config?.model) diagnostics.configuredModel = config.model
+   return diagnostics
+ }
+
private nowMs(): number {
return this.opts.nowMs?.() ?? Date.now()
}
@@ -427,6 +715,30 @@ export class AgentLoop {
): Promise<'completed' | 'failed' | 'aborted'> {
for (let step = 0; ; step += 1) {
if (signal.aborted) return 'aborted'
+ if (step >= MAX_TURN_MODEL_STEPS) {
+ const message =
+ `Turn stopped after ${MAX_TURN_MODEL_STEPS} model steps without reaching a final response.`
+ await this.opts.events.record({
+ kind: 'error',
+ threadId,
+ turnId,
+ message,
+ code: 'turn_step_limit_exceeded',
+ severity: 'error'
+ })
+ await this.opts.turns.applyItem(
+ threadId,
+ makeErrorItem({
+ id: this.opts.ids.next('item_error'),
+ turnId,
+ threadId,
+ message,
+ code: 'turn_step_limit_exceeded',
+ severity: 'error'
+ })
+ )
+ return 'failed'
+ }
await this.drainSteering(threadId, turnId, signal)
const stepResult = await this.modelStep(threadId, turnId, signal, step)
if (stepResult === 'stop') return 'completed'
@@ -455,9 +767,16 @@ export class AgentLoop {
const budgetGate = await this.checkBudgetGate(thread, threadId, turnId)
if (budgetGate === 'blocked') return 'stop'
const loadedItems = await this.opts.sessionStore.loadItems(threadId)
- const healed = healLoadedHistoryItems(loadedItems)
- if (healed.changed) {
- await this.opts.sessionStore.rewriteItems(threadId, healed.items)
+ // Heal (and possibly rewrite) on-disk history once per turn: within a
+ // turn the loop only appends well-formed items, and healing's deep
+ // change detection costs two full-history stringifies per call.
+ let historyItems: TurnItem[] = loadedItems
+ if (stepIndex === 0) {
+ const healed = healLoadedHistoryItems(loadedItems)
+ if (healed.changed) {
+ await this.opts.sessionStore.rewriteItems(threadId, healed.items)
+ }
+ historyItems = healed.items
}
await this.recordPipelineStage(
threadId,
@@ -466,7 +785,7 @@ export class AgentLoop {
prefixVolatilityStageDetails(detectVolatilePrefixContent(this.opts.prefix))
)
if (stepIndex > 0) {
- const toolResultCount = healed.items.filter(
+ const toolResultCount = historyItems.filter(
(item) => item.turnId === turnId && item.kind === 'tool_result'
).length
await this.opts.events.record({
@@ -478,9 +797,10 @@ export class AgentLoop {
})
}
const items = repairModelHistoryItems(
- effectiveHistoryAfterLatestCompaction(healed.items)
+ effectiveHistoryAfterLatestCompaction(historyItems)
)
const approvalPolicy = normalizeApprovalPolicy(thread?.approvalPolicy)
+ const sandboxMode = normalizeSandboxMode(thread?.sandboxMode)
// Per-turn mode overrides the thread mode so the GUI can toggle
// Plan/agent (and run Build as agent) without recreating the thread.
const effectiveMode = turn?.mode ?? thread?.mode
@@ -522,11 +842,20 @@ export class AgentLoop {
const activeGoalInstruction = planTurnActive
? null
: goalContinuationInstruction(thread?.goal)
- const activeTodoInstruction = todoContinuationInstruction(thread?.todos)
+ const goalRecoveryInstruction = activeGoalInstruction
+ ? goalNoToolRecoveryInstruction(this.goalNoToolRecoveryStepsByTurn.get(turnId) ?? 0)
+ : null
+ const activeTodoInstruction = planTurnActive
+ ? null
+ : todoContinuationInstruction(thread?.todos)
const allowedToolNames = allowedToolNamesWithGuiStateTools(
skillResolution.allowedToolNames,
activeGoalInstruction !== null
)
+ // IM/headless turns run without the user-input gate; the tools key
+ // their advertisement off `awaitUserInput`, so omitting it hides
+ // `user_input`/`request_user_input` and rejects stray calls.
+ const userInputDisabled = turn?.disableUserInput === true
const toolContext: ToolHostContext = {
threadId,
turnId,
@@ -539,9 +868,12 @@ export class AgentLoop {
delegationPolicy: { enabled: false },
...(allowedToolNames ? { allowedToolNames } : {}),
approvalPolicy,
+ sandboxMode,
abortSignal: signal,
awaitApproval: async () => 'allow',
- awaitUserInput: (input) => this.awaitUserInput(threadId, turnId, input, signal)
+ ...(userInputDisabled
+ ? {}
+ : { awaitUserInput: (input) => this.awaitUserInput(threadId, turnId, input, signal) })
}
const tools = await this.opts.toolHost.listTools(toolContext)
const toolSpecs: ModelToolSpec[] = tools
@@ -556,6 +888,7 @@ export class AgentLoop {
model: modelCapabilities.id,
activeSkillIds: skillResolution.activeSkillIds,
allowedToolNames,
+ userInputDisabled,
fingerprint: toolCatalog.fingerprint,
toolNames: toolCatalog.toolNames,
toolHashes: toolCatalog.toolHashes
@@ -587,7 +920,7 @@ export class AgentLoop {
if (toolCatalogDrift.kind === 'breaking') return 'stop'
const toolKinds = new Map(toolSpecs.map((tool) => [tool.name, tool.toolKind]))
const createPlanSatisfied = planTurnActive
- ? hasSuccessfulCreatePlanResult(healed.items, turnId)
+ ? hasSuccessfulCreatePlanResult(historyItems, turnId)
: false
const requiredToolName =
planTurnActive &&
@@ -595,10 +928,11 @@ export class AgentLoop {
toolSpecs.some((tool) => tool.name === CREATE_PLAN_TOOL_NAME)
? CREATE_PLAN_TOOL_NAME
: undefined
- // Final step of a plan turn that still owes a plan. Offer ONLY create_plan
- // (this DeepSeek-compatible provider ignores a forced tool_choice, so we
- // remove the investigation tools instead) so the model can only save the
- // plan or answer with plan text that the create_plan fallback materializes.
+ const effectiveToolSpecs = resolvePlanModeToolSpecs(toolSpecs, {
+ planTurnActive,
+ createPlanSatisfied,
+ stepIndex
+ })
const history = await this.compactIfNeeded(items, model, signal, { threadId, turnId })
if (signal.aborted) return 'aborted'
await this.recordPipelineStage(threadId, turnId, 'input_compressed', {
@@ -606,10 +940,14 @@ export class AgentLoop {
})
const contextInstructions = [
...(activeGoalInstruction ? [activeGoalInstruction] : []),
+ ...(goalRecoveryInstruction && (this.goalNoToolRecoveryStepsByTurn.get(turnId) ?? 0) > 0
+ ? [goalRecoveryInstruction]
+ : []),
...(activeTodoInstruction ? [activeTodoInstruction] : []),
...memoryInstructions(memories),
...skillResolution.instructions,
- ...(toolSpecs.some((tool) => tool.name === 'bash') ? [shellRuntimeInstruction()] : []),
+ ...(userInputDisabled ? [userInputUnavailableInstruction()] : []),
+ ...(effectiveToolSpecs.some((tool) => tool.name === 'bash') ? [shellRuntimeInstruction()] : []),
...(toolCatalogDriftMessage ? [toolCatalogDriftMessage] : [])
]
await this.recordPipelineStage(threadId, turnId, 'input_remembered', {
@@ -628,7 +966,7 @@ export class AgentLoop {
history,
...(attachments.imageAttachments.length ? { attachments: attachments.imageAttachments } : {}),
...(attachments.textFallbacks.length ? { attachmentTextFallbacks: attachments.textFallbacks } : {}),
- tools: toolSpecs,
+ tools: effectiveToolSpecs,
...(requiredToolName ? { requiredToolName } : {}),
...(modelRoute.reasoningEffort ? { reasoningEffort: modelRoute.reasoningEffort } : {}),
abortSignal: signal
@@ -656,8 +994,10 @@ export class AgentLoop {
let reasoningItemId = ''
const completedToolCalls: ToolCallLike[] = []
let stopReason: 'stop' | 'tool_calls' | 'length' | 'error' = 'stop'
+ const modelClientDiagnostics = this.modelClientDiagnostics()
await this.recordPipelineStage(threadId, turnId, 'pre_send', {
model: request.model,
+ ...modelClientDiagnostics,
historyItems: request.history.length,
toolCount: request.tools.length,
...(request.requiredToolName ? { requiredToolName: request.requiredToolName } : {}),
@@ -669,7 +1009,8 @@ export class AgentLoop {
})
})
await this.recordPipelineStage(threadId, turnId, 'post_send', {
- model: request.model
+ model: request.model,
+ ...modelClientDiagnostics
})
for await (const chunk of this.opts.model.stream(request)) {
if (signal.aborted) return 'aborted'
@@ -767,15 +1108,21 @@ export class AgentLoop {
break
}
case 'completed':
- stopReason = chunk.stopReason
+ if (stopReason !== 'error') stopReason = chunk.stopReason
break
case 'error':
+ this.rememberTurnFailure(turnId, {
+ error: chunk.message,
+ ...(chunk.code ? { code: chunk.code } : {}),
+ severity: 'error'
+ })
await this.opts.events.record({
kind: 'error',
threadId,
turnId,
message: chunk.message,
- code: chunk.code
+ code: chunk.code,
+ severity: 'error'
})
stopReason = 'error'
break
@@ -822,7 +1169,7 @@ export class AgentLoop {
const provider = toolProviderMetadata.get(CREATE_PLAN_TOOL_NAME)
const toolKind = toolKinds.get(CREATE_PLAN_TOOL_NAME)
const sourceRequest = activePlanContext?.sourceRequest ||
- latestUserMessageText(healed.items, turnId) ||
+ latestUserMessageText(historyItems, turnId) ||
turn?.prompt ||
''
const argumentsForFallback: Record = activePlanContext
@@ -881,9 +1228,11 @@ export class AgentLoop {
allowedToolNames,
toolProviderKinds: new Map(tools.map((tool) => [tool.name, tool.providerKind])),
approvalPolicy,
+ sandboxMode,
signal
})
if (dispatched === 'aborted') return 'aborted'
+ if (dispatched === 'all_suppressed') return 'stop'
return 'continue'
}
const message = `Model did not call the required \`${request.requiredToolName}\` tool for this GUI plan turn.`
@@ -906,9 +1255,50 @@ export class AgentLoop {
)
return 'failed'
}
- if (stopReason === 'stop' && activeGoalInstruction) return 'continue'
+ if (stopReason === 'stop' && activeGoalInstruction) {
+ const previousText = this.lastNoToolTextByTurn.get(turnId)
+ if (isRepeatedNoToolAssistantText(previousText, textAccumulator.value)) {
+ const recoverySteps = (this.goalNoToolRecoveryStepsByTurn.get(turnId) ?? 0) + 1
+ if (recoverySteps <= GOAL_NO_TOOL_REPEAT_MAX_RECOVERY_STEPS) {
+ this.goalNoToolRecoveryStepsByTurn.set(turnId, recoverySteps)
+ this.lastNoToolTextByTurn.set(turnId, textAccumulator.value)
+ return 'continue'
+ }
+ const message =
+ 'Goal continuation stopped: the model kept repeating near-identical replies without calling tools or updating the goal.'
+ await this.opts.turns.applyItem(
+ threadId,
+ makeErrorItem({
+ id: this.opts.ids.next('item_error'),
+ turnId,
+ threadId,
+ message,
+ code: 'goal_repetition_stop',
+ severity: 'warning'
+ })
+ )
+ await this.opts.events.record({
+ kind: 'error',
+ threadId,
+ turnId,
+ message,
+ code: 'goal_repetition_stop',
+ severity: 'warning'
+ })
+ this.lastNoToolTextByTurn.delete(turnId)
+ this.goalNoToolRecoveryStepsByTurn.delete(turnId)
+ return 'stop'
+ }
+ this.goalNoToolRecoveryStepsByTurn.delete(turnId)
+ this.lastNoToolTextByTurn.set(turnId, textAccumulator.value)
+ return 'continue'
+ }
return 'stop'
}
+ // Tool calls mean the turn is making progress again; reset the no-tool
+ // repetition window so unrelated later status texts are not compared.
+ this.lastNoToolTextByTurn.delete(turnId)
+ this.goalNoToolRecoveryStepsByTurn.delete(turnId)
const dispatched = await this.dispatchToolCalls({
calls: completedToolCalls,
threadId,
@@ -919,11 +1309,14 @@ export class AgentLoop {
modelCapabilities,
activeSkillIds: skillResolution.activeSkillIds,
allowedToolNames,
+ userInputDisabled,
toolProviderKinds: new Map(tools.map((tool) => [tool.name, tool.providerKind])),
approvalPolicy,
+ sandboxMode,
signal
})
if (dispatched === 'aborted') return 'aborted'
+ if (dispatched === 'all_suppressed') return 'stop'
return 'continue'
}
@@ -937,12 +1330,15 @@ export class AgentLoop {
modelCapabilities: ModelCapabilityMetadata
activeSkillIds: readonly string[]
allowedToolNames?: readonly string[]
+ userInputDisabled?: boolean
toolProviderKinds: ReadonlyMap
approvalPolicy: ToolHostContext['approvalPolicy']
+ sandboxMode: NonNullable
signal: AbortSignal
- }): Promise<'continue' | 'aborted'> {
+ }): Promise<'continue' | 'aborted' | 'all_suppressed'> {
const context = this.createToolContext(input)
let index = 0
+ let executedAny = false
while (index < input.calls.length) {
if (input.signal.aborted) return 'aborted'
@@ -963,12 +1359,13 @@ export class AgentLoop {
}
if (!this.isParallelSafeToolCall(call, input.approvalPolicy, input.toolProviderKinds)) {
- const result = await this.executeToolCall({
+ const result = await this.executeToolCallSafely({
threadId: input.threadId,
turnId: input.turnId,
call,
context
})
+ executedAny = true
await this.persistToolCallResult(input.threadId, input.turnId, call, result)
index += 1
continue
@@ -996,7 +1393,7 @@ export class AgentLoop {
const settled = await Promise.allSettled(
batch.map((entry) =>
- this.executeToolCall({
+ this.executeToolCallSafely({
threadId: input.threadId,
turnId: input.turnId,
call: entry,
@@ -1004,6 +1401,7 @@ export class AgentLoop {
})
)
)
+ executedAny = true
for (let batchIndex = 0; batchIndex < batch.length; batchIndex += 1) {
const result = settled[batchIndex]
const batchCall = batch[batchIndex]
@@ -1022,7 +1420,7 @@ export class AgentLoop {
}
}
- return 'continue'
+ return executedAny ? 'continue' : 'all_suppressed'
}
private isParallelSafeToolCall(
@@ -1045,7 +1443,9 @@ export class AgentLoop {
modelCapabilities: ModelCapabilityMetadata
activeSkillIds: readonly string[]
allowedToolNames?: readonly string[]
+ userInputDisabled?: boolean
approvalPolicy: ToolHostContext['approvalPolicy']
+ sandboxMode: NonNullable
signal: AbortSignal
}): ToolHostContext {
return {
@@ -1060,6 +1460,7 @@ export class AgentLoop {
delegationPolicy: { enabled: false },
...(input.allowedToolNames ? { allowedToolNames: input.allowedToolNames } : {}),
approvalPolicy: input.approvalPolicy,
+ sandboxMode: input.sandboxMode,
abortSignal: input.signal,
awaitApproval: async (approval) => {
await this.opts.events.record({
@@ -1069,12 +1470,18 @@ export class AgentLoop {
approvalId: approval.id,
toolName: approval.toolName,
status: 'pending',
+ approvalPolicy: input.approvalPolicy,
+ sandboxMode: input.sandboxMode,
summary: approval.summary
})
return this.opts.approvalGate.request(approval)
},
- awaitUserInput: (inputRequest) =>
- this.awaitUserInput(input.threadId, input.turnId, inputRequest, input.signal)
+ ...(input.userInputDisabled
+ ? {}
+ : {
+ awaitUserInput: (inputRequest) =>
+ this.awaitUserInput(input.threadId, input.turnId, inputRequest, input.signal)
+ })
}
}
@@ -1138,6 +1545,51 @@ export class AgentLoop {
)
}
+ /**
+ * A crashing tool handler must surface as an error tool_result the
+ * model can react to, not kill the whole turn. Only turn aborts are
+ * allowed to propagate.
+ */
+ private async executeToolCallSafely(input: {
+ threadId: string
+ turnId: string
+ call: ToolCallLike
+ context: ToolHostContext
+ }): Promise {
+ try {
+ return await this.executeToolCall(input)
+ } catch (error) {
+ if (input.context.abortSignal.aborted) throw error
+ const message = error instanceof Error ? error.message : String(error)
+ await this.opts.events.record({
+ kind: 'error',
+ threadId: input.threadId,
+ turnId: input.turnId,
+ message: `Tool call ${input.call.toolName} failed: ${message}`,
+ code: 'tool_execution_failed',
+ severity: 'warning'
+ })
+ return {
+ item: makeToolResultItem({
+ id: `item_${input.call.callId}`,
+ turnId: input.turnId,
+ threadId: input.threadId,
+ callId: input.call.callId,
+ toolName: input.call.toolName,
+ toolKind: input.call.toolKind ?? 'tool_call',
+ output: {
+ code: 'tool_execution_failed',
+ error: message,
+ guidance:
+ 'The tool crashed while executing. Adjust the arguments or take a different approach instead of retrying the identical call.'
+ },
+ isError: true
+ }),
+ approved: false
+ }
+ }
+ }
+
private isRecoverableToolDispatchError(error: unknown): boolean {
const message = error instanceof Error ? error.message : String(error)
return (
@@ -1344,6 +1796,16 @@ export class AgentLoop {
if (!plan) return items
const threadId = context.threadId
const turnId = context.turnId
+ if (hasHooksForPhase(this.opts.hooks, 'PreCompact')) {
+ const observed = await runObserverHooks(this.opts.hooks, {
+ phase: 'PreCompact',
+ threadId,
+ turnId,
+ reason: String(plan.reason),
+ mode: String(plan.mode)
+ })
+ await this.recordHookWarnings(threadId, turnId, observed.warnings)
+ }
let result = this.opts.compactor.compact({
threadId,
turnId,
@@ -1520,14 +1982,8 @@ export class AgentLoop {
}): Promise {
const savedTokens = Math.max(0, Math.floor(input.rawInputTokens - input.sentInputTokens))
if (savedTokens <= 0) return
- const estimatedCost = estimateDeepseekInputTokenCost({
- model: input.model,
- inputTokens: savedTokens
- })
const usage = this.opts.usage.recordTokenEconomySavings(input.threadId, {
- tokenEconomySavingsTokens: savedTokens,
- ...(estimatedCost ? { tokenEconomySavingsUsd: estimatedCost.costUsd } : {}),
- ...(estimatedCost ? { tokenEconomySavingsCny: estimatedCost.costCny } : {})
+ tokenEconomySavingsTokens: savedTokens
})
await this.opts.events.record({
kind: 'usage',
@@ -1597,6 +2053,7 @@ export class AgentLoop {
model: string
activeSkillIds: readonly string[]
allowedToolNames?: readonly string[]
+ userInputDisabled?: boolean
fingerprint: string
toolNames: string[]
toolHashes: Record
@@ -1607,7 +2064,8 @@ export class AgentLoop {
mode: input.mode,
model: input.model,
activeSkillIds: [...input.activeSkillIds].sort(),
- allowedToolNames: input.allowedToolNames ? [...input.allowedToolNames].sort() : []
+ allowedToolNames: input.allowedToolNames ? [...input.allowedToolNames].sort() : [],
+ userInputDisabled: input.userInputDisabled === true
})
const current: ToolCatalogSnapshot = {
fingerprint: input.fingerprint,
@@ -1615,7 +2073,12 @@ export class AgentLoop {
toolHashes: input.toolHashes
}
const previous = this.toolCatalogSnapshots.get(key)
+ this.toolCatalogSnapshots.delete(key)
this.toolCatalogSnapshots.set(key, current)
+ if (this.toolCatalogSnapshots.size > MAX_TOOL_CATALOG_SNAPSHOTS) {
+ const oldest = this.toolCatalogSnapshots.keys().next().value
+ if (oldest !== undefined) this.toolCatalogSnapshots.delete(oldest)
+ }
if (!previous || previous.fingerprint === input.fingerprint) return { kind: 'none' }
return isAdditiveToolCatalogChange(previous, current)
? { kind: 'additive', previous }
@@ -1810,6 +2273,7 @@ function buildTextAttachmentFallback(
byteSize: fallback.byteSize,
...(fallback.width ? { width: fallback.width } : {}),
...(fallback.height ? { height: fallback.height } : {}),
+ ...(attachment.localFilePath ? { localFilePath: attachment.localFilePath } : {}),
...(fallback.wasCompressed !== undefined ? { wasCompressed: fallback.wasCompressed } : {})
}
}
@@ -1828,6 +2292,7 @@ function buildTextAttachmentFallback(
byteSize: attachment.byteSize,
...(attachment.width ? { width: attachment.width } : {}),
...(attachment.height ? { height: attachment.height } : {}),
+ ...(attachment.localFilePath ? { localFilePath: attachment.localFilePath } : {}),
wasCompressed: false
}
}
@@ -1868,6 +2333,7 @@ function normalizeApprovalPolicy(
value: string | undefined
): ToolHostContext['approvalPolicy'] {
switch (value) {
+ case 'on-request':
case 'never':
case 'auto':
case 'suggest':
@@ -1878,6 +2344,20 @@ function normalizeApprovalPolicy(
}
}
+function normalizeSandboxMode(
+ value: string | undefined
+): NonNullable {
+ switch (value) {
+ case 'read-only':
+ case 'workspace-write':
+ case 'danger-full-access':
+ case 'external-sandbox':
+ return value
+ default:
+ return DEFAULT_SANDBOX_MODE
+ }
+}
+
function isAdditiveToolCatalogChange(previous: ToolCatalogSnapshot, current: ToolCatalogSnapshot): boolean {
let added = false
for (const name of current.toolNames) {
@@ -1922,14 +2402,31 @@ function buildModelCompactionPrompt(input: {
Math.max(1_024, input.maxBytes)
)
return [
- 'Summarize the following Kun conversation history for a context fold.',
- 'Preserve user goals, requirements, decisions, files touched, tool outcomes, errors, constraints, active/pinned skills, and unresolved next steps.',
- 'Do not invent facts. Do not include generic advice. Prefer concise bullets grouped by topic.',
+ 'You are compacting a long agent conversation so work can continue past the context window.',
+ 'Write a dense, factual handoff summary using EXACTLY the following section headers, in this order.',
+ 'Keep every section; write "- (none)" when a section has no content. Use short bullets, not prose.',
+ 'Do not invent facts, do not add generic advice, and preserve concrete identifiers verbatim',
+ '(file paths, function/variable names, commands, URLs, IDs, error messages).',
+ '',
+ '## Goal',
+ "- The user's overall objective and any explicit requirements or constraints.",
+ '## Completed',
+ '- Work already done and decisions made, with the concrete outcome of each.',
+ '## Key findings',
+ '- Important facts discovered (root causes, data values, API shapes) needed to continue.',
+ '## Files & locations',
+ '- Files created/edited/inspected and the relevant paths or line ranges.',
+ '## Tool & command results',
+ '- Notable tool/command outcomes, especially errors and their resolution status.',
+ '## Pending',
+ '- Unresolved next steps and anything explicitly requested but not yet done.',
+ '## Constraints & pins',
+ '- Durable rules, user preferences, and active/pinned skills that must survive.',
'',
- 'Existing heuristic summary to cross-check:',
+ 'Existing heuristic summary to cross-check (may be incomplete):',
input.heuristicSummary.trim() || '(none)',
'',
- 'History excerpt to fold:',
+ 'Conversation history to fold:',
transcript || '(empty)'
].join('\n')
}
@@ -2014,6 +2511,19 @@ function normalizeRequestedReasoningEffort(effort: string | undefined): string |
return normalized && normalized !== 'auto' ? normalized : undefined
}
+function sanitizeProviderBaseUrl(baseUrl: string): string {
+ try {
+ const url = new URL(baseUrl)
+ url.username = ''
+ url.password = ''
+ url.search = ''
+ url.hash = ''
+ return url.toString().replace(/\/$/, '')
+ } catch {
+ return baseUrl.replace(/[?#].*$/, '').replace(/\/+$/, '')
+ }
+}
+
function autoModelRouteKey(threadId: string, turnId: string): string {
return `${threadId}:${turnId}`
}
diff --git a/kun/src/loop/context-estimator.ts b/kun/src/loop/context-estimator.ts
index dd1d87fa..aa9147e8 100644
--- a/kun/src/loop/context-estimator.ts
+++ b/kun/src/loop/context-estimator.ts
@@ -1,28 +1,60 @@
import type { TurnItem } from '../contracts/items.js'
/**
- * Very small token estimator. The estimator prefers reported usage
- * when available, otherwise approximates one token per ~4 characters of
- * item text. The estimator is intentionally simple: the goal is to
- * trigger compaction at a reasonable threshold, not to model provider
- * tokenizers exactly.
+ * Token estimator for compaction decisions.
+ *
+ * The estimator prefers reported usage when available. When it must
+ * approximate from text, it counts CJK and other wide characters as
+ * roughly one token each and packs runs of ASCII at ~4 chars/token.
+ * This avoids the severe under-counting that a naive `length / 4`
+ * heuristic produces for Chinese/Japanese/Korean text, which is the
+ * dominant language for many users of this app. Accurate estimates are
+ * what let compaction trigger *before* the real context window is
+ * exceeded rather than after.
*/
export class ContextEstimator {
private readonly charsPerToken: number
constructor(charsPerToken = 4) {
- this.charsPerToken = charsPerToken
+ this.charsPerToken = Math.max(1, charsPerToken)
}
estimateItem(item: TurnItem): number {
const text = this.collectText(item)
- return Math.max(1, Math.ceil(text.length / this.charsPerToken))
+ return Math.max(1, this.estimateText(text))
}
estimateItems(items: TurnItem[]): number {
return items.reduce((sum, item) => sum + this.estimateItem(item), 0)
}
+ /**
+ * Estimate tokens for a raw string. Runs of ASCII characters are packed at
+ * `charsPerToken` per token; every other character counts as 1 token, except
+ * combining diacritics (U+0300-U+036F) and variation selectors (U+FE00-U+FE0F).
+ */
+ estimateText(text: string): number {
+ if (!text) return 0
+ let asciiRun = 0
+ let tokens = 0
+ const flushAscii = (): void => {
+ if (asciiRun > 0) {
+ tokens += Math.ceil(asciiRun / this.charsPerToken)
+ asciiRun = 0
+ }
+ }
+ for (const char of text) {
+ if (char.charCodeAt(0) <= 0x7f) {
+ asciiRun += 1
+ continue
+ }
+ flushAscii()
+ tokens += isCombiningMark(char) ? 0 : 1
+ }
+ flushAscii()
+ return tokens
+ }
+
private collectText(item: TurnItem): string {
switch (item.kind) {
case 'user_message':
@@ -46,3 +78,7 @@ export class ContextEstimator {
}
}
}
+
+function isCombiningMark(char: string): boolean {
+ return /[\u0300-\u036f\ufe00-\ufe0f]/u.test(char)
+}
diff --git a/kun/src/loop/model-context-profile.test.ts b/kun/src/loop/model-context-profile.test.ts
new file mode 100644
index 00000000..5b3bec81
--- /dev/null
+++ b/kun/src/loop/model-context-profile.test.ts
@@ -0,0 +1,51 @@
+import { describe, expect, it } from 'vitest'
+import { contextThresholdsForModel } from './model-context-profile.js'
+
+describe('contextThresholdsForModel safety cap', () => {
+ it('caps soft/hard thresholds to 75%/85% of the context window', () => {
+ // A config-provided profile that sets thresholds dangerously close to
+ // the full window (98%/99%) must be clamped so compaction still has
+ // headroom to run before the real window is exceeded.
+ const profiles = [
+ {
+ canonicalModel: 'deepseek-v4-pro',
+ modelIds: ['deepseek-v4-pro'] as readonly string[],
+ contextWindowTokens: 1_000_000,
+ softThreshold: 980_000,
+ hardThreshold: 990_000,
+ inputModalities: ['text'] as const,
+ outputModalities: ['text'] as const,
+ supportsToolCalling: true,
+ messageParts: ['text'] as const
+ }
+ ]
+ const thresholds = contextThresholdsForModel('deepseek-v4-pro', undefined, profiles)
+ expect(thresholds.softThreshold).toBe(750_000)
+ expect(thresholds.hardThreshold).toBe(850_000)
+ })
+
+ it('leaves already-safe thresholds untouched', () => {
+ const profiles = [
+ {
+ canonicalModel: 'deepseek-v4-pro',
+ modelIds: ['deepseek-v4-pro'] as readonly string[],
+ contextWindowTokens: 1_000_000,
+ softThreshold: 500_000,
+ hardThreshold: 600_000,
+ inputModalities: ['text'] as const,
+ outputModalities: ['text'] as const,
+ supportsToolCalling: true,
+ messageParts: ['text'] as const
+ }
+ ]
+ const thresholds = contextThresholdsForModel('deepseek-v4-pro', undefined, profiles)
+ expect(thresholds.softThreshold).toBe(500_000)
+ expect(thresholds.hardThreshold).toBe(600_000)
+ })
+
+ it('returns the fallback when no profile matches', () => {
+ const fallback = { softThreshold: 1234, hardThreshold: 5678 }
+ const thresholds = contextThresholdsForModel('unknown-model', fallback, [])
+ expect(thresholds).toEqual(fallback)
+ })
+})
diff --git a/kun/src/loop/model-context-profile.ts b/kun/src/loop/model-context-profile.ts
index 4a52c7b5..fca6b54f 100644
--- a/kun/src/loop/model-context-profile.ts
+++ b/kun/src/loop/model-context-profile.ts
@@ -1,7 +1,8 @@
import type {
ModelCapabilityMetadata,
ModelInputModality,
- ModelMessagePartSupport
+ ModelMessagePartSupport,
+ ModelReasoningCapabilityMetadata
} from '../contracts/capabilities.js'
export type ModelContextThresholds = {
@@ -24,6 +25,7 @@ export type ModelContextProfile = ModelContextThresholds & {
outputModalities: readonly ModelInputModality[]
supportsToolCalling: boolean
messageParts: readonly ModelMessagePartSupport[]
+ reasoning?: ModelReasoningCapabilityMetadata
}
export type ModelContextProfileConfig = {
@@ -42,6 +44,7 @@ export type ModelContextProfileConfig = {
outputModalities?: readonly ModelInputModality[]
supportsToolCalling?: boolean
messageParts?: readonly ModelMessagePartSupport[]
+ reasoning?: ModelReasoningCapabilityMetadata
}
export type ModelConfig = {
@@ -68,13 +71,23 @@ export type ModelProfileConfigSource = {
}
export const DEFAULT_CONTEXT_THRESHOLDS: ModelContextThresholds = {
- softThreshold: 16_000,
- hardThreshold: 24_000
+ // Fallback for models without a registered profile. These assume a
+ // reasonably large window (>=128k). A custom endpoint with a small
+ // window (e.g. 32k) should register a profile with explicit thresholds,
+ // otherwise it may exceed its window before the first compaction.
+ softThreshold: 96_000,
+ hardThreshold: 120_000
}
const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS = 1_000_000
-const DEEPSEEK_V4_SOFT_THRESHOLD_RATIO = 0.98
-const DEEPSEEK_V4_HARD_THRESHOLD_RATIO = 0.99
+// Trigger compaction well before the real window is full. Compacting at
+// ~98% (the previous default) left no headroom: a single large tool
+// result could blow past the window before the next compaction ran,
+// which is what caused runaway context growth and dropped tool tables.
+// 0.75 / 0.85 mirrors the "compact before 100%" guidance used by mature
+// coding agents and leaves room for the post-compaction request to fit.
+const DEEPSEEK_V4_SOFT_THRESHOLD_RATIO = 0.75
+const DEEPSEEK_V4_HARD_THRESHOLD_RATIO = 0.85
const DEFAULT_MODEL_INPUT_MODALITIES: readonly ModelInputModality[] = ['text']
const DEFAULT_MODEL_OUTPUT_MODALITIES: readonly ModelInputModality[] = ['text']
const DEFAULT_MODEL_MESSAGE_PARTS: readonly ModelMessagePartSupport[] = ['text']
@@ -107,9 +120,19 @@ export function contextThresholdsForModel(
): ModelContextThresholds {
const profile = resolveModelContextProfile(model, profiles)
if (!profile) return fallback
+ // Safety cap: never let thresholds exceed 75%/85% of the context
+ // window, even if a config-provided model profile sets them higher
+ // (e.g. 98%/99%). Compacting too late leaves no headroom and lets a
+ // single large turn blow past the real window, causing runaway growth.
+ const maxSoft = profile.contextWindowTokens
+ ? Math.floor(profile.contextWindowTokens * 0.75)
+ : profile.softThreshold
+ const maxHard = profile.contextWindowTokens
+ ? Math.floor(profile.contextWindowTokens * 0.85)
+ : profile.hardThreshold
return {
- softThreshold: profile.softThreshold,
- hardThreshold: profile.hardThreshold
+ softThreshold: Math.min(profile.softThreshold, maxSoft),
+ hardThreshold: Math.min(profile.hardThreshold, maxHard)
}
}
@@ -124,7 +147,8 @@ export function modelCapabilitiesForModel(
outputModalities: [...(profile?.outputModalities ?? DEFAULT_MODEL_OUTPUT_MODALITIES)],
supportsToolCalling: profile?.supportsToolCalling ?? true,
contextWindowTokens: profile?.contextWindowTokens,
- messageParts: [...(profile?.messageParts ?? DEFAULT_MODEL_MESSAGE_PARTS)]
+ messageParts: [...(profile?.messageParts ?? DEFAULT_MODEL_MESSAGE_PARTS)],
+ ...(profile?.reasoning ? { reasoning: copyReasoningCapability(profile.reasoning) } : {})
}
}
@@ -162,7 +186,12 @@ function deepseekV4Profile(
inputModalities: DEFAULT_MODEL_INPUT_MODALITIES,
outputModalities: DEFAULT_MODEL_OUTPUT_MODALITIES,
supportsToolCalling: true,
- messageParts: DEFAULT_MODEL_MESSAGE_PARTS
+ messageParts: DEFAULT_MODEL_MESSAGE_PARTS,
+ reasoning: {
+ supportedEfforts: ['off', 'high', 'max'],
+ defaultEffort: 'max',
+ requestProtocol: 'deepseek-chat-completions'
+ }
}
}
@@ -202,6 +231,7 @@ function mergeModelContextProfile(
...(current?.modelIds ?? []),
...(input.aliases ?? [])
])
+ const reasoning = input.reasoning ?? current?.reasoning
return {
canonicalModel,
modelIds,
@@ -211,7 +241,20 @@ function mergeModelContextProfile(
inputModalities: uniqueModelCapabilityValues(input.inputModalities ?? current?.inputModalities ?? DEFAULT_MODEL_INPUT_MODALITIES),
outputModalities: uniqueModelCapabilityValues(input.outputModalities ?? current?.outputModalities ?? DEFAULT_MODEL_OUTPUT_MODALITIES),
supportsToolCalling: input.supportsToolCalling ?? current?.supportsToolCalling ?? true,
- messageParts: uniqueModelCapabilityValues(input.messageParts ?? current?.messageParts ?? DEFAULT_MODEL_MESSAGE_PARTS)
+ messageParts: uniqueModelCapabilityValues(input.messageParts ?? current?.messageParts ?? DEFAULT_MODEL_MESSAGE_PARTS),
+ ...(reasoning
+ ? { reasoning: copyReasoningCapability(reasoning) }
+ : {})
+ }
+}
+
+function copyReasoningCapability(
+ reasoning: ModelReasoningCapabilityMetadata
+): ModelReasoningCapabilityMetadata {
+ return {
+ supportedEfforts: [...reasoning.supportedEfforts],
+ defaultEffort: reasoning.defaultEffort,
+ requestProtocol: reasoning.requestProtocol
}
}
diff --git a/kun/src/loop/request-history-hygiene.test.ts b/kun/src/loop/request-history-hygiene.test.ts
new file mode 100644
index 00000000..a7be3353
--- /dev/null
+++ b/kun/src/loop/request-history-hygiene.test.ts
@@ -0,0 +1,71 @@
+import { describe, expect, it } from 'vitest'
+import type { TurnItem } from '../contracts/items.js'
+import { applyRequestHistoryHygiene } from './request-history-hygiene.js'
+
+function toolResult(id: string, output: string): TurnItem {
+ return {
+ id: `item_${id}`,
+ turnId: 'turn_1',
+ threadId: 'thread_1',
+ role: 'tool',
+ status: 'completed',
+ createdAt: '2026-01-01T00:00:00.000Z',
+ kind: 'tool_result',
+ toolName: 'read',
+ callId: id,
+ toolKind: 'tool_call',
+ output,
+ isError: false
+ } as TurnItem
+}
+
+describe('applyRequestHistoryHygiene cumulative tool-result budget', () => {
+ it('collapses older tool results once the cumulative budget is exhausted', () => {
+ // Each result is ~250 ASCII tokens (1000 chars / 4). With a 600-token
+ // budget and keepRecent=1, only the most recent couple should survive
+ // verbatim; older ones become a one-line digest.
+ const big = 'x'.repeat(1000)
+ const items = [
+ toolResult('a', big),
+ toolResult('b', big),
+ toolResult('c', big),
+ toolResult('d', big)
+ ]
+ const out = applyRequestHistoryHygiene(items, {
+ maxCumulativeToolResultTokens: 600,
+ keepRecentToolResults: 1,
+ // Keep per-result limits high so only the cumulative pass acts here.
+ maxToolResultTokens: 100_000,
+ maxToolResultBytes: 10_000_000,
+ maxToolResultLines: 100_000
+ })
+ const outputs = out.map((item) => (item.kind === 'tool_result' ? String(item.output) : ''))
+ // Newest (d) is always kept verbatim.
+ expect(outputs[3]).toBe(big)
+ // Oldest (a) must be collapsed to a digest marker.
+ expect(outputs[0]).toContain('cache hygiene')
+ expect(outputs[0]).not.toBe(big)
+ })
+
+ it('keeps everything when under budget', () => {
+ const small = 'hello world'
+ const items = [toolResult('a', small), toolResult('b', small)]
+ const out = applyRequestHistoryHygiene(items, {
+ maxCumulativeToolResultTokens: 100_000,
+ keepRecentToolResults: 4
+ })
+ expect(out).toBe(items)
+ })
+
+ it('does nothing when no cumulative cap is configured', () => {
+ const big = 'y'.repeat(5000)
+ const items = [toolResult('a', big), toolResult('b', big)]
+ const out = applyRequestHistoryHygiene(items, {
+ maxCumulativeToolResultTokens: 0,
+ maxToolResultTokens: 100_000,
+ maxToolResultBytes: 10_000_000,
+ maxToolResultLines: 100_000
+ })
+ expect(out).toBe(items)
+ })
+})
diff --git a/kun/src/loop/request-history-hygiene.ts b/kun/src/loop/request-history-hygiene.ts
index 42a3e89f..de904426 100644
--- a/kun/src/loop/request-history-hygiene.ts
+++ b/kun/src/loop/request-history-hygiene.ts
@@ -7,6 +7,21 @@ export type RequestHistoryHygieneOptions = {
maxToolArgumentStringBytes?: number
maxToolArgumentStringTokens?: number
maxArrayItems?: number
+ /**
+ * Cumulative token budget for ALL tool results combined across the
+ * sent history. Tool results are kept in full from newest to oldest
+ * while they fit this budget; results that no longer fit are collapsed
+ * to a one-line digest. A value <= 0 disables the cap. This bounds
+ * total context growth regardless of how many tool calls a long
+ * session accumulates, preventing runaway growth (e.g. a session
+ * ballooning to millions of tokens of stale tool output).
+ */
+ maxCumulativeToolResultTokens?: number
+ /**
+ * Number of most-recent tool results that are always kept at full
+ * per-result fidelity, even if the cumulative budget is exhausted.
+ */
+ keepRecentToolResults?: number
}
const DEFAULT_MAX_TOOL_RESULT_LINES = 320
@@ -15,6 +30,10 @@ const DEFAULT_MAX_TOOL_RESULT_TOKENS = 8_000
const DEFAULT_MAX_TOOL_ARGUMENT_STRING_BYTES = 8 * 1024
const DEFAULT_MAX_TOOL_ARGUMENT_STRING_TOKENS = 2_000
const DEFAULT_MAX_ARRAY_ITEMS = 80
+// 0 means "no cumulative cap" (back-compat). A positive value bounds the
+// combined size of all tool results in the sent history.
+const DEFAULT_MAX_CUMULATIVE_TOOL_RESULT_TOKENS = 0
+const DEFAULT_KEEP_RECENT_TOOL_RESULTS = 4
const MAX_SIGNAL_LINES = 48
const MAX_LINE_CHARS = 280
const LONG_ARGUMENT_PREVIEW_CHARS = 160
@@ -69,7 +88,87 @@ export function applyRequestHistoryHygiene(
}
return item
})
- return changed ? next : items
+ const budgeted = applyCumulativeToolResultBudget(next, limits)
+ if (budgeted !== next) changed = true
+ return changed ? budgeted : items
+}
+
+/**
+ * Enforce a combined token budget across all tool results in the sent
+ * history. The newest `keepRecentToolResults` results are always kept
+ * verbatim (none when that option is <= 0) and still count against the
+ * budget. The rest are visited newest-first: a result is kept while it
+ * fits the remaining budget, otherwise it is collapsed to a single-line
+ * digest. Returns `items` itself when nothing had to be collapsed.
+ */
+function applyCumulativeToolResultBudget(
+ items: TurnItem[],
+ limits: Required
+): TurnItem[] {
+ const budget = limits.maxCumulativeToolResultTokens
+ if (budget <= 0) return items
+
+ const toolResultIndexes: number[] = []
+ for (let index = 0; index < items.length; index += 1) {
+ if (items[index].kind === 'tool_result') toolResultIndexes.push(index)
+ }
+ if (toolResultIndexes.length === 0) return items
+ // slice(-0) is slice(0) (every index), so a non-positive count must mean "exempt none".
+ const alwaysKeep = new Set(limits.keepRecentToolResults > 0 ? toolResultIndexes.slice(-limits.keepRecentToolResults) : [])
+ let used = 0
+ const collapse = new Set()
+ // Walk newest -> oldest so recent context is preserved first.
+ for (let cursor = toolResultIndexes.length - 1; cursor >= 0; cursor -= 1) {
+ const index = toolResultIndexes[cursor]
+ const item = items[index]
+ if (item.kind !== 'tool_result') continue
+ const cost = estimateTokens(stringifyOutput(item.output))
+ if (alwaysKeep.has(index)) {
+ used += cost
+ continue
+ }
+ if (used + cost <= budget) {
+ used += cost
+ continue
+ }
+ collapse.add(index)
+ }
+ if (collapse.size === 0) return items
+
+ return items.map((item, index) => {
+ if (!collapse.has(index) || item.kind !== 'tool_result') return item
+ return { ...item, output: digestStaleToolResult(item.toolName, item.isError, item.output) }
+ })
+}
+
+function digestStaleToolResult(toolName: string, isError: boolean | undefined, output: unknown): string {
+ const text = stringifyOutput(output)
+ const tokens = estimateTokens(text)
+ const firstLine = text
+ .split('\n')
+ .map((line) => line.trim())
+ .find((line) => line.length > 0) ?? ''
+ const preview = firstLine ? ` first line: ${clipInline(firstLine, 160)}` : ''
+ return (
+ `[cache hygiene: older ${toolName}${isError ? ' (error)' : ''} result elided to bound context, ` +
+ `approx ${tokens} token(s); re-run the tool or use narrower read/grep/bash ranges if needed.]${preview}`
+ )
+}
+
+function stringifyOutput(output: unknown): string {
+ if (typeof output === 'string') return output
+ if (output == null) return ''
+ try {
+ return JSON.stringify(output)
+ } catch {
+ return String(output)
+ }
+}
+
+function clipInline(text: string, max: number): string {
+ const compact = text.replace(/\s+/g, ' ').trim()
+ if (compact.length <= max) return compact
+ return `${compact.slice(0, Math.max(0, max - 3)).trim()}...`
}
function normalizeOptions(options: RequestHistoryHygieneOptions): Required {
@@ -81,7 +180,15 @@ function normalizeOptions(options: RequestHistoryHygieneOptions): Required
/** Highest known per-thread `seq`. Returns 0 when no events have been recorded. */
highestSeq(threadId: string): Promise
+ /**
+ * Optional indexed usage query. Implementations may return per-event
+ * usage deltas without replaying the full event log.
+ */
+ loadUsageRecords?(options?: { threadId?: string }): Promise
+ /** Optional indexed latest cumulative usage snapshot query. */
+ loadLatestUsageSnapshots?(options?: { threadIds?: string[] }): Promise
/** Forget the per-thread in-memory state without touching disk. */
resetMemory(): Promise
}
diff --git a/kun/src/ports/tool-host.ts b/kun/src/ports/tool-host.ts
index 93818e89..d849a5c3 100644
--- a/kun/src/ports/tool-host.ts
+++ b/kun/src/ports/tool-host.ts
@@ -1,4 +1,4 @@
-import type { ApprovalPolicy } from '../contracts/policy.js'
+import type { ApprovalPolicy, SandboxMode } from '../contracts/policy.js'
import type { ApprovalRequest } from '../domain/approval.js'
import type { TurnItem } from '../contracts/items.js'
import type { ModelCapabilityMetadata } from '../contracts/capabilities.js'
@@ -15,6 +15,9 @@ export type ToolProviderKind =
| 'memory'
| 'gui'
| 'delegation'
+ | 'image'
+ | 'audio'
+ | 'video'
export type ToolProviderPolicy = {
id: string
@@ -80,6 +83,8 @@ export type ToolHostContext = {
/** Optional tool-name allow-list. When set, other tools are not advertised or executed. */
allowedToolNames?: readonly string[]
approvalPolicy: ApprovalPolicy
+ /** Filesystem/command sandbox selected for this turn. Defaults at execution time for old callers. */
+ sandboxMode?: SandboxMode
abortSignal: AbortSignal
/** Resolves a pending approval with the user's decision. */
awaitApproval: (approval: ApprovalRequest) => Promise<'allow' | 'deny'>
diff --git a/kun/src/prompt/kun-system-prompt.ts b/kun/src/prompt/kun-system-prompt.ts
index 838477b8..ea319d3d 100644
--- a/kun/src/prompt/kun-system-prompt.ts
+++ b/kun/src/prompt/kun-system-prompt.ts
@@ -1,10 +1,10 @@
export const KUN_SYSTEM_PROMPT = [
- 'You are Kun, the GUI-native coding agent for DeepSeek-GUI.',
+ 'You are Kun, the GUI-native agent inside the Kun desktop app.',
'',
- 'This operating contract is intentionally stable. It is kept at the front of every Kun model request so DeepSeek prompt-cache can reuse the same prefix across Code, Write, Claw, plan, and tool continuations. Do not casually reorder, rewrite, or personalize this contract; runtime-specific and user-specific facts belong in later conversation turns or compacted history, not in this prefix.',
+ 'This operating contract is intentionally stable. It is kept at the front of every Kun model request so provider prompt caches can reuse the same prefix across Code, Write, Claw, plan, and tool continuations. Do not casually reorder, rewrite, or personalize this contract; runtime-specific and user-specific facts belong in later conversation turns or compacted history, not in this prefix.',
'',
'Core identity:',
- '- Work as a senior engineering collaborator inside the DeepSeek GUI application.',
+ '- Work as a senior engineering collaborator inside the Kun desktop application.',
'- Preserve the user intent exactly, especially negative constraints such as do not, never, avoid, keep, remove, or preserve.',
'- Prefer small, coherent changes that match the existing codebase over broad rewrites.',
'- Read current state before acting. The workspace, persisted thread history, and GUI HTTP/SSE contract are authoritative.',
@@ -37,7 +37,7 @@ export const KUN_SYSTEM_PROMPT = [
'- Mutable user content, file excerpts, tool results, timestamps, selected text, workspace status, and generated summaries must stay after the stable prefix.',
'- Compaction should preserve objectives, constraints, decisions, touched files, unresolved tasks, and relevant tool results while keeping the front prefix unchanged.',
'- When summarizing or resuming, keep the same agent system contract and tool shape whenever possible so the summary request can reuse bytes already cached by the main agent.',
- '- Cache telemetry must use DeepSeek native prompt_cache_hit_tokens and prompt_cache_miss_tokens when present. Fallback fields are acceptable only when native fields are absent.',
+ '- Cache telemetry must use provider-native prompt_cache_hit_tokens and prompt_cache_miss_tokens when present. Fallback fields are acceptable only when native fields are absent.',
'',
'Response style:',
'- Be clear, direct, and useful. Avoid performative filler.',
diff --git a/kun/src/server/routes/attachments.ts b/kun/src/server/routes/attachments.ts
index 21501366..d68ad6bf 100644
--- a/kun/src/server/routes/attachments.ts
+++ b/kun/src/server/routes/attachments.ts
@@ -18,6 +18,7 @@ export async function uploadAttachment(
name: parsed.data.name,
mimeType: parsed.data.mimeType,
data: Buffer.from(parsed.data.dataBase64, 'base64'),
+ localFilePath: parsed.data.localFilePath,
textFallback: parsed.data.textFallback,
threadId: parsed.data.threadId,
workspace: parsed.data.workspace
diff --git a/kun/src/server/routes/events.ts b/kun/src/server/routes/events.ts
index 2d54f71e..dbc4fc6b 100644
--- a/kun/src/server/routes/events.ts
+++ b/kun/src/server/routes/events.ts
@@ -12,13 +12,22 @@ const HEARTBEAT_INTERVAL_MS = 15_000
* `since_seq`, then subscribes to the event bus to deliver live
* updates. The stream closes when the request's `AbortSignal`
* fires (the client disconnects) or the server stops publishing.
+ *
+ * Delivery is deduplicated per connection: an event whose seq is at or
+ * below the connection's high-water mark is dropped, so an event that
+ * lands in both the persisted backlog and the live subscription (the
+ * recorder persists before publishing) is delivered exactly once.
+ * Heartbeats reuse the high-water mark instead of allocating fresh
+ * seqs — after a runtime restart the in-memory seq counter starts
+ * over, and stamping heartbeats with those low seqs used to rewind
+ * client cursors, which made the next subscription replay the entire
+ * thread history into the live transcript.
*/
export function buildEventStreamResponse(input: {
request: Request
threadId: string
eventBus: EventBus
sessionStore: SessionStore
- allocateSeq: (threadId: string) => number
}): Response {
const url = new URL(input.request.url)
const sinceSeqFromQuery = Number(url.searchParams.get('since_seq') ?? '0') || 0
@@ -46,14 +55,25 @@ export function buildEventStreamResponse(input: {
}
input.request.signal.addEventListener('abort', close)
try {
- const backlog = await input.sessionStore.loadEventsSince(input.threadId, sinceSeq)
- for (const event of backlog) {
+ let lastDeliveredSeq = sinceSeq
+ const deliver = (event: RuntimeEvent): void => {
+ if (typeof event.seq === 'number') {
+ if (event.seq <= lastDeliveredSeq) return
+ lastDeliveredSeq = event.seq
+ }
controller.enqueue(encoder.encode(encodeSseEvent(event)))
}
+ const highestSeq = await input.sessionStore.highestSeq(input.threadId).catch(() => 0)
+ const backlog = sinceSeq >= highestSeq
+ ? []
+ : await input.sessionStore.loadEventsSince(input.threadId, sinceSeq)
+ for (const event of backlog) {
+ deliver(event)
+ }
unsubscribe = input.eventBus.subscribe(input.threadId, (event: RuntimeEvent) => {
if (closed) return
try {
- controller.enqueue(encoder.encode(encodeSseEvent(event)))
+ deliver(event)
} catch {
close()
}
@@ -65,7 +85,7 @@ export function buildEventStreamResponse(input: {
encoder.encode(
encodeSseEvent({
kind: 'heartbeat',
- seq: input.allocateSeq(input.threadId),
+ seq: lastDeliveredSeq,
timestamp: new Date().toISOString(),
threadId: input.threadId
})
diff --git a/kun/src/server/routes/index.ts b/kun/src/server/routes/index.ts
index ec307941..a1d29cd3 100644
--- a/kun/src/server/routes/index.ts
+++ b/kun/src/server/routes/index.ts
@@ -222,8 +222,7 @@ export function buildRouter(runtime: ServerRuntime): Router {
request,
threadId: ctx.params.id,
eventBus: runtime.eventBus,
- sessionStore: runtime.sessionStore,
- allocateSeq: runtime.allocateSeq
+ sessionStore: runtime.sessionStore
})
})
router.add('POST', '/v1/approvals/:id', async (request, ctx) => {
diff --git a/kun/src/server/routes/server-runtime.ts b/kun/src/server/routes/server-runtime.ts
index 8e8b9ea9..f47302ac 100644
--- a/kun/src/server/routes/server-runtime.ts
+++ b/kun/src/server/routes/server-runtime.ts
@@ -13,6 +13,12 @@ import type { RuntimeInfoResponse } from '../../contracts/runtime-info.js'
import type { McpServerDiagnostic } from '../../adapters/tool/mcp-tool-provider.js'
import type { McpSearchRuntimeDiagnostic } from '../../adapters/tool/mcp-tool-search.js'
import type { WebProviderDiagnostic } from '../../adapters/tool/web-tool-provider.js'
+import type { ImageGenDiagnostic } from '../../adapters/tool/image-gen-tool-provider.js'
+import type {
+ MusicGenDiagnostic,
+ SpeechGenDiagnostic,
+ VideoGenDiagnostic
+} from '../../adapters/tool/media-gen-tool-provider.js'
import type { SkillRuntimeDiagnostics } from '../../skills/skill-runtime.js'
import type { AttachmentDiagnostics } from '../../contracts/attachments.js'
import type { AttachmentStore } from '../../attachments/attachment-store.js'
@@ -28,6 +34,10 @@ export type RuntimeToolDiagnostics = {
skills: SkillRuntimeDiagnostics
attachments: AttachmentDiagnostics
memory: MemoryDiagnostics
+ imageGen?: ImageGenDiagnostic[]
+ speechGen?: SpeechGenDiagnostic[]
+ musicGen?: MusicGenDiagnostic[]
+ videoGen?: VideoGenDiagnostic[]
}
/**
diff --git a/kun/src/server/routes/threads.ts b/kun/src/server/routes/threads.ts
index 8bf5fe2a..4cf609a1 100644
--- a/kun/src/server/routes/threads.ts
+++ b/kun/src/server/routes/threads.ts
@@ -95,6 +95,7 @@ export async function getThread(
sessionStore.highestSeq(threadId),
sessionStore.loadItems(threadId)
])
+ sessionItems = await healSessionItemsForFinishedTurns(thread, sessionItems, sessionStore)
}
const hydratedThread = hydrateThreadItemsFromSession(thread, sessionItems)
return jsonResponse({
@@ -103,6 +104,63 @@ export async function getThread(
})
}
+type FinishedTurnStatus = Extract
+
+async function healSessionItemsForFinishedTurns(
+ thread: ThreadRecord,
+ items: TurnItem[],
+ sessionStore: SessionStore
+): Promise