diff --git a/.aiignore b/.aiignore index e44d2066..9b950707 100644 --- a/.aiignore +++ b/.aiignore @@ -25,3 +25,4 @@ GEMINI.md .gitignore .aiignore _example +.intellijPlatform diff --git a/.gitignore b/.gitignore index b4c8f6f3..e3c4f957 100644 --- a/.gitignore +++ b/.gitignore @@ -86,3 +86,4 @@ yarn-error.log* /docs/tasks/ /.gradle-user-home-fix/ /.intellijPlatform/ +/run-ai.bat diff --git a/CHANGELOG.md b/CHANGELOG.md index bcb80344..091e66bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 --- +## [0.0.1.8] - 2026-04-26 + +### Added + +- **Native function calling** — agents can now use providers' structured `tools` API instead of the JSON-in-text envelope. Controlled by the `tools.native_tools` setting (`auto | always | never`), stored as `general.nativeToolsMode` in `config.yaml`; default is `auto` (enabled for models with `supportsFunctionCalling=true`). + - OpenAI (Chat Completions + Responses API) — strict-mode schema sanitization, `tool_choice: auto`. + - Anthropic — `tools` array + `content[].tool_use` response parsing; schema sanitized (composition keywords stripped). + - Gemini — `functionDeclarations` array + `functionCall` response parsing; Gemini-specific type normalization. +- `ToolSchemaSanitizer` — provider-aware JSON Schema normalization (OpenAI strict compatibility check, Anthropic forbidden-keyword stripping, Gemini single-type coercion and nullable fields). +- `NativeToolsFallbackTracker` — session-scoped in-memory registry; if a provider returns 400 "tools not supported", the model is marked and all subsequent requests in the same process skip native tools automatically. +- `NativeToolsResolver` (`NativeToolsMode` enum: `AUTO / ALWAYS / NEVER`) — central decision logic with precedence: fallback cache → NEVER → ALWAYS → `ModelDefinition.supportsFunctionCalling`.
+- XML tag fallback in `ToolCallParser` — recovers tool calls from `` pseudo-tags emitted by weak local models (qwen<70B, some Gemma builds) that bypass the native API entirely. Warns on use. +- New models: **GPT-5.5**, **GPT-5.4 Nano**, **Qwen 3.6 35B/27B** (Ollama), **Qwen 3-next 80B**, **gpt-oss-safeguard 20B/120B**, OpenRouter `qwen/*` patterns. +- `claude-sonnet-4-6` added to `SupportedModels`. +- Per-iteration token and cost metrics persisted directly to the task row so the History panel and live stats bar show running totals without aggregating from message metadata. + +### Changed + +- `AgentTurnLoop` auto-falls back to the JSON-in-text path when a provider throws `ToolsNotSupportedException` — prompt is rebuilt without `` and the JSON envelope contract; no user action needed. +- When native tools are active the `response_format` / `json_object` override is suppressed (incompatible with function-calling mode on most providers). +- `TurnLLMCaller` accepts `nativeToolSchemas` and forwards them as `native_tools` in `kwargs`; retry handler also propagates `thinking`, `reasoningEffort`, and `noEgressEnabled`. +- Subagent profiles filter the native `tools` array to match their `` list — prevents the model calling tools the harness would reject. +- `ToolCallParser.extractToolCalls` pre-guard (`startsWith("{")`) removed — some models (e.g. glm-4.7-flash) emit prose before the JSON envelope, which the brace-matching strategy handles correctly. All-strategies-failed log downgraded from `WARN` to `DEBUG`. + +### Fixed + +- Anthropic tool results were mapped to `role: "assistant"` — now correctly mapped to `role: "user"`, fixing HTTP 400 rejections from claude-opus-4-6 and newer models that treat assistant-role tool results as prefill. +- `temperature` parameter no longer sent to models that deprecated it (`claude-opus-4-7`, `claude-opus-4-6`, `gpt-5.5`, `gpt-5.4-nano`) — removes spurious provider warnings. 
+- Anthropic streaming: `content_block_start` for `tool_use` blocks now captures the call `id` alongside the name. + +--- + ## [0.0.1.7] - 2025-04-17 ### Added diff --git a/README.md b/README.md index 30b8575c..02095f3f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![IntelliJ](https://img.shields.io/badge/IntelliJ-2024.1+-orange.svg)](https://www.jetbrains.com/idea/) -[![Version](https://img.shields.io/badge/version-0.0.1.5-green.svg)](CHANGELOG.md) +[![Version](https://img.shields.io/badge/version-0.0.1.8-green.svg)](CHANGELOG.md) [![Stage](https://img.shields.io/badge/stage-early--active-yellow.svg)](docs/ROADMAP.md) **Open-source AI coding plugin for IntelliJ IDEA, built in Kotlin.** @@ -216,12 +216,12 @@ See [docs/config.md](docs/config.md) for full configuration reference. ## Project status -| | | -|---|---| -| **Version** | 0.0.1.5 | -| **Stage** | Early-stage — active development | -| **License** | MIT | -| **Community** | Small, growing — PRs and issues welcome | +| | | +|---|------------------------------------------| +| **Version** | 0.0.1.8 | +| **Stage** | Early-stage — active development | +| **License** | MIT | +| **Community** | Small, growing — PRs and issues welcome | | **Change cadence** | Fast. Breaking changes possible pre-1.0. | --- diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt index fd9b8482..349338d7 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt @@ -444,7 +444,9 @@ data class ProjectContextResponse( // Active LLM request preview (exact runtime shape for next call) val activeLlmRequestPrompt: String? = null, // Auxiliary prompt preview (tool/system/user templates not in active request) - val auxiliaryPromptsPreview: String? = null + val auxiliaryPromptsPreview: String? 
= null, + // Raw context prompt as produced by ContextService.buildLLMContextPrompt (no system prompt, no messages) + val rawContextPrompt: String? = null ) /** diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt index 542cdb28..72e69df2 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt @@ -113,6 +113,7 @@ class CoreApiRouter( configService = configService, ragRepository = persistence.ragRepository, snapshotRepository = persistence.snapshotRepository, + snapshotGroupRepository = persistence.snapshotGroupRepository, analysisReportRepository = persistence.projectAnalysisReportRepository, taskRepository = taskRepository, chatMessageRepository = persistence.chatMessageRepository, diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt index 6dc27023..502bddc7 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt @@ -125,7 +125,8 @@ internal class AgentTurnLoopFactory( llmRetryHandler = null, workingMemoryIntegration = workingMemoryIntegration, agentEventBus = agentEventBus, - hookService = hookService + hookService = hookService, + toolPermissionsService = toolPermissionsService ) } } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt index b97ec57c..11fa998a 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt @@ -5,6 +5,7 @@ import pl.jclab.refio.core.config.ConfigKeys import pl.jclab.refio.core.db.repositories.ChatMessageRepository import 
pl.jclab.refio.core.db.repositories.ProjectAnalysisReportRepository import pl.jclab.refio.core.db.repositories.RagRepository +import pl.jclab.refio.core.db.repositories.SnapshotGroupRepository import pl.jclab.refio.core.db.repositories.SnapshotRepository import pl.jclab.refio.core.db.repositories.SubtaskRepository import pl.jclab.refio.core.db.repositories.TaskRepository @@ -26,6 +27,7 @@ internal class AnalysisStack( private val configService: ConfigService, private val ragRepository: RagRepository, snapshotRepository: SnapshotRepository, + snapshotGroupRepository: SnapshotGroupRepository, analysisReportRepository: ProjectAnalysisReportRepository, taskRepository: TaskRepository, chatMessageRepository: ChatMessageRepository, @@ -104,7 +106,7 @@ internal class AnalysisStack( } else null val snapshotService: SnapshotService? = if (projectRoot != null) { - SnapshotService(snapshotRepository, projectRoot) + SnapshotService(snapshotRepository, snapshotGroupRepository, projectRoot) } else null /** diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt index 723acff5..9a717fc6 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt @@ -193,6 +193,7 @@ internal class DomainRouters( projectAnalyzer = projectAnalyzer, richProjectAnalysisEngine = richProjectAnalysisEngine, promptSectionProviders = promptSectionProviders, + configService = configService, ) } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt index 1897d078..6acb4fda 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt @@ -10,6 +10,7 @@ import 
pl.jclab.refio.core.db.repositories.DocumentationRepository import pl.jclab.refio.core.db.repositories.ProjectAnalysisReportRepository import pl.jclab.refio.core.db.repositories.PromptsRepository import pl.jclab.refio.core.db.repositories.RagRepository +import pl.jclab.refio.core.db.repositories.SnapshotGroupRepository import pl.jclab.refio.core.db.repositories.SnapshotRepository import pl.jclab.refio.core.db.repositories.SubtaskRepository import pl.jclab.refio.core.db.repositories.TaskRepository @@ -29,6 +30,7 @@ class PersistenceModule { val ragRepository = RagRepository() val documentationRepository = DocumentationRepository() val snapshotRepository = SnapshotRepository() + val snapshotGroupRepository = SnapshotGroupRepository() val projectAnalysisReportRepository = ProjectAnalysisReportRepository() val agentSessionRepository = AgentSessionRepository() val agentInstanceRepository = AgentInstanceRepository() diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt index 49d1e8f6..66f5882f 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt @@ -10,10 +10,18 @@ import pl.jclab.refio.core.llm.LLMMessage import pl.jclab.refio.core.llm.LLMMessageMapper import pl.jclab.refio.core.llm.TokenEstimator import pl.jclab.refio.core.models.context.* +import pl.jclab.refio.core.llm.ModelDefinitions +import pl.jclab.refio.core.llm.NativeToolsFallbackTracker +import pl.jclab.refio.core.llm.parseNativeToolsMode +import pl.jclab.refio.core.llm.shouldUseNativeTools +import pl.jclab.refio.core.services.ConfigService import pl.jclab.refio.core.services.ContextService import pl.jclab.refio.core.services.ProjectAnalyzerService import pl.jclab.refio.core.services.PromptsService import pl.jclab.refio.core.services.analysis.project.RichProjectAnalysisEngine 
+import pl.jclab.refio.core.services.turn.nativeToolsDescriptionOverride +import pl.jclab.refio.core.config.ConfigKeys +import pl.jclab.refio.core.api.ModelOperation import pl.jclab.refio.core.logging.dualLogger import java.nio.file.Path @@ -37,7 +45,8 @@ class ProjectContextRouter( * (e.g. , ) that the runtime prompt has. * Pass emptyList() to preserve the legacy stripped preview. */ - private val promptSectionProviders: List = emptyList() + private val promptSectionProviders: List = emptyList(), + private val configService: ConfigService? = null, ) : Router { override suspend fun initialize() { @@ -48,6 +57,42 @@ class ProjectContextRouter( logger.info { "[ProjectContextRouter] Shutting down" } } + private fun resolveResponseContractFragment(nativeToolsActive: Boolean): String { + val name = if (nativeToolsActive) "response-contract-native" else "response-contract-json" + return promptsService.getFragment(name) + } + + /** + * Mirror of [TurnPromptBuilder.resolveMultiAgentSection] for preview rendering. Returns the + * full delegation-guidance block when the caller can actually invoke subagents — empty + * string otherwise. ProjectContextRouter previews the "main" run (no profile overrides), + * so only the mode-level tool list is checked for `invoke_subagent`. 
+ */ + private fun resolveMultiAgentSectionFragment(mode: TaskMode, taskId: String): String { + val tools = toolDescriptionBuilder.getToolsForMode(mode, taskId) + val hasInvokeSubagent = tools.any { it.name.equals("invoke_subagent", ignoreCase = true) } + if (!hasInvokeSubagent) return "" + val fragmentName = when (mode) { + TaskMode.PLAN -> "multi-agent-plan" + TaskMode.AGENT -> "multi-agent-agent" + TaskMode.CHAT -> return "" + } + return promptsService.getFragment(fragmentName) + } + + private fun isNativeToolsActiveForTask(mode: TaskMode, taskId: String): Boolean { + if (configService == null) return false + val operation = when (mode) { + TaskMode.PLAN -> ModelOperation.PLAN + TaskMode.AGENT -> ModelOperation.CODING + TaskMode.CHAT -> return false + } + val (modelId, providerName) = configService.getModel(operation, taskId) + val nativeToolsMode = parseNativeToolsMode(configService.getTyped(ConfigKeys.NATIVE_TOOLS_MODE, taskId)) + val modelDef = ModelDefinitions.getDefinition(providerName, modelId) + return shouldUseNativeTools(nativeToolsMode, modelDef, modelId, NativeToolsFallbackTracker.getFallbackSet()) + } + /** * Get project context (for UI visualization). 
*/ @@ -199,7 +244,7 @@ class ProjectContextRouter( ): RuntimePromptPreview { return when (task.mode) { TaskMode.CHAT -> buildChatRuntimePromptPreview(chatHistory, pendingUserInput, contextPrompt, contextSectionTokens) - TaskMode.PLAN -> buildPlanRuntimePromptPreview(task, chatHistory, pendingUserInput, contextPrompt, contextSectionTokens) + TaskMode.PLAN -> buildPlanRuntimePromptPreview(task, chatHistory, pendingUserInput, contextPrompt, userContextRefs, contextSectionTokens) TaskMode.AGENT -> buildAgentRuntimePromptPreview(task, chatHistory, pendingUserInput, contextPrompt, userContextRefs, contextSectionTokens) } } @@ -248,30 +293,63 @@ class ProjectContextRouter( chatHistory: List, pendingUserInput: String?, contextPrompt: String, + userContextRefs: List, contextSectionTokens: Map ): RuntimePromptPreview { - val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.PLAN, task.id) - val validToolNames = toolDescriptionBuilder.getValidToolNames(TaskMode.PLAN, task.id) - val basePrompt = promptsService.getSystemPrompt( - type = PromptType.SYSTEM_PLAN, - variables = mapOf("tool_descriptions" to toolDescriptions, "valid_tool_names" to validToolNames) - ) - val systemPrompt = appendProviderSections(basePrompt, TaskMode.PLAN, task.id, iteration = 0) + val nativeTools = isNativeToolsActiveForTask(TaskMode.PLAN, task.id) + val responseContract = resolveResponseContractFragment(nativeTools) + val multiAgentSection = resolveMultiAgentSectionFragment(TaskMode.PLAN, task.id) + val systemPrompt = if (nativeTools) { + val basePrompt = promptsService.getSystemPrompt( + type = PromptType.SYSTEM_PLAN, + variables = mapOf( + "tool_descriptions" to nativeToolsDescriptionOverride(), + "response_contract" to responseContract, + "multi_agent_section" to multiAgentSection + ) + ) + appendProviderSections(basePrompt, TaskMode.PLAN, task.id, iteration = 0) + } else { + val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.PLAN, task.id) + val 
validToolNames = toolDescriptionBuilder.getValidToolNames(TaskMode.PLAN, task.id) + val basePrompt = promptsService.getSystemPrompt( + type = PromptType.SYSTEM_PLAN, + variables = mapOf( + "tool_descriptions" to toolDescriptions, + "valid_tool_names" to validToolNames, + "response_contract" to responseContract, + "multi_agent_section" to multiAgentSection + ) + ) + appendProviderSections(basePrompt, TaskMode.PLAN, task.id, iteration = 0) + } - val requestText = pendingUserInput - ?: chatHistory.lastOrNull { it.role == MessageRole.USER }?.content ?: "" - val userPrompt = buildString { - appendLine("User request:") - appendLine(requestText) - appendLine() - appendLine("Create a detailed execution plan as JSON.") - }.trim() - val messages = listOf(LLMMessage(role = "user", content = userPrompt)) + // For native tools, show the same multi-message structure that AgentTurnLoop actually sends + // (ContextService builds stable context + working memory + conversation history). + // For the legacy JSON path, keep the old single-message "Create a detailed execution plan" format. 
+ val messages: List + val effectiveContextPrompt: String + if (nativeTools) { + messages = buildAgentMessagesForPreview(task.id, chatHistory, pendingUserInput, userContextRefs, + pendingUserInput ?: chatHistory.lastOrNull { it.role == MessageRole.USER }?.content) + effectiveContextPrompt = "" + } else { + val requestText = pendingUserInput + ?: chatHistory.lastOrNull { it.role == MessageRole.USER }?.content ?: "" + val userPrompt = buildString { + appendLine("User request:") + appendLine(requestText) + appendLine() + appendLine("Create a detailed execution plan as JSON.") + }.trim() + messages = listOf(LLMMessage(role = "user", content = userPrompt)) + effectiveContextPrompt = contextPrompt + } val preparedPayload = LLMClient.prepareRequestPayload( messages = messages, systemPrompt = systemPrompt, - contextContent = contextPrompt.takeIf { it.isNotBlank() }, + contextContent = effectiveContextPrompt.takeIf { it.isNotBlank() }, systemMessages = emptyList() ) val activeTokens = preparedPayload.estimatedInputTokens @@ -282,7 +360,7 @@ class ProjectContextRouter( systemPromptForBreakdown = systemPrompt, systemMessages = emptyList(), messages = messages, - hasInjectedContext = contextPrompt.isNotBlank() + hasInjectedContext = effectiveContextPrompt.isNotBlank() ) val preview = renderActiveRequestPreview( @@ -302,18 +380,37 @@ class ProjectContextRouter( userContextRefs: List, contextSectionTokens: Map ): RuntimePromptPreview { - val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.AGENT, task.id) - val toolSelectionMatrix = toolDescriptionBuilder.getToolSelectionMatrix(TaskMode.AGENT, task.id) - val baseSystemPromptRaw = promptsService.getSystemPrompt( - type = PromptType.SYSTEM_AGENT, - variables = mapOf( - "tool_descriptions" to toolDescriptions, - "tool_selection_matrix" to toolSelectionMatrix + val nativeToolsAgent = isNativeToolsActiveForTask(TaskMode.AGENT, task.id) + val responseContractAgent = 
resolveResponseContractFragment(nativeToolsAgent) + val multiAgentSectionAgent = resolveMultiAgentSectionFragment(TaskMode.AGENT, task.id) + val baseSystemPrompt = if (nativeToolsAgent) { + val toolSelectionMatrix = toolDescriptionBuilder.getToolSelectionMatrix(TaskMode.AGENT, task.id) + val baseRaw = promptsService.getSystemPrompt( + type = PromptType.SYSTEM_AGENT, + variables = mapOf( + "tool_descriptions" to nativeToolsDescriptionOverride(), + "tool_selection_matrix" to toolSelectionMatrix, + "response_contract" to responseContractAgent, + "multi_agent_section" to multiAgentSectionAgent + ) ) - ) - // Apply the same section providers used by TurnPromptBuilder in runtime - // so the preview reflects the real prompt (including ). - val baseSystemPrompt = appendProviderSections(baseSystemPromptRaw, TaskMode.AGENT, task.id, iteration = 0) + appendProviderSections(baseRaw, TaskMode.AGENT, task.id, iteration = 0) + } else { + val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.AGENT, task.id) + val toolSelectionMatrix = toolDescriptionBuilder.getToolSelectionMatrix(TaskMode.AGENT, task.id) + val baseRaw = promptsService.getSystemPrompt( + type = PromptType.SYSTEM_AGENT, + variables = mapOf( + "tool_descriptions" to toolDescriptions, + "tool_selection_matrix" to toolSelectionMatrix, + "response_contract" to responseContractAgent, + "multi_agent_section" to multiAgentSectionAgent + ) + ) + // Apply the same section providers used by TurnPromptBuilder in runtime + // so the preview reflects the real prompt (including ). 
+ appendProviderSections(baseRaw, TaskMode.AGENT, task.id, iteration = 0) + } val runtimeSystemPrompt = if (contextPrompt.isNotBlank()) { "$baseSystemPrompt\n\n\n$contextPrompt\n" @@ -530,7 +627,17 @@ class ProjectContextRouter( .filter { it != activeType } .sortedBy { it.name } .mapNotNull { type -> - val content = runCatching { promptsService.getSystemPrompt(type) }.getOrNull()?.takeIf { it.isNotBlank() } + // Auxiliary previews are non-active modes shown for reference. Supply blank + // placeholders for all declared variables so `PromptTemplate.render` doesn't + // throw — content quality doesn't matter here, we just need it to render. + val auxVariables = mapOf( + "tool_descriptions" to "", + "tool_selection_matrix" to "", + "valid_tool_names" to "", + "response_contract" to "", + "multi_agent_section" to "" + ) + val content = runCatching { promptsService.getSystemPrompt(type, auxVariables) }.getOrNull()?.takeIf { it.isNotBlank() } ?: return@mapNotNull null val role = if (type.name.endsWith("_USER")) "user_template" else "system" PromptPreviewEntry(source = type.name, role = role, content = content, estimatedTokens = TokenEstimator.estimateTokens(content)) @@ -633,7 +740,8 @@ class ProjectContextRouter( taskRequirementsPrompt = taskRequirementsPrompt, recentWorkPrompt = recentWorkPrompt, activeLlmRequestPrompt = activeLlmPreviewPrompt, - auxiliaryPromptsPreview = auxiliaryPreviewPrompt + auxiliaryPromptsPreview = auxiliaryPreviewPrompt, + rawContextPrompt = llmPrompt ) } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt index 3d3abef0..3b8ea2a4 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt @@ -163,6 +163,22 @@ object ConfigKeys { default = false ) + // ==================== TOOLS ==================== + + /** + * Native function-calling mode. 
+ * - "auto" (default): use native tools iff ModelDefinition.supportsFunctionCalling=true + * - "always": force native tools even on unlisted models (debug / opt-in) + * - "never" : force JSON-in-text path (escape hatch) + */ + val NATIVE_TOOLS_MODE = ConfigKey( + key = "tools.native_tools", + parser = { raw -> raw.trim().lowercase().takeIf { it in setOf("auto", "always", "never") } }, + default = "auto", + validator = { it in setOf("auto", "always", "never") }, + yamlAccessor = { it.getNativeToolsMode()?.trim()?.lowercase() } + ) + val GENERAL_EXECUTION_MODE = ConfigKey( key = "general.execution_mode", parser = { it.trim().uppercase().takeIf { s -> s.isNotBlank() } }, @@ -720,6 +736,7 @@ object ConfigKeys { AUTO_OPTIMIZE_PERCENTAGE, READ_ONLY_MODE, // Tools + NATIVE_TOOLS_MODE, TOOLS_PERMISSIONS, TOOL_PERMISSION_RUN_TERMINAL, // Tool summary diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt index 730d7f6f..a5137183 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt @@ -32,6 +32,7 @@ internal object ConfigYamlEmitter { general.thinkingEnabled?.let { sb.appendLine(" thinkingEnabled: $it") } general.noEgressEnabled?.let { sb.appendLine(" noEgressEnabled: $it") } general.executionMode?.let { sb.appendLine(" executionMode: \"$it\"") } + general.nativeToolsMode?.let { sb.appendLine(" nativeToolsMode: \"$it\"") } sb.appendLine() } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt index d604960e..586c3df8 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt @@ -40,6 +40,7 @@ internal object ConfigYamlMerger { thinkingEnabled = override.thinkingEnabled ?: 
base.thinkingEnabled, noEgressEnabled = override.noEgressEnabled ?: base.noEgressEnabled, executionMode = override.executionMode ?: base.executionMode, + nativeToolsMode = override.nativeToolsMode ?: base.nativeToolsMode, ) } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt index 1c14c3b8..3aa9a31e 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt @@ -10,7 +10,8 @@ data class GeneralConfig( val advancedView: Boolean? = null, val thinkingEnabled: Boolean? = null, val noEgressEnabled: Boolean? = null, - val executionMode: String? = null + val executionMode: String? = null, + val nativeToolsMode: String? = null ) @Serializable diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt index fb4c6a56..c41d82ec 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt @@ -96,6 +96,7 @@ class HierarchicalConfigLoader private constructor( fun getGeneralThinkingEnabled(): Boolean? = getConfig().general?.thinkingEnabled fun getGeneralNoEgressEnabled(): Boolean? = getConfig().general?.noEgressEnabled fun getGeneralExecutionMode(): String? = getConfig().general?.executionMode + fun getNativeToolsMode(): String? 
= getConfig().general?.nativeToolsMode // ═══════════════════════════════════════════════════════════════════════════════ // Provider Settings diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt index e8313d8b..9d4e6bf8 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt @@ -66,15 +66,21 @@ object DatabaseFactory { org.jetbrains.exposed.sql.transactions.TransactionManager.manager.defaultMinRepetitionDelay = 100 org.jetbrains.exposed.sql.transactions.TransactionManager.manager.defaultMaxRepetitionDelay = 1000 - // Create tables - transaction(database) { + // Migrations run BEFORE createMissingTablesAndColumns so they can rebuild + // tables whose schema drifted from the current Exposed definitions + // (e.g. V4 rewires snapshots/subtasks to use snapshot_groups). + // Each migration is idempotent and guards against missing tables for fresh DBs. + MigrationRunner.run(database) - // Create tables in dependency order - // Note: Subtasks <-> Snapshots have circular reference (both nullable) + transaction(database) { + // Create tables in dependency order. + // Snapshot groups have no FK back to subtasks (subtaskId is a plain column), + // so there's no cycle: Tasks -> SnapshotGroups -> Subtasks -> Snapshots. 
SchemaUtils.createMissingTablesAndColumns( TasksTable, - SnapshotsTable, // Created before Subtasks (circular ref handled via nullable FKs) + SnapshotGroupsTable, SubtasksTable, + SnapshotsTable, ChatMessagesTable, ApiLogsTable, MCPServersTable, @@ -95,8 +101,6 @@ object DatabaseFactory { logger.info { "All tables created successfully" } } - MigrationRunner.run(database) - // Set initialized AFTER everything is complete (tables + migrations) initialized = true diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotGroupsTable.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotGroupsTable.kt new file mode 100644 index 00000000..a74971c9 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotGroupsTable.kt @@ -0,0 +1,34 @@ +package pl.jclab.refio.core.db + +import org.jetbrains.exposed.sql.* +import java.util.UUID + +/** + * Group of file snapshots taken together before a single write operation. + * One group per tool execution (typically one subtask). A group can contain + * snapshots of multiple files. Referenced from [SubtasksTable.snapshotIdBeforeWrite]. + */ +object SnapshotGroupsTable : Table("snapshot_groups") { + val id = varchar("id", 36).clientDefault { UUID.randomUUID().toString() } + val taskId = varchar("task_id", 36).references(TasksTable.id, onDelete = ReferenceOption.CASCADE) + + // Plain column (no FK) to avoid circular FK headaches with subtasks table. + // Referential integrity in the other direction: subtasks.snapshot_id_before_write -> snapshot_groups.id. 
+ val subtaskId = varchar("subtask_id", 36).nullable() + + val createdAt = long("created_at").clientDefault { System.currentTimeMillis() } + + override val primaryKey = PrimaryKey(id) + + init { + index("idx_snapshot_groups_task", false, taskId, createdAt) + index("idx_snapshot_groups_subtask", false, subtaskId) + } +} + +data class SnapshotGroup( + val id: String, + val taskId: String, + val subtaskId: String?, + val createdAt: Long +) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt index f4c744b3..83b1ffa8 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt @@ -5,27 +5,27 @@ import java.util.UUID /** * Snapshots table definition using Exposed ORM DSL - * Stores file content snapshots for rollback capability + * Stores file content snapshots for rollback capability. + * Each snapshot belongs to a [SnapshotGroupsTable] (one group per tool execution). 
*/ object SnapshotsTable : Table("snapshots") { val id = varchar("id", 36).clientDefault { UUID.randomUUID().toString() } val taskId = varchar("task_id", 36).references(TasksTable.id, onDelete = ReferenceOption.CASCADE) - val subtaskId = varchar("subtask_id", 36).references(SubtasksTable.id, onDelete = ReferenceOption.CASCADE).nullable() + val groupId = varchar("group_id", 36).references(SnapshotGroupsTable.id, onDelete = ReferenceOption.CASCADE) - val filePath = text("file_path") // Absolute or relative path within sandbox - val contentHash = varchar("content_hash", 64) // SHA-256 hash - val contentCompressed = blob("content_compressed") // zlib compressed content - val originalSize = long("original_size") // Size in bytes before compression - val compressionRatio = double("compression_ratio").default(1.0) // originalSize / compressedSize + val filePath = text("file_path") + val contentHash = varchar("content_hash", 64) + val contentCompressed = blob("content_compressed") + val originalSize = long("original_size") + val compressionRatio = double("compression_ratio").default(1.0) val createdAt = long("created_at").clientDefault { System.currentTimeMillis() } override val primaryKey = PrimaryKey(id) init { - // Index for efficient retrieval of snapshots by task index("idx_snapshots_task", false, taskId, createdAt) - // Index for finding snapshots by file path + index("idx_snapshots_group", false, groupId) index("idx_snapshots_file_hash", false, filePath, contentHash) } } @@ -36,7 +36,7 @@ object SnapshotsTable : Table("snapshots") { data class Snapshot( val id: String, val taskId: String, - val subtaskId: String?, + val groupId: String, val filePath: String, val contentHash: String, val contentCompressed: ByteArray, diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt index 702bd411..0d037ad1 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt +++ 
b/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt @@ -38,8 +38,8 @@ object SubtasksTable : Table("subtasks") { val costUsd = double("cost_usd").default(0.0) val latencyMs = integer("latency_ms").default(0) - // Snapshots for rollback - val snapshotIdBeforeWrite = varchar("snapshot_id_before_write", 36).references(SnapshotsTable.id, onDelete = ReferenceOption.SET_NULL).nullable() + // Snapshots for rollback — references a snapshot group (set of files captured before one tool execution) + val snapshotIdBeforeWrite = varchar("snapshot_id_before_write", 36).references(SnapshotGroupsTable.id, onDelete = ReferenceOption.SET_NULL).nullable() // Timestamps val createdAt = long("created_at").clientDefault { System.currentTimeMillis() } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt index c8e5a1a6..d2ef6ac4 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt @@ -18,6 +18,7 @@ object MigrationRunner { // Seed data was for UI development only. Existing databases retain v1 data. V2DropAgentEventsSessionFk(), V3RenameSlashCommandToSlashPrompt(), + V4SnapshotGroups(), ) fun run(database: Database) { diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/SqliteIntrospection.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/SqliteIntrospection.kt new file mode 100644 index 00000000..e558ca45 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/SqliteIntrospection.kt @@ -0,0 +1,24 @@ +package pl.jclab.refio.core.db.migrations + +import java.sql.Connection + +internal fun tableExists(jdbc: Connection, name: String): Boolean { + jdbc.prepareStatement( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name = ?" 
+ ).use { ps -> + ps.setString(1, name) + ps.executeQuery().use { rs -> return rs.next() } + } +} + +internal fun columnExists(jdbc: Connection, table: String, column: String): Boolean { + if (!tableExists(jdbc, table)) return false + jdbc.createStatement().use { st -> + st.executeQuery("PRAGMA table_info(\"$table\")").use { rs -> + while (rs.next()) { + if (rs.getString("name").equals(column, ignoreCase = true)) return true + } + } + } + return false +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt index a2dd36ee..1e4b30f8 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt @@ -27,6 +27,10 @@ class V2DropAgentEventsSessionFk : Migration { override fun migrate(database: Database) { transaction(database) { val jdbc = (connection.connection as java.sql.Connection) + if (!tableExists(jdbc, "agent_events")) { + logger.info { "agent_events table does not exist (fresh DB); skipping V2 rebuild" } + return@transaction + } jdbc.createStatement().use { st -> st.execute("PRAGMA foreign_keys = OFF") try { diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt index 82900747..5df7f138 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt @@ -20,6 +20,10 @@ class V3RenameSlashCommandToSlashPrompt : Migration { override fun migrate(database: Database) { transaction(database) { val jdbc = (connection.connection as java.sql.Connection) + if (!tableExists(jdbc, "prompts")) { + logger.info { "prompts table does not exist 
(fresh DB); skipping V3 rename" } + return@transaction + } jdbc.createStatement().use { st -> val updated = st.executeUpdate( "UPDATE prompts SET type = 'SLASH_PROMPT' WHERE type = 'SLASH_COMMAND'" diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V4SnapshotGroups.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V4SnapshotGroups.kt new file mode 100644 index 00000000..c9f14f0c --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V4SnapshotGroups.kt @@ -0,0 +1,204 @@ +package pl.jclab.refio.core.db.migrations + +import org.jetbrains.exposed.sql.Database +import org.jetbrains.exposed.sql.transactions.transaction +import pl.jclab.refio.core.logging.dualLogger +import java.util.UUID + +private val logger = dualLogger("V4SnapshotGroups") + +/** + * Introduces `snapshot_groups` and rewires FKs to fix the snapshot wiring: + * + * - `snapshots.subtask_id` (FK -> subtasks.id) becomes `snapshots.group_id` (FK -> snapshot_groups.id). + * - `subtasks.snapshot_id_before_write` previously declared FK -> snapshots.id; the runtime + * always tried to store a subtaskId there, so every link attempt failed the FK constraint + * and the column stayed NULL. We point the FK at snapshot_groups.id instead. + * + * SQLite cannot alter FKs in place, so affected tables are rebuilt. Existing snapshots are + * migrated by creating one group per distinct `(task_id, subtask_id)` pair (subtask_id is preserved on the + * group for traceability). + */ +class V4SnapshotGroups : Migration { + override val version: Int = 4 + + override fun migrate(database: Database) { + transaction(database) { + val jdbc = connection.connection as java.sql.Connection + // Fresh DB: snapshots is created later with new schema, nothing to migrate. 
+ if (!tableExists(jdbc, "snapshots") || !columnExists(jdbc, "snapshots", "subtask_id")) { + logger.info { "snapshots table absent or already migrated; skipping V4" } + return@transaction + } + jdbc.createStatement().use { st -> + st.execute("PRAGMA foreign_keys = OFF") + try { + // 1) Create snapshot_groups if not present + st.execute( + """ + CREATE TABLE IF NOT EXISTS snapshot_groups ( + id VARCHAR(36) PRIMARY KEY, + task_id VARCHAR(36) NOT NULL, + subtask_id VARCHAR(36), + created_at BIGINT NOT NULL, + FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE + ) + """.trimIndent() + ) + st.execute("CREATE INDEX IF NOT EXISTS idx_snapshot_groups_task ON snapshot_groups(task_id, created_at)") + st.execute("CREATE INDEX IF NOT EXISTS idx_snapshot_groups_subtask ON snapshot_groups(subtask_id)") + + // 2) Create groups for existing snapshots. + // One group per distinct (task_id, subtask_id) — NULL subtask_id also forms its own group per task. + val existingSnapshots = mutableListOf<Triple<String, String?, Long>>() + jdbc.prepareStatement( + "SELECT DISTINCT task_id, subtask_id, MIN(created_at) FROM snapshots GROUP BY task_id, subtask_id" + ).use { ps -> + ps.executeQuery().use { rs -> + while (rs.next()) { + existingSnapshots.add( + Triple(rs.getString(1), rs.getString(2), rs.getLong(3)) + ) + } + } + } + + val groupIdBySubtaskKey = mutableMapOf<Pair<String, String?>, String>() + jdbc.prepareStatement( + "INSERT INTO snapshot_groups(id, task_id, subtask_id, created_at) VALUES (?, ?, ?, ?)" + ).use { ins -> + for ((taskId, subtaskId, createdAt) in existingSnapshots) { + val groupId = UUID.randomUUID().toString() + groupIdBySubtaskKey[taskId to subtaskId] = groupId + ins.setString(1, groupId) + ins.setString(2, taskId) + if (subtaskId == null) ins.setNull(3, java.sql.Types.VARCHAR) else ins.setString(3, subtaskId) + ins.setLong(4, createdAt) + ins.addBatch() + } + ins.executeBatch() + } + + // 3) Rebuild `snapshots` with group_id instead of subtask_id + st.execute( + """ + CREATE TABLE snapshots_new ( + id 
VARCHAR(36) PRIMARY KEY, + task_id VARCHAR(36) NOT NULL, + group_id VARCHAR(36) NOT NULL, + file_path TEXT NOT NULL, + content_hash VARCHAR(64) NOT NULL, + content_compressed BLOB NOT NULL, + original_size BIGINT NOT NULL, + compression_ratio DOUBLE NOT NULL DEFAULT 1.0, + created_at BIGINT NOT NULL, + FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE, + FOREIGN KEY (group_id) REFERENCES snapshot_groups(id) ON DELETE CASCADE + ) + """.trimIndent() + ) + + // Copy rows, resolving group_id from the (task_id, subtask_id) pair + jdbc.prepareStatement( + "SELECT id, task_id, subtask_id, file_path, content_hash, content_compressed, original_size, compression_ratio, created_at FROM snapshots" + ).use { ps -> + jdbc.prepareStatement( + "INSERT INTO snapshots_new(id, task_id, group_id, file_path, content_hash, content_compressed, original_size, compression_ratio, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)" + ).use { ins -> + ps.executeQuery().use { rs -> + while (rs.next()) { + val id = rs.getString(1) + val taskId = rs.getString(2) + val subtaskId = rs.getString(3) // may be null + val groupId = groupIdBySubtaskKey[taskId to subtaskId] + ?: continue // orphan; shouldn't happen, skip safely + ins.setString(1, id) + ins.setString(2, taskId) + ins.setString(3, groupId) + ins.setString(4, rs.getString(4)) + ins.setString(5, rs.getString(5)) + ins.setBytes(6, rs.getBytes(6)) + ins.setLong(7, rs.getLong(7)) + ins.setDouble(8, rs.getDouble(8)) + ins.setLong(9, rs.getLong(9)) + ins.addBatch() + } + ins.executeBatch() + } + } + } + + st.execute("DROP TABLE snapshots") + st.execute("ALTER TABLE snapshots_new RENAME TO snapshots") + st.execute("CREATE INDEX IF NOT EXISTS idx_snapshots_task ON snapshots(task_id, created_at)") + st.execute("CREATE INDEX IF NOT EXISTS idx_snapshots_group ON snapshots(group_id)") + st.execute("CREATE INDEX IF NOT EXISTS idx_snapshots_file_hash ON snapshots(file_path, content_hash)") + + // 4) Rebuild `subtasks` to change FK of 
snapshot_id_before_write. + // Pre-existing values are effectively all NULL (FK always failed), so we can also + // NULL them defensively during the rebuild. + st.execute( + """ + CREATE TABLE subtasks_new ( + id VARCHAR(36) PRIMARY KEY, + task_id VARCHAR(36) NOT NULL, + order_index INT NOT NULL, + kind VARCHAR(32) NOT NULL, + status VARCHAR(16) NOT NULL DEFAULT 'NEW', + description TEXT NOT NULL, + params_json TEXT, + step_plan_json TEXT, + summary TEXT, + requires_approval INT NOT NULL DEFAULT 0, + approval_status VARCHAR(32) NOT NULL DEFAULT 'NOT_REQUIRED', + approved_at BIGINT, + result TEXT, + error_message TEXT, + error_stacktrace TEXT, + llm_model VARCHAR(64), + llm_provider VARCHAR(32), + input_tokens INT NOT NULL DEFAULT 0, + output_tokens INT NOT NULL DEFAULT 0, + cost_usd DOUBLE NOT NULL DEFAULT 0.0, + latency_ms INT NOT NULL DEFAULT 0, + snapshot_id_before_write VARCHAR(36), + created_at BIGINT NOT NULL, + updated_at BIGINT NOT NULL, + started_at BIGINT, + completed_at BIGINT, + FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE, + FOREIGN KEY (snapshot_id_before_write) REFERENCES snapshot_groups(id) ON DELETE SET NULL, + UNIQUE (task_id, order_index) + ) + """.trimIndent() + ) + + st.execute( + """ + INSERT INTO subtasks_new + (id, task_id, order_index, kind, status, description, params_json, step_plan_json, + summary, requires_approval, approval_status, approved_at, result, error_message, + error_stacktrace, llm_model, llm_provider, input_tokens, output_tokens, cost_usd, + latency_ms, snapshot_id_before_write, created_at, updated_at, started_at, completed_at) + SELECT + id, task_id, order_index, kind, status, description, params_json, step_plan_json, + summary, requires_approval, approval_status, approved_at, result, error_message, + error_stacktrace, llm_model, llm_provider, input_tokens, output_tokens, cost_usd, + latency_ms, NULL, created_at, updated_at, started_at, completed_at + FROM subtasks + """.trimIndent() + ) + + st.execute("DROP 
TABLE subtasks") + st.execute("ALTER TABLE subtasks_new RENAME TO subtasks") + + logger.info { + "Migrated ${existingSnapshots.size} snapshot group(s); rebuilt snapshots + subtasks with corrected FKs" + } + } finally { + st.execute("PRAGMA foreign_keys = ON") + } + } + } + } +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt index 58194de2..80b77e8d 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt @@ -296,20 +296,27 @@ class RagRepository { // ========== Embeddings Operations ========== /** - * Create embedding + * Create embedding. + * + * Uses INSERT OR IGNORE: if the parent chunk has been concurrently deleted + * (FK violation) or the (chunkId, model) unique key already exists, the + * insert is silently skipped and `null` is returned. This avoids spamming + * the log with `SQLITE_CONSTRAINT_FOREIGNKEY` exceptions during concurrent + * RAG index refreshes (FileAnalyzerService re-indexes while another caller + * may have evicted the file row). */ fun createEmbedding( chunkId: Int, model: String, vector: ByteArray, dimensions: Int - ): Int = transaction { - EmbeddingsTable.insertAndGetId { + ): Int? 
= transaction { + EmbeddingsTable.insertIgnoreAndGetId { it[EmbeddingsTable.chunkId] = chunkId it[EmbeddingsTable.model] = model it[EmbeddingsTable.vector] = ExposedBlob(vector) it[EmbeddingsTable.dimensions] = dimensions - }.value + }?.value } /** diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotGroupRepository.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotGroupRepository.kt new file mode 100644 index 00000000..ca001ac1 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotGroupRepository.kt @@ -0,0 +1,51 @@ +package pl.jclab.refio.core.db.repositories + +import org.jetbrains.exposed.sql.* +import org.jetbrains.exposed.sql.transactions.transaction +import pl.jclab.refio.core.db.SnapshotGroup +import pl.jclab.refio.core.db.SnapshotGroupsTable +import pl.jclab.refio.core.logging.dualLogger + +private val logger = dualLogger("SnapshotGroupRepository") + +/** + * Repository for [SnapshotGroupsTable]. A group aggregates the file snapshots + * captured before a single tool execution. + */ +class SnapshotGroupRepository { + + fun create(taskId: String, subtaskId: String?): SnapshotGroup { + return transaction { + val id = SnapshotGroupsTable.insert { + it[SnapshotGroupsTable.taskId] = taskId + it[SnapshotGroupsTable.subtaskId] = subtaskId + } get SnapshotGroupsTable.id + + logger.info { "Created snapshot group: id=$id, taskId=$taskId, subtaskId=$subtaskId" } + findById(id) ?: error("Failed to retrieve created snapshot group $id") + } + } + + fun findById(id: String): SnapshotGroup? = transaction { + SnapshotGroupsTable.selectAll() + .where { SnapshotGroupsTable.id eq id } + .map(::toGroup) + .singleOrNull() + } + + fun findBySubtaskId(subtaskId: String): SnapshotGroup? 
= transaction { + SnapshotGroupsTable.selectAll() + .where { SnapshotGroupsTable.subtaskId eq subtaskId } + .orderBy(SnapshotGroupsTable.createdAt to SortOrder.DESC) + .limit(1) + .map(::toGroup) + .singleOrNull() + } + + private fun toGroup(row: ResultRow) = SnapshotGroup( + id = row[SnapshotGroupsTable.id], + taskId = row[SnapshotGroupsTable.taskId], + subtaskId = row[SnapshotGroupsTable.subtaskId], + createdAt = row[SnapshotGroupsTable.createdAt] + ) +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt index 411d5e92..7d56f46c 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt @@ -24,7 +24,7 @@ class SnapshotRepository { */ fun create( taskId: String, - subtaskId: String? = null, + groupId: String, filePath: String, content: String, contentHash: String @@ -36,7 +36,7 @@ class SnapshotRepository { val snapshotId = SnapshotsTable.insert { it[SnapshotsTable.taskId] = taskId - it[SnapshotsTable.subtaskId] = subtaskId + it[SnapshotsTable.groupId] = groupId it[SnapshotsTable.filePath] = filePath it[SnapshotsTable.contentHash] = contentHash it[contentCompressed] = ExposedBlob(compressed) @@ -78,12 +78,12 @@ class SnapshotRepository { } /** - * Find snapshots for a specific subtask + * Find snapshots belonging to a snapshot group (one group per tool execution). 
*/ - fun findBySubtaskId(subtaskId: String): List { + fun findByGroupId(groupId: String): List { return transaction { SnapshotsTable.selectAll() - .where { SnapshotsTable.subtaskId eq subtaskId } + .where { SnapshotsTable.groupId eq groupId } .orderBy(SnapshotsTable.createdAt to SortOrder.DESC) .map { rowToSnapshot(it) } } @@ -205,7 +205,7 @@ class SnapshotRepository { return Snapshot( id = row[SnapshotsTable.id], taskId = row[SnapshotsTable.taskId], - subtaskId = row[SnapshotsTable.subtaskId], + groupId = row[SnapshotsTable.groupId], filePath = row[SnapshotsTable.filePath], contentHash = row[SnapshotsTable.contentHash], contentCompressed = row[SnapshotsTable.contentCompressed].bytes, diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt index 01a6ac01..7c6b9277 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt @@ -56,6 +56,33 @@ data class LLMUsage( val totalTokens: Int ) +/** + * Single tool invocation emitted by the LLM via native function-calling API. + * + * Unified across providers: OpenAI tool_calls[], Anthropic content[].tool_use, + * Ollama message.tool_calls[]. Each adapter maps its provider-specific shape to this + * type before returning from chat(). + * + * @property id Stable call identifier. Ollama omits it so adapters generate UUID. + * @property name Tool name as registered in ToolRegistry. + * @property argumentsJson Raw JSON string of arguments. AgentTurnLoop parses this. + */ +data class NativeToolCall( + val id: String, + val name: String, + val argumentsJson: String, +) + +/** + * Incremental tool-call delta for streaming. + */ +data class NativeToolCallDelta( + val index: Int, + val idDelta: String? = null, + val nameDelta: String? = null, + val argumentsDelta: String? 
= null, +) + /** * Single chunk from streaming LLM response (US-027) */ @@ -63,7 +90,8 @@ data class StreamChunk( val delta: String, // Incremental content val thinking: String? = null, // Incremental thinking content (reasoning models) val finishReason: String? = null, // "stop", "length", "cancelled", etc. - val usage: LLMUsage? = null // Present only on final chunk + val usage: LLMUsage? = null, // Present only on final chunk + val toolCallDelta: NativeToolCallDelta? = null // Present only on native-tools streaming path ) /** @@ -77,9 +105,28 @@ data class LLMResponse( val cost: Double, // Estimated cost in USD val finishReason: String? = null, val rawResponse: Map? = null, // Allow nullable values in response map - val thinking: String? = null // Complete thinking process (reasoning models) + val thinking: String? = null, // Complete thinking process (reasoning models) + /** + * Tool calls emitted via native function-calling API. + * + * - null = adapter did NOT use native tools path (fall back to JSON-in-text parsing) + * - emptyList() = native tools path was used, but model chose not to call any tool + * - non-empty = AgentTurnLoop should use these directly, skipping ToolCallParser + * + * Adapters MUST leave this null when `tools` was not passed in the request. + */ + val nativeToolCalls: List? = null, ) +/** + * Thrown by adapters when a provider rejects a request because native tool calling is + * not supported (e.g. HTTP 400 with "tool_choice"/"tools" in the error message). + * + * Caught by AgentTurnLoop to mark the model in [NativeToolsFallbackTracker] and retry + * without tools on the JSON-in-text path. + */ +class ToolsNotSupportedException(message: String, cause: Throwable? = null) : Exception(message, cause) + /** * Abstract base class for LLM adapters. 
* diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt index b7d54ec3..cf146bdf 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt @@ -95,6 +95,23 @@ object JsonExtractor { return rawJson } + // Turn-loop envelope: {"response": "...", "actions": [{"tool": ..., "args": ...}]} + // Treat each action as a subtask so downstream plan-extraction code works uniformly. + // This is the canonical shape emitted by PLAN/AGENT modes on the JSON-in-text path + // and by the native-tools serializer. + if (rawJson.containsKey("actions") && rawJson["actions"] is List<*>) { + logger.info { "[JSON] Normalizing envelope: 'actions' array → 'subtasks'" } + val subtasks = normalizeStepsArray(rawJson["actions"]) + val planDescription = rawJson["response"]?.toString() + ?: rawJson["plan"]?.toString() + ?: rawJson["summary"]?.toString() + ?: "Execution plan" + return mapOf( + "plan" to planDescription, + "subtasks" to subtasks + ) + } + // Helper function to find key case-insensitively fun findKey(vararg keys: String): Pair? { for (key in keys) { diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt index 841f04fc..8f2730fe 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt @@ -364,11 +364,14 @@ class LLMClient( kwargs = adapterKwargs ) - // For streaming mode, build response from accumulated data + // For streaming mode, build response from accumulated data while preserving + // adapter-supplied fields (nativeToolCalls, thinking, rawResponse). Dropping + // nativeToolCalls here would force AgentTurnLoop back into JSON-in-text parsing + // even when the adapter successfully parsed the native tool_calls stream. 
val finalResponse = if (stream && contentBuilder.isNotEmpty()) { val usage = finalUsage ?: response.usage val cost = estimateCost(usage, provider, model) - LLMResponse( + response.copy( content = contentBuilder.toString(), usage = usage, model = model, diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt index ca6e686d..682bef83 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt @@ -71,6 +71,40 @@ object ModelDefinitions { */ val OPENAI_MODELS = mapOf( + // + // GPT 5.5 + // + "gpt-5.5" to ModelDefinition( + id = "gpt-5.5", + name = "GPT-5.5", + provider = "openai", + description = "Best intelligence at scale for agentic, coding, and professional workflows", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.VISION, + ), + modelType = ModelType.TEXT, + maxContext = 1_050_000, + maxOutputTokens = 128_000, + costPer1MInput = 2.50, + costPer1MOutput = 15.00, + supportsVision = true, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + supportsThinking = true, + reasoningTokensMultiplier = 2.5, + endpointType = ApiEndpointType.CHAT_COMPLETIONS, + apiFormat = ApiFormat.CHAT_COMPLETIONS, + paramMappings = mapOf( + "max_tokens" to "max_completion_tokens" + ), + removeParams = listOf( + "temperature" + ), + active = true + ), // // GPT 5.4 // @@ -135,6 +169,36 @@ object ModelDefinitions { ), active = true ), + "gpt-5.4-nano" to ModelDefinition( + id = "gpt-5.4-nano", + name = "GPT-5.4 Nano", + provider = "openai", + description = "Ultra cost-effective model for simple tasks", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.VISION, + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = 32_768, + 
costPer1MInput = 0.05, + costPer1MOutput = 0.40, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + paramMappings = mapOf( + "max_tokens" to "max_completion_tokens" + ), + removeParams = listOf( + "frequency_penalty", + "presence_penalty", + "top_p", + "temperature" + ), + active = true + ), // // GPT 5.2 @@ -1225,6 +1289,7 @@ object ModelDefinitions { supportsStreaming = true, supportsFunctionCalling = true, supportsThinking = true, + removeParams = listOf("temperature"), active = true ), @@ -1248,6 +1313,7 @@ object ModelDefinitions { supportsStreaming = true, supportsFunctionCalling = true, supportsThinking = true, + removeParams = listOf("temperature"), active = true ), @@ -2879,6 +2945,283 @@ object ModelDefinitions { active = true ), + "qwen3.6:35b" to ModelDefinition( + id = "qwen3.6:35b", + name = "Qwen 3.6 35B", + provider = "ollama", + description = "Qwen 3.6 35B multimodal model with 256K context and native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.VISION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.MULTIMODAL, + maxContext = 256_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = true, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + + "qwen3.6:27b" to ModelDefinition( + id = "qwen3.6:27b", + name = "Qwen 3.6 27B", + provider = "ollama", + description = "Qwen 3.6 27B multimodal model with 256K context and native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.VISION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.MULTIMODAL, + maxContext = 256_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = true, + supportsReasoning = true, + supportsStreaming = true, + 
supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "qwen3-next:80b" to ModelDefinition( + id = "qwen3-next:80b", + name = "Qwen 3 Next 80B", + provider = "ollama", + description = "Qwen 3 Next 80B with native tool use and long context", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "gpt-oss-safeguard:20b" to ModelDefinition( + id = "gpt-oss-safeguard:20b", + name = "GPT-OSS Safeguard 20B", + provider = "ollama", + description = "OpenAI gpt-oss-safeguard 20B with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "gpt-oss-safeguard:120b" to ModelDefinition( + id = "gpt-oss-safeguard:120b", + name = "GPT-OSS Safeguard 120B", + provider = "ollama", + description = "OpenAI gpt-oss-safeguard 120B with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 
0.7), + active = true + ), + + "deepseek-v3.2:latest" to ModelDefinition( + id = "deepseek-v3.2:latest", + name = "DeepSeek V3.2", + provider = "ollama", + description = "DeepSeek V3.2 with native tool use and reasoning", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "kimi-k2-thinking:latest" to ModelDefinition( + id = "kimi-k2-thinking:latest", + name = "Kimi K2 Thinking", + provider = "ollama", + description = "Moonshot Kimi K2 Thinking with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 200_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "minimax-m2:latest" to ModelDefinition( + id = "minimax-m2:latest", + name = "MiniMax M2", + provider = "ollama", + description = "MiniMax M2 with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "nemotron-3-super:120b" to ModelDefinition( + id = "nemotron-3-super:120b", + name 
= "Nemotron 3 Super 120B", + provider = "ollama", + description = "NVIDIA Nemotron 3 Super 120B with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "nemotron-3-nano:4b" to ModelDefinition( + id = "nemotron-3-nano:4b", + name = "Nemotron 3 Nano 4B", + provider = "ollama", + description = "NVIDIA Nemotron 3 Nano 4B with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "nemotron-3-nano:30b" to ModelDefinition( + id = "nemotron-3-nano:30b", + name = "Nemotron 3 Nano 30B", + provider = "ollama", + description = "NVIDIA Nemotron 3 Nano 30B with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + + "nemotron-cascade-2:30b" to ModelDefinition( + id = "nemotron-cascade-2:30b", + name = "Nemotron Cascade 2 30B", + provider = "ollama", + description = "NVIDIA 
Nemotron Cascade 2 30B with native tool use", + capabilities = listOf( + ModelCapability.CHAT_COMPLETION, + ModelCapability.TEXT_COMPLETION, + ModelCapability.TOOL_USE + ), + modelType = ModelType.TEXT, + maxContext = 128_000, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = false, + supportsReasoning = true, + supportsStreaming = true, + supportsFunctionCalling = true, + defaultParams = mapOf("temperature" to 0.7), + active = true + ), + "qwen3-coder:30b" to ModelDefinition( id = "qwen3-coder:30b", name = "Qwen 3 Coder 30B", @@ -3007,8 +3350,7 @@ object ModelDefinitions { description = "Compact reasoning model with excellent efficiency", capabilities = listOf( ModelCapability.CHAT_COMPLETION, - ModelCapability.REASONING, - ModelCapability.TOOL_USE + ModelCapability.REASONING ), modelType = ModelType.TEXT, maxContext = 64_000, @@ -3018,7 +3360,7 @@ object ModelDefinitions { supportsVision = false, supportsReasoning = true, supportsStreaming = true, - supportsFunctionCalling = true, + supportsFunctionCalling = false, defaultParams = mapOf("temperature" to 0.7), active = true ), @@ -3030,8 +3372,7 @@ object ModelDefinitions { description = "Open reasoning model approaching O3/Gemini 2.5 Pro performance", capabilities = listOf( ModelCapability.CHAT_COMPLETION, - ModelCapability.REASONING, - ModelCapability.TOOL_USE + ModelCapability.REASONING ), modelType = ModelType.TEXT, maxContext = 64_000, @@ -3041,7 +3382,7 @@ object ModelDefinitions { supportsVision = false, supportsReasoning = true, supportsStreaming = true, - supportsFunctionCalling = true, + supportsFunctionCalling = false, defaultParams = mapOf("temperature" to 0.7), active = true ), @@ -3053,8 +3394,7 @@ object ModelDefinitions { description = "High-performance reasoning model", capabilities = listOf( ModelCapability.CHAT_COMPLETION, - ModelCapability.REASONING, - ModelCapability.TOOL_USE + ModelCapability.REASONING ), modelType = ModelType.TEXT, maxContext = 
64_000, @@ -3064,7 +3404,7 @@ object ModelDefinitions { supportsVision = false, supportsReasoning = true, supportsStreaming = true, - supportsFunctionCalling = true, + supportsFunctionCalling = false, defaultParams = mapOf("temperature" to 0.7), active = true ), @@ -3076,8 +3416,7 @@ object ModelDefinitions { description = "Large reasoning model with superior capabilities", capabilities = listOf( ModelCapability.CHAT_COMPLETION, - ModelCapability.REASONING, - ModelCapability.TOOL_USE + ModelCapability.REASONING ), modelType = ModelType.TEXT, maxContext = 64_000, @@ -3087,7 +3426,7 @@ object ModelDefinitions { supportsVision = false, supportsReasoning = true, supportsStreaming = true, - supportsFunctionCalling = true, + supportsFunctionCalling = false, defaultParams = mapOf("temperature" to 0.7), active = true ), @@ -3099,8 +3438,7 @@ object ModelDefinitions { description = "Very large reasoning model with excellent performance", capabilities = listOf( ModelCapability.CHAT_COMPLETION, - ModelCapability.REASONING, - ModelCapability.TOOL_USE + ModelCapability.REASONING ), modelType = ModelType.TEXT, maxContext = 64_000, @@ -3110,7 +3448,7 @@ object ModelDefinitions { supportsVision = false, supportsReasoning = true, supportsStreaming = true, - supportsFunctionCalling = true, + supportsFunctionCalling = false, defaultParams = mapOf("temperature" to 0.7), active = true ), @@ -3122,8 +3460,7 @@ object ModelDefinitions { description = "Flagship reasoning model with state-of-the-art capabilities", capabilities = listOf( ModelCapability.CHAT_COMPLETION, - ModelCapability.REASONING, - ModelCapability.TOOL_USE + ModelCapability.REASONING ), modelType = ModelType.TEXT, maxContext = 64_000, @@ -3133,7 +3470,7 @@ object ModelDefinitions { supportsVision = false, supportsReasoning = true, supportsStreaming = true, - supportsFunctionCalling = true, + supportsFunctionCalling = false, defaultParams = mapOf("temperature" to 0.7), active = true ), @@ -3145,8 +3482,7 @@ object 
ModelDefinitions { description = "Massive MoE reasoning model rivaling closed-source models", capabilities = listOf( ModelCapability.CHAT_COMPLETION, - ModelCapability.REASONING, - ModelCapability.TOOL_USE + ModelCapability.REASONING ), modelType = ModelType.TEXT, maxContext = 64_000, @@ -3156,7 +3492,7 @@ object ModelDefinitions { supportsVision = false, supportsReasoning = true, supportsStreaming = true, - supportsFunctionCalling = true, + supportsFunctionCalling = false, defaultParams = mapOf("temperature" to 0.7), active = true ), @@ -4447,6 +4783,149 @@ object ModelDefinitions { ) ) + /** + * OpenRouter Models Registry (curated) + * + * OpenRouter exposes 200+ models via dynamic listing. This registry contains + * static definitions for the popular families where we know native + * function-calling is reliable — so AgentTurnLoop takes the native tool_calls + * path instead of the JSON-in-text envelope. Unknown OpenRouter model IDs + * still fall back to null (JSON envelope) — that preserves backwards + * compatibility for exotic community models. + * + * Matching is prefix-based in [getDefinition] (e.g. "amazon/nova-lite-v1" + * matches entry "amazon/nova-"). 
+ */ + val OPENROUTER_MODELS = mapOf( + // Anthropic + "anthropic/claude-opus" to openrouterDef("anthropic/claude-opus-*", maxContext = 1_000_000, vision = true), + "anthropic/claude-sonnet" to openrouterDef("anthropic/claude-sonnet-*", maxContext = 1_000_000, vision = true), + "anthropic/claude-haiku" to openrouterDef("anthropic/claude-haiku-*", maxContext = 200_000, vision = true), + "anthropic/claude-" to openrouterDef("anthropic/claude-*", maxContext = 200_000, vision = true), + // OpenAI + "openai/gpt-5.4-pro" to openrouterDef("openai/gpt-5.4-pro", maxContext = 1_050_000, vision = true), + "openai/gpt-5.4" to openrouterDef("openai/gpt-5.4-*", maxContext = 400_000, vision = true), + "openai/gpt-5" to openrouterDef("openai/gpt-5-*", maxContext = 400_000, vision = true), + "openai/gpt-audio" to openrouterDef("openai/gpt-audio*", maxContext = 128_000), + "openai/gpt-" to openrouterDef("openai/gpt-*", maxContext = 128_000, vision = true), + "openai/o" to openrouterDef("openai/o*", maxContext = 200_000, reasoning = true), + // Google + "google/gemini-3" to openrouterDef("google/gemini-3*", maxContext = 1_048_576, vision = true), + "google/gemini-" to openrouterDef("google/gemini-*", maxContext = 1_000_000, vision = true), + "google/gemma-" to openrouterDef("google/gemma-*", maxContext = 262_144, vision = true), + // Amazon + "amazon/nova-" to openrouterDef("amazon/nova-*", maxContext = 300_000, vision = true), + // Meta + "meta-llama/llama-" to openrouterDef("meta-llama/llama-*", maxContext = 128_000), + "meta-llama/" to openrouterDef("meta-llama/*", maxContext = 128_000), + // Mistral + "mistralai/" to openrouterDef("mistralai/*", maxContext = 262_144), + // Qwen + "qwen/qwen3.6" to openrouterDef("qwen/qwen3.6-*", maxContext = 1_000_000, vision = true), + "qwen/qwen3.5" to openrouterDef("qwen/qwen3.5-*", maxContext = 262_144, vision = true), + // qwen3-coder-* variants emit XML pseudo-tags instead of native tool_calls + // (observed: `......`). 
+ // Prefix entry must come before "qwen/qwen3" to win the prefix match. + "qwen/qwen3-coder" to openrouterDef("qwen/qwen3-coder-*", maxContext = 262_144, functionCalling = false), + "qwen/qwen3" to openrouterDef("qwen/qwen3-*", maxContext = 262_144), + "qwen/" to openrouterDef("qwen/*", maxContext = 128_000), + // DeepSeek + "deepseek/" to openrouterDef("deepseek/*", maxContext = 128_000), + // xAI + "x-ai/grok-" to openrouterDef("x-ai/grok-*", maxContext = 2_000_000, vision = true), + // Cohere + "cohere/command-" to openrouterDef("cohere/command-*", maxContext = 128_000), + // Moonshot + "moonshotai/kimi" to openrouterDef("moonshotai/kimi-*", maxContext = 262_144, vision = true), + "moonshotai/" to openrouterDef("moonshotai/*", maxContext = 128_000), + // MiniMax + // minimax-m2.7 emits `...` + // pseudo-XML instead of native tool_calls. Force JSON envelope mode. + "minimax/minimax-m2.7" to openrouterDef("minimax/minimax-m2.7*", maxContext = 196_608, functionCalling = false), + "minimax/minimax-m2" to openrouterDef("minimax/minimax-m2*", maxContext = 196_608), + "minimax/" to openrouterDef("minimax/*", maxContext = 128_000), + // Z.AI + "z-ai/glm-5v" to openrouterDef("z-ai/glm-5v*", maxContext = 202_752, vision = true), + "z-ai/glm-" to openrouterDef("z-ai/glm-*", maxContext = 202_752), + "z-ai/" to openrouterDef("z-ai/*", maxContext = 128_000), + // ByteDance + "bytedance-seed/" to openrouterDef("bytedance-seed/*", maxContext = 262_144, vision = true), + // NVIDIA + "nvidia/nemotron" to openrouterDef("nvidia/nemotron-*", maxContext = 262_144), + "nvidia/" to openrouterDef("nvidia/*", maxContext = 128_000), + // InclusionAI (Ling family) + "inclusionai/" to openrouterDef("inclusionai/*", maxContext = 262_144), + // Arcee + "arcee-ai/" to openrouterDef("arcee-ai/*", maxContext = 262_144), + // Kwai + "kwaipilot/" to openrouterDef("kwaipilot/*", maxContext = 256_000), + // Reka + "rekaai/" to openrouterDef("rekaai/*", maxContext = 16_384, vision = true), + // 
Xiaomi + "xiaomi/mimo" to openrouterDef("xiaomi/mimo*", maxContext = 1_048_576, vision = true), + // Upstage + "upstage/solar" to openrouterDef("upstage/solar-*", maxContext = 128_000), + // Inception + "inception/mercury" to openrouterDef("inception/mercury*", maxContext = 128_000), + // Tencent + "tencent/" to openrouterDef("tencent/*", maxContext = 262_144), + // Writer + "writer/palmyra" to openrouterDef("writer/palmyra-*", maxContext = 1_040_000), + // Allen AI + "allenai/olmo" to openrouterDef("allenai/olmo-*", maxContext = 65_536), + // StepFun + "stepfun/step" to openrouterDef("stepfun/step-*", maxContext = 262_144), + // Liquid + "liquid/lfm" to openrouterDef("liquid/lfm-*", maxContext = 32_768), + // AION + "aion-labs/" to openrouterDef("aion-labs/*", maxContext = 131_072), + // Baidu + "baidu/" to openrouterDef("baidu/*", maxContext = 65_536, vision = true), + // OpenRouter meta-models + "openrouter/" to openrouterDef("openrouter/*", maxContext = 200_000, vision = true) + ) + + private fun openrouterDef( + id: String, + maxContext: Int, + vision: Boolean = false, + reasoning: Boolean = false, + /** + * Whether the upstream model reliably emits native OpenAI-format + * `tool_calls`. When false, AgentTurnLoop falls back to the + * `response-contract-json` system prompt and parses JSON envelopes + * out of plain text — that is the right path for models which + * advertise function calling but actually emit pseudo-XML + * (e.g. ``, `...`). 
+ */ + functionCalling: Boolean = true + ): ModelDefinition = ModelDefinition( + id = id, + name = id, + provider = "openrouter", + description = if (functionCalling) + "OpenRouter curated family with native function-calling" + else + "OpenRouter family that does NOT emit native tool_calls — JSON envelope only", + capabilities = buildList { + add(ModelCapability.CHAT_COMPLETION) + add(ModelCapability.TEXT_COMPLETION) + if (vision) add(ModelCapability.VISION) + }, + modelType = ModelType.TEXT, + maxContext = maxContext, + maxOutputTokens = null, + costPer1MInput = 0.0, + costPer1MOutput = 0.0, + supportsVision = vision, + supportsReasoning = reasoning, + supportsStreaming = true, + supportsFunctionCalling = functionCalling, + endpointType = ApiEndpointType.CHAT_COMPLETIONS, + apiFormat = ApiFormat.CHAT_COMPLETIONS, + active = true + ) + /** * Get model definition by ID and provider. * Returns null if not found (use fallback). @@ -4463,7 +4942,10 @@ object ModelDefinitions { "gemini" -> GEMINI_MODELS[modelId] "lmstudio" -> LM_STUDIO_MODELS[modelId] "zai" -> ZAI_MODELS[modelId] - "openrouter" -> null // OpenRouter uses dynamic models + "openrouter" -> OPENROUTER_MODELS.entries + .firstOrNull { (prefix, _) -> modelId.startsWith(prefix) } + ?.value + ?.copy(id = modelId, name = modelId) else -> null } } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsFallbackTracker.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsFallbackTracker.kt new file mode 100644 index 00000000..55b05938 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsFallbackTracker.kt @@ -0,0 +1,32 @@ +package pl.jclab.refio.core.llm + +import pl.jclab.refio.core.logging.dualLogger +import java.util.concurrent.ConcurrentHashMap + +private val logger = dualLogger("NativeToolsFallbackTracker") + +/** + * In-memory registry of models that failed native function-calling in the current process run. 
+ * + * When a model returns a provider error indicating tools are not supported, it is added here. + * Subsequent requests in the same process skip the native path for that model automatically. + * + * Not persisted — a process restart gives each model a fresh chance. + */ +object NativeToolsFallbackTracker { + private val fallbackModels = ConcurrentHashMap.newKeySet() + + fun isFallback(modelId: String): Boolean = modelId in fallbackModels + + fun markFallback(modelId: String, reason: String) { + if (fallbackModels.add(modelId)) { + logger.warn { "[NATIVE_TOOLS_FALLBACK] Marking $modelId as fallback: $reason" } + } + } + + fun getFallbackSet(): Set = fallbackModels.toSet() + + fun clear() { + fallbackModels.clear() + } +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsResolver.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsResolver.kt new file mode 100644 index 00000000..0cd8872a --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsResolver.kt @@ -0,0 +1,39 @@ +package pl.jclab.refio.core.llm + +import pl.jclab.refio.core.logging.dualLogger + +private val logger = dualLogger("NativeToolsResolver") + +enum class NativeToolsMode { AUTO, ALWAYS, NEVER } + +fun parseNativeToolsMode(raw: String?): NativeToolsMode = when (raw?.trim()?.lowercase()) { + "always" -> NativeToolsMode.ALWAYS + "never" -> NativeToolsMode.NEVER + else -> NativeToolsMode.AUTO +} + +/** + * Decide whether a given request should use native function-calling API. + * + * Precedence: + * 1. modelId in fallbackFlags → false (session-scoped failure cache) + * 2. mode == NEVER → false + * 3. mode == ALWAYS → true + * 4. 
mode == AUTO → ModelDefinition.supportsFunctionCalling (false if no definition) + */ +fun shouldUseNativeTools( + mode: NativeToolsMode, + definition: ModelDefinition?, + modelId: String, + fallbackFlags: Set = emptySet(), +): Boolean { + if (modelId in fallbackFlags) { + logger.debug { "[NATIVE_TOOLS] $modelId in session fallback set, forcing JSON path" } + return false + } + return when (mode) { + NativeToolsMode.NEVER -> false + NativeToolsMode.ALWAYS -> true + NativeToolsMode.AUTO -> definition?.supportsFunctionCalling == true + } +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt index a3c36c71..d4f23a34 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt @@ -28,7 +28,14 @@ object SupportedModels { * - GPT-3.5 Turbo (cost-effective) * - O1 series (reasoning models) */ - private val OPENAI_SUPPORTED = setOf( + private val OPENAI_SUPPORTED = setOf( + // GPT-5.5 + "gpt-5.5", + "gpt-5.5-mini", + "gpt-5.5-nano", + "gpt-5.5-codex-max", + "gpt-5.5-codex", + "gpt-5.5-codex-mini", // GPT-5.4 "gpt-5.4", "gpt-5.4-mini", @@ -80,7 +87,7 @@ object SupportedModels { /** * Anthropic supported models. 
*/ - private val ANTHROPIC_SUPPORTED = setOf( + private val ANTHROPIC_SUPPORTED = setOf( // Opus models "claude-opus-4-7", "anthropic.claude-opus-4-7", @@ -93,6 +100,7 @@ object SupportedModels { "claude-opus-4-0", "claude-opus-4-20250514", // Sonnet models + "claude-sonnet-4-6", "claude-sonnet-4-5-20250929", "claude-sonnet-4-5", "claude-sonnet-4-20250514", @@ -117,7 +125,7 @@ object SupportedModels { * * Format: "provider/model" (e.g., "google/gemini-2.0-flash-exp") */ - private val OPENROUTER_PATTERNS = setOf( + private val OPENROUTER_PATTERNS = setOf( // Google Gemini models (1.x and 2.x series) Regex("^.*gpt-.*"), Regex("^.*amazon.*"), @@ -177,7 +185,7 @@ object SupportedModels { /** * Ollama supported models. */ - private val OLLAMA_SUPPORTED = setOf( + private val OLLAMA_SUPPORTED = setOf( Regex("^.*"), ) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/ToolSchemaSanitizer.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/ToolSchemaSanitizer.kt new file mode 100644 index 00000000..b1f57073 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/ToolSchemaSanitizer.kt @@ -0,0 +1,209 @@ +package pl.jclab.refio.core.llm + +import pl.jclab.refio.core.tools.base.ToolSchema + +/** + * Provider-specific schema normalization for native tool calling. + * + * The source schemas come from tool definitions and are intentionally provider-agnostic. + * Each provider accepts a slightly different subset: + * - OpenAI strict mode requires fully strict object schemas. + * - Anthropic accepts JSON Schema objects but is picky about composition/meta keywords. + * - Gemini expects an OpenAPI-style schema shape and nullable fields instead of JSON Schema unions. 
+ */ +object ToolSchemaSanitizer { + + data class OpenAIToolSchema( + val tool: ToolSchema, + val strict: Boolean, + val strictIncompatibilities: List + ) + + fun forOpenAI(tool: ToolSchema): OpenAIToolSchema { + val sanitized = sanitizeForOpenAI(tool.parametersJsonSchema) + val incompatibilities = mutableListOf() + collectOpenAIStrictIncompatibilities( + node = sanitized, + path = "\$", + incompatibilities = incompatibilities + ) + return OpenAIToolSchema( + tool = tool.copy(parametersJsonSchema = sanitized), + strict = incompatibilities.isEmpty(), + strictIncompatibilities = incompatibilities + ) + } + + fun forAnthropic(tool: ToolSchema): ToolSchema = + tool.copy(parametersJsonSchema = sanitizeForAnthropic(tool.parametersJsonSchema)) + + fun forGemini(tool: ToolSchema): ToolSchema = + tool.copy(parametersJsonSchema = sanitizeForGemini(tool.parametersJsonSchema)) + + private fun sanitizeForOpenAI(schema: Map): Map { + @Suppress("UNCHECKED_CAST") + return sanitizeOpenAINode(schema) as Map + } + + private fun sanitizeOpenAINode(node: Any?): Any? { + return when (node) { + is Map<*, *> -> { + val result = LinkedHashMap() + for ((rawKey, rawValue) in node) { + val key = rawKey as? String ?: continue + if (key == "\$schema" || key == "default") continue + result[key] = sanitizeOpenAINode(rawValue) + } + result + } + is List<*> -> node.map { sanitizeOpenAINode(it) } + else -> node + } + } + + private fun collectOpenAIStrictIncompatibilities( + node: Any?, + path: String, + incompatibilities: MutableList + ) { + when (node) { + is Map<*, *> -> { + val typeValue = node["type"] + val properties = node["properties"] as? Map<*, *> + val additionalProperties = node["additionalProperties"] + val required = (node["required"] as? List<*>)?.mapNotNull { it as? String }?.toSet() + val propertyNames = properties?.keys?.mapNotNull { it as? 
String }?.toSet().orEmpty() + + if (node.containsKey("oneOf") || node.containsKey("allOf") || node.containsKey("anyOf")) { + incompatibilities += "$path uses schema composition keywords" + } + + if (isObjectType(typeValue) || properties != null) { + if (additionalProperties != false) { + incompatibilities += "$path is object-like without additionalProperties=false" + } + if (required != propertyNames) { + incompatibilities += "$path required keys do not exactly match properties" + } + } + + if (additionalProperties is Map<*, *>) { + incompatibilities += "$path uses dynamic object properties" + collectOpenAIStrictIncompatibilities( + additionalProperties, + "$path.additionalProperties", + incompatibilities + ) + } + + if (properties != null) { + for ((rawPropName, rawPropSchema) in properties) { + val propName = rawPropName as? String ?: continue + collectOpenAIStrictIncompatibilities( + rawPropSchema, + "$path.properties.$propName", + incompatibilities + ) + } + } + + for ((rawKey, rawValue) in node) { + val key = rawKey as? String ?: continue + if (key == "properties" || key == "additionalProperties") continue + collectOpenAIStrictIncompatibilities(rawValue, "$path.$key", incompatibilities) + } + } + is List<*> -> node.forEachIndexed { index, item -> + collectOpenAIStrictIncompatibilities(item, "$path[$index]", incompatibilities) + } + } + } + + private fun sanitizeForAnthropic(schema: Map): Map { + @Suppress("UNCHECKED_CAST") + return sanitizeAnthropicNode(schema) as Map + } + + private fun sanitizeAnthropicNode(node: Any?): Any? { + val forbidden = setOf("oneOf", "allOf", "anyOf", "\$schema", "default") + return when (node) { + is Map<*, *> -> { + val result = LinkedHashMap() + for ((rawKey, rawValue) in node) { + val key = rawKey as? 
String ?: continue + if (key in forbidden) continue + result[key] = sanitizeAnthropicNode(rawValue) + } + result + } + is List<*> -> node.map { sanitizeAnthropicNode(it) } + else -> node + } + } + + private fun sanitizeForGemini(schema: Map): Map { + @Suppress("UNCHECKED_CAST") + return sanitizeGeminiNode(schema) as Map + } + + private fun sanitizeGeminiNode(node: Any?): Any? { + val forbidden = setOf("oneOf", "allOf", "anyOf", "\$schema", "additionalProperties", "default") + return when (node) { + is Map<*, *> -> { + val result = LinkedHashMap() + val originalType = node["type"] + val nullable = isNullableType(originalType) + for ((rawKey, rawValue) in node) { + val key = rawKey as? String ?: continue + if (key in forbidden) continue + result[key] = when (key) { + "type" -> toGeminiType(rawValue) + else -> sanitizeGeminiNode(rawValue) + } + } + if (nullable) { + result["nullable"] = true + } + result + } + is List<*> -> node.map { sanitizeGeminiNode(it) } + else -> node + } + } + + private fun isObjectType(typeValue: Any?): Boolean { + return when (typeValue) { + is String -> typeValue.equals("object", ignoreCase = true) + is List<*> -> typeValue.filterIsInstance().any { it.equals("object", ignoreCase = true) } + else -> false + } + } + + private fun isNullableType(typeValue: Any?): Boolean { + return when (typeValue) { + is List<*> -> typeValue.filterIsInstance().any { it.equals("null", ignoreCase = true) } + else -> false + } + } + + private fun toGeminiType(typeValue: Any?): Any? 
{ + val normalized = when (typeValue) { + is String -> typeValue + is List<*> -> typeValue + .filterIsInstance() + .firstOrNull { !it.equals("null", ignoreCase = true) } + ?: typeValue.filterIsInstance().firstOrNull() + else -> null + } ?: return typeValue + + return when (normalized.lowercase()) { + "string" -> "STRING" + "integer" -> "INTEGER" + "boolean" -> "BOOLEAN" + "number" -> "NUMBER" + "array" -> "ARRAY" + "object" -> "OBJECT" + else -> normalized.uppercase() + } + } +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt index 3ee21884..203d697d 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt @@ -8,8 +8,12 @@ import pl.jclab.refio.core.llm.LLMMessage import pl.jclab.refio.core.llm.LLMResponse import pl.jclab.refio.core.llm.LLMUsage import pl.jclab.refio.core.llm.ModelConfig +import pl.jclab.refio.core.llm.NativeToolCall import pl.jclab.refio.core.llm.StreamChunk +import pl.jclab.refio.core.llm.ToolSchemaSanitizer +import pl.jclab.refio.core.llm.ToolsNotSupportedException import pl.jclab.refio.core.llm.toModelConfig +import pl.jclab.refio.core.tools.base.ToolSchema import pl.jclab.refio.core.utils.GsonInstance.gson import io.ktor.client.* import io.ktor.client.call.* @@ -86,6 +90,8 @@ class AnthropicAdapter( // Stream aborted by a guardrail (see core/llm/streaming/) — must propagate // so the caller can see StreamAbortedException instead of RefioError.LLMError. throw e + } catch (e: ToolsNotSupportedException) { + throw e } catch (e: Exception) { throw LLMErrorMapper.fromThrowable(provider, model, timeout, e) } @@ -123,11 +129,12 @@ class AnthropicAdapter( // "user"/"assistant" in the messages array — remap "tool" (used by // LLMMessageMapper for tool results) to "assistant" so the request // body stays valid. 
+ // Tool results must be on the "user" role for Anthropic — mapping them to + // "assistant" makes the conversation end with an assistant message, which Anthropic + // interprets as a prefill (and some models like opus-4-6 reject prefill entirely). val claudeMessages = nonSystemMessages.mapIndexed { index, msg -> - val mappedRole = if (msg.role == "tool") "assistant" else msg.role + val mappedRole = if (msg.role == "tool") "user" else msg.role val content = toAnthropicMessageContent(msg) - // Anthropic API rejects requests where assistant content ends with trailing whitespace. - // Trim trailing whitespace from the last assistant message to prevent HTTP 400. val sanitizedContent = if (index == nonSystemMessages.lastIndex && mappedRole == "assistant" && content is String) { content.trimEnd() } else { @@ -170,7 +177,14 @@ class AnthropicAdapter( "[ANTHROPIC] Using maxTokens=$effectiveMaxTokens (requested=$maxTokens, configLimit=$maxOutputLimit, modelLimit=${modelLimit ?: "n/a"})" } - put("temperature", temperature) + // Respect model-level removeParams (e.g. claude-opus-4-7 deprecated `temperature`). + val removeParams = pl.jclab.refio.core.llm.ModelDefinitions + .getDefinition("anthropic", model) + ?.removeParams + ?: emptyList() + if ("temperature" !in removeParams) { + put("temperature", temperature) + } // Add system prompt as separate parameter (not a message) // Use combinedSystemPrompt which combines all system messages from systemMessages parameter @@ -208,6 +222,14 @@ class AnthropicAdapter( (kwargs["top_p"] as? Number)?.let { put("top_p", it) } (kwargs["top_k"] as? Number)?.let { put("top_k", it) } (kwargs["stop_sequences"] as? List<*>)?.let { put("stop_sequences", it) } + + // Native function-calling tools + @Suppress("UNCHECKED_CAST") + val nativeTools = kwargs["native_tools"] as? 
List + if (!nativeTools.isNullOrEmpty()) { + put("tools", buildAnthropicToolsArray(nativeTools)) + logger.info { "[ANTHROPIC][NATIVE_TOOLS] Sending ${nativeTools.size} tool schemas" } + } } val requestJson = gson.toJson(requestBody) @@ -226,6 +248,46 @@ class AnthropicAdapter( } } + private fun buildAnthropicToolsArray(tools: List): List> = + tools.map { rawTool -> + val tool = ToolSchemaSanitizer.forAnthropic(rawTool) + mapOf( + "name" to tool.name, + "description" to tool.description, + "input_schema" to tool.parametersJsonSchema, + ) + } + + private fun sanitizeInputSchemaForAnthropic(toolName: String, schema: Map): Map { + val forbidden = listOf("oneOf", "allOf", "anyOf") + val stripped = forbidden.filter { schema.containsKey(it) } + if (stripped.isEmpty()) return schema + logger.warn { + "[ANTHROPIC][NATIVE_TOOLS] Tool '$toolName' schema has top-level ${stripped.joinToString()} " + + "which Anthropic rejects — stripping. Encode constraints in field descriptions instead." + } + return schema.filterKeys { it !in forbidden } + } + + private fun parseNativeAnthropicToolCalls(contentBlocks: List>): List { + return contentBlocks.mapNotNull { block -> + if (block["type"] != "tool_use") return@mapNotNull null + val id = block["id"] as? String ?: return@mapNotNull null + val name = block["name"] as? 
String ?: return@mapNotNull null + val input = block["input"] + val argumentsJson = when (input) { + is Map<*, *> -> gson.toJson(input) + null -> "{}" + else -> gson.toJson(input) + } + NativeToolCall( + id = id, + name = ToolCallContentNormalizer.normalizeToolName(name), + argumentsJson = argumentsJson + ) + } + } + private suspend fun executeStandard( apiKey: String, requestBody: Map, @@ -286,6 +348,9 @@ class AnthropicAdapter( source = source ) + if (requestBody.containsKey("tools") && isToolsNotSupportedError(httpStatus, errorMessage)) { + throw ToolsNotSupportedException(fullErrorMessage) + } throw LLMErrorMapper.fromHttpStatus(provider, model, httpStatus, fullErrorMessage) } @@ -338,14 +403,21 @@ class AnthropicAdapter( when (block["type"]) { "text" -> textContent += (block["text"] as? String ?: "") "thinking" -> { - // Collect thinking process for UI display val thinking = block["thinking"] as? String ?: "" thinkingContent += thinking logger.debug { "[ANTHROPIC] Claude thinking: ${thinking.take(200)}..." } } } } - if (textContent.isBlank()) { + + val toolsWereRequested = requestBody.containsKey("tools") + val nativeToolCalls: List? 
= if (toolsWereRequested) { + parseNativeAnthropicToolCalls(contentBlocks).also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] calls=${calls.size}" } + } + } else null + + if (nativeToolCalls == null && textContent.isBlank()) { val normalizedToolCallsJson = ToolCallContentNormalizer.fromAnthropicContentBlocks(contentBlocks) if (normalizedToolCallsJson != null) { textContent = normalizedToolCallsJson @@ -370,9 +442,12 @@ class AnthropicAdapter( cost = cost, finishReason = stopReason, rawResponse = response, - thinking = thinkingContent.takeIf { it.isNotEmpty() } // Include thinking for UI + thinking = thinkingContent.takeIf { it.isNotEmpty() }, + nativeToolCalls = nativeToolCalls ) + } catch (e: ToolsNotSupportedException) { + throw e } catch (e: Exception) { // Error #15: Log error (console + database) val latencyMs = (System.currentTimeMillis() - startTime).toInt() @@ -403,7 +478,8 @@ class AnthropicAdapter( ): LLMResponse { val contentBuilder = StringBuilder() val thinkingBuilder = StringBuilder() // Collect thinking process - val activeToolUseByIndex = mutableMapOf>() + // Triple: (id, name, argsBuilder) + val activeToolUseByIndex = mutableMapOf>() val completedToolUseBlocks = mutableListOf>() var inputTokens = 0 var outputTokens = 0 @@ -458,6 +534,9 @@ class AnthropicAdapter( source = source ) + if (requestBody.containsKey("tools") && isToolsNotSupportedError(httpStatus ?: 500, errorMessage)) { + throw ToolsNotSupportedException(errorMessage) + } throw LLMErrorMapper.fromHttpStatus(provider, model, httpStatus ?: 500, errorMessage) } @@ -505,13 +584,14 @@ class AnthropicAdapter( val contentBlock = chunk["content_block"] as? Map ?: emptyMap() if (contentBlock["type"] == "tool_use") { val name = contentBlock["name"] as? String + val id = contentBlock["id"] as? 
String if (!name.isNullOrBlank()) { val argsBuilder = StringBuilder() val input = contentBlock["input"] if (input is Map<*, *> && input.isNotEmpty()) { argsBuilder.append(gson.toJson(input)) } - activeToolUseByIndex[index] = name to argsBuilder + activeToolUseByIndex[index] = Triple(id, name, argsBuilder) } } } @@ -550,7 +630,7 @@ class AnthropicAdapter( val index = (chunk["index"] as? Number)?.toInt() ?: -1 val partialJson = delta["partial_json"] as? String if (!partialJson.isNullOrEmpty()) { - activeToolUseByIndex[index]?.second?.append(partialJson) + activeToolUseByIndex[index]?.third?.append(partialJson) } } } @@ -560,7 +640,7 @@ class AnthropicAdapter( val index = (chunk["index"] as? Number)?.toInt() ?: -1 val active = activeToolUseByIndex.remove(index) if (active != null) { - val (name, argsBuilder) = active + val (toolId, name, argsBuilder) = active val argsRaw = argsBuilder.toString().trim() val input = if (argsRaw.isEmpty()) { emptyMap() @@ -575,6 +655,7 @@ class AnthropicAdapter( completedToolUseBlocks.add( mapOf( "type" to "tool_use", + "id" to (toolId ?: java.util.UUID.randomUUID().toString()), "name" to name, "input" to input ) @@ -664,7 +745,14 @@ class AnthropicAdapter( logger.info { "$logPrefix Streaming completed in ${latencyMs}ms, tokens=$inputTokens/$outputTokens, logged to API logs" } - if (contentBuilder.isEmpty()) { + val toolsWereRequested = requestBody.containsKey("tools") + val streamNativeToolCalls: List? 
= if (toolsWereRequested) { + parseNativeAnthropicToolCalls(completedToolUseBlocks).also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] (stream) calls=${calls.size}" } + } + } else null + + if (streamNativeToolCalls == null && contentBuilder.isEmpty()) { val normalizedToolCallsJson = ToolCallContentNormalizer.fromAnthropicContentBlocks(completedToolUseBlocks) if (normalizedToolCallsJson != null) { contentBuilder.append(normalizedToolCallsJson) @@ -680,7 +768,8 @@ class AnthropicAdapter( cost = cost, finishReason = finalStopReason, rawResponse = syntheticResponse, - thinking = thinkingBuilder.takeIf { it.isNotEmpty() }?.toString() // Include thinking for UI + thinking = thinkingBuilder.takeIf { it.isNotEmpty() }?.toString(), + nativeToolCalls = streamNativeToolCalls ) } catch (e: CancellationException) { @@ -699,6 +788,8 @@ class AnthropicAdapter( source = source ) throw e + } catch (e: ToolsNotSupportedException) { + throw e } catch (e: Exception) { val latencyMs = (System.currentTimeMillis() - startTime).toInt() logger.apiError( @@ -803,6 +894,26 @@ class AnthropicAdapter( ) } + private fun isToolsNotSupportedError(httpStatus: Int, errorMessage: String): Boolean { + if (httpStatus != 400 && httpStatus != 422) return false + val lower = errorMessage.lowercase() + val mentionsTooling = + lower.contains("tools") || + lower.contains("tool_use") || + lower.contains("function calling") || + lower.contains("input_schema") || + lower.contains("schema") + val unsupportedShape = + lower.contains("not supported") || + lower.contains("unsupported") || + lower.contains("unknown parameter") || + lower.contains("unexpected parameter") || + lower.contains("invalid parameter") || + lower.contains("invalid schema") || + lower.contains("invalid_request_error") + return mentionsTooling && unsupportedShape + } + override suspend fun close() { if (ownsHttpClient) { client.close() diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/GeminiAdapter.kt 
b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/GeminiAdapter.kt index 84575323..f278fcc6 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/GeminiAdapter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/GeminiAdapter.kt @@ -1,34 +1,31 @@ package pl.jclab.refio.core.llm.adapters +import io.ktor.client.* +import io.ktor.client.call.* +import io.ktor.client.request.* +import io.ktor.http.* +import io.ktor.utils.io.* +import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.withContext +import pl.jclab.refio.core.config.ConfigKeys +import pl.jclab.refio.core.errors.LLMErrorMapper +import pl.jclab.refio.core.errors.RefioError import pl.jclab.refio.core.llm.BaseLLMAdapter import pl.jclab.refio.core.llm.LLMMessage import pl.jclab.refio.core.llm.LLMResponse import pl.jclab.refio.core.llm.LLMUsage import pl.jclab.refio.core.llm.ModelConfig +import pl.jclab.refio.core.llm.NativeToolCall import pl.jclab.refio.core.llm.StreamChunk +import pl.jclab.refio.core.llm.ToolSchemaSanitizer +import pl.jclab.refio.core.llm.ToolsNotSupportedException import pl.jclab.refio.core.llm.toModelConfig -import pl.jclab.refio.core.utils.GsonInstance.gson -import io.ktor.client.HttpClient -import io.ktor.client.call.body -import io.ktor.client.engine.cio.CIO -import io.ktor.client.plugins.HttpTimeout -import io.ktor.client.plugins.contentnegotiation.ContentNegotiation -import io.ktor.client.plugins.logging.* -import io.ktor.client.plugins.logging.Logger as KtorLogger -import io.ktor.client.request.* -import io.ktor.http.* -import io.ktor.serialization.gson.gson -import pl.jclab.refio.core.config.ConfigKeys import pl.jclab.refio.core.logging.dualLogger import pl.jclab.refio.core.security.SecureLogger -import io.ktor.client.request.get -import kotlinx.coroutines.CancellationException -import kotlinx.coroutines.withContext -import kotlinx.coroutines.Dispatchers -import io.ktor.utils.io.ByteReadChannel 
-import pl.jclab.refio.core.errors.LLMErrorMapper -import pl.jclab.refio.core.errors.RefioError -import java.util.UUID +import pl.jclab.refio.core.tools.base.ToolSchema +import pl.jclab.refio.core.utils.GsonInstance.gson +import java.util.* /** * Adapter for Google Gemini models (generateContent API). @@ -103,6 +100,8 @@ class GeminiAdapter( // Stream aborted by a guardrail (see core/llm/streaming/) — must propagate // so the caller can see StreamAbortedException instead of RefioError.LLMError. throw e + } catch (e: ToolsNotSupportedException) { + throw e } catch (e: Exception) { throw LLMErrorMapper.fromThrowable(provider, model, timeoutMs, e) } @@ -192,9 +191,65 @@ class GeminiAdapter( body["generationConfig"] = generationConfig } + @Suppress("UNCHECKED_CAST") + val nativeTools = kwargs["native_tools"] as? List + if (!nativeTools.isNullOrEmpty()) { + body["tools"] = listOf( + mapOf( + "functionDeclarations" to nativeTools.map { rawTool -> + val tool = ToolSchemaSanitizer.forGemini(rawTool) + mapOf( + "name" to tool.name, + "description" to tool.description, + "parameters" to tool.parametersJsonSchema, + ) + } + ) + ) + logger.info { "[GEMINI][NATIVE_TOOLS] Sending ${nativeTools.size} tool schemas" } + } + return body } + /** + * Gemini rejects top-level `oneOf/allOf/anyOf` and JSON-Schema meta keys like `$schema` + * on function parameter schemas. Strip them conservatively and log a warning so the + * authors can encode constraints in field descriptions. + */ + private fun sanitizeSchemaForGemini(toolName: String, schema: Map): Map { + @Suppress("UNCHECKED_CAST") + return sanitizeGeminiNode(schema, toolName) as Map + } + + private fun sanitizeGeminiNode(node: Any?, toolName: String): Any? { + val forbidden = setOf("oneOf", "allOf", "anyOf", "\$schema", "additionalProperties", "default") + return when (node) { + is Map<*, *> -> { + val result = LinkedHashMap() + for ((k, v) in node) { + val key = k as? 
String ?: continue + if (key in forbidden) continue + val sanitizedValue = if (key == "type" && v is List<*>) { + val firstString = v.firstOrNull { it is String && it != "null" } as? String + ?: (v.firstOrNull { it is String } as? String) + ?: "string" + logger.warn { + "[GEMINI][NATIVE_TOOLS] Tool '$toolName' has type=$v (array) — coercing to '$firstString'. Gemini accepts only single types." + } + firstString + } else { + sanitizeGeminiNode(v, toolName) + } + result[key] = sanitizedValue + } + result + } + is List<*> -> node.map { sanitizeGeminiNode(it, toolName) } + else -> node + } + } + private suspend fun executeStandard( apiKey: String, requestBody: Map, @@ -235,7 +290,13 @@ class GeminiAdapter( subtaskId = subtaskId, source = source ) - throw LLMErrorMapper.fromHttpStatus(provider, model, httpStatus, "Gemini API error (HTTP $httpStatus)") + val errorMessage = "Gemini API error (HTTP $httpStatus): ${gson.toJson(rawResponse)}" + if ((requestBody["tools"] as? List<*>)?.isNotEmpty() == true && + isToolsNotSupportedError(httpStatus, errorMessage) + ) { + throw ToolsNotSupportedException(errorMessage) + } + throw LLMErrorMapper.fromHttpStatus(provider, model, httpStatus, errorMessage) } if (rawResponse["candidates"] !is List<*>) { @@ -249,8 +310,15 @@ class GeminiAdapter( val usage = extractUsage(rawResponse) val cost = estimateCost(usage) val content = extractContent(rawResponse) - val normalizedToolCallsJson = if (content.isBlank()) { - ToolCallContentNormalizer.fromGeminiParts(extractParts(rawResponse)) + val toolsWereRequested = (requestBody["tools"] as? List<*>)?.isNotEmpty() == true + val parts = extractParts(rawResponse) + val nativeToolCalls: List? 
= if (toolsWereRequested) { + parseNativeGeminiToolCalls(parts).also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] calls=${calls.size}" } + } + } else null + val normalizedToolCallsJson = if (nativeToolCalls == null && content.isBlank()) { + ToolCallContentNormalizer.fromGeminiParts(parts) } else { null } @@ -284,8 +352,13 @@ class GeminiAdapter( provider = provider, cost = cost, finishReason = finishReason, - rawResponse = rawResponse + rawResponse = rawResponse, + nativeToolCalls = nativeToolCalls ) + } catch (e: ToolsNotSupportedException) { + throw e + } catch (e: ToolsNotSupportedException) { + throw e } catch (e: Exception) { val latencyMs = (System.currentTimeMillis() - startTime).toInt() logger.apiError( @@ -304,6 +377,26 @@ class GeminiAdapter( } } + private fun parseNativeGeminiToolCalls(parts: List>): List { + return parts.mapNotNull { part -> + @Suppress("UNCHECKED_CAST") + val functionCall = part["functionCall"] as? Map ?: return@mapNotNull null + val name = functionCall["name"] as? String ?: return@mapNotNull null + val args = functionCall["args"] + val argumentsJson = when (args) { + null -> "{}" + is Map<*, *> -> gson.toJson(args) + is String -> args.ifBlank { "{}" } + else -> gson.toJson(args) + } + NativeToolCall( + id = java.util.UUID.randomUUID().toString(), + name = ToolCallContentNormalizer.normalizeToolName(name), + argumentsJson = argumentsJson + ) + } + } + private suspend fun executeStreaming( apiKey: String, requestBody: Map, @@ -343,11 +436,17 @@ class GeminiAdapter( subtaskId = subtaskId, source = source ) + val errorMessage = "Gemini streaming error (HTTP $httpStatus): $errorBody" + if ((requestBody["tools"] as? 
List<*>)?.isNotEmpty() == true && + isToolsNotSupportedError(httpStatus ?: 500, errorMessage) + ) { + throw ToolsNotSupportedException(errorMessage) + } throw LLMErrorMapper.fromHttpStatus( provider, model, httpStatus ?: 500, - "Gemini streaming error (HTTP $httpStatus)" + errorMessage ) } @@ -433,7 +532,14 @@ class GeminiAdapter( source = source ) - if (contentBuilder.isEmpty()) { + val toolsWereRequested = (requestBody["tools"] as? List<*>)?.isNotEmpty() == true + val streamNativeToolCalls: List? = if (toolsWereRequested) { + parseNativeGeminiToolCalls(functionCallParts).also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] (stream) calls=${calls.size}" } + } + } else null + + if (streamNativeToolCalls == null && contentBuilder.isEmpty()) { val normalizedToolCallsJson = ToolCallContentNormalizer.fromGeminiParts(functionCallParts) if (normalizedToolCallsJson != null) { contentBuilder.append(normalizedToolCallsJson) @@ -447,7 +553,8 @@ class GeminiAdapter( model = model, provider = provider, cost = cost, - finishReason = finalFinishReason + finishReason = finalFinishReason, + nativeToolCalls = streamNativeToolCalls ) } catch (e: CancellationException) { // Guardrail-triggered abort — log and rethrow as-is, do NOT wrap. 
@@ -575,6 +682,28 @@ class GeminiAdapter( ) } + private fun isToolsNotSupportedError(httpStatus: Int, errorMessage: String): Boolean { + if (httpStatus != 400 && httpStatus != 422) return false + val lower = errorMessage.lowercase() + val mentionsTooling = + lower.contains("tools") || + lower.contains("functiondeclarations") || + lower.contains("function declarations") || + lower.contains("function calling") || + lower.contains("parameters") || + lower.contains("schema") + val unsupportedShape = + lower.contains("not supported") || + lower.contains("unsupported") || + lower.contains("unknown field") || + lower.contains("unknown name") || + lower.contains("unexpected") || + lower.contains("invalid argument") || + lower.contains("invalid value") || + lower.contains("invalid schema") + return mentionsTooling && unsupportedShape + } + override suspend fun close() { if (ownsHttpClient) { client.close() diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OllamaAdapter.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OllamaAdapter.kt index de217495..848e1782 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OllamaAdapter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OllamaAdapter.kt @@ -5,8 +5,10 @@ import pl.jclab.refio.core.llm.LLMMessage import pl.jclab.refio.core.llm.LLMResponse import pl.jclab.refio.core.llm.LLMUsage import pl.jclab.refio.core.llm.ModelConfig +import pl.jclab.refio.core.llm.NativeToolCall import pl.jclab.refio.core.llm.StreamChunk import pl.jclab.refio.core.llm.toModelConfig +import pl.jclab.refio.core.tools.base.ToolSchema import pl.jclab.refio.core.services.ConfigService.Companion.DEFAULT_CONTEXT_SIZE import pl.jclab.refio.core.utils.GsonInstance.gson import io.ktor.client.* @@ -120,6 +122,8 @@ class OllamaAdapter( val responseFormat = kwargs["response_format"] as? Map<*, *> val jsonMode = responseFormat?.get("type") == "json_object" val thinkingRequested = kwargs["thinking"] as? 
Boolean ?: false + @Suppress("UNCHECKED_CAST") + val nativeTools = kwargs["native_tools"] as? List val requestBody = buildOllamaRequestBody( ollamaMessages = ollamaMessages, @@ -127,7 +131,8 @@ class OllamaAdapter( thinkingRequested = thinkingRequested, streaming = streaming, maxTokens = maxTokens, - temperature = temperature + temperature = temperature, + tools = nativeTools ) val requestJson = gson.toJson(requestBody) @@ -193,13 +198,19 @@ class OllamaAdapter( thinkingRequested: Boolean, streaming: Boolean, maxTokens: Int?, - temperature: Double + temperature: Double, + tools: List? = null ): Map { return buildMap { put("model", model) put("messages", ollamaMessages) put("stream", streaming) + if (!tools.isNullOrEmpty()) { + put("tools", buildOllamaToolsArray(tools)) + logger.info { "[OLLAMA][NATIVE_TOOLS] Sending ${tools.size} tool schemas: ${tools.map { it.name }}" } + } + // Keep model in GPU memory to avoid loading delays. val keepAlive = configService?.getTyped(ConfigKeys.PROVIDER_OLLAMA_KEEP_ALIVE) ?: ConfigKeys.PROVIDER_OLLAMA_KEEP_ALIVE.default @@ -307,6 +318,9 @@ class OllamaAdapter( source = source ) + if (requestBody.containsKey("tools") && isToolsNotSupportedError(httpStatus, errorMessage)) { + throw pl.jclab.refio.core.llm.ToolsNotSupportedException(fullErrorMessage) + } throw LLMErrorMapper.fromHttpStatus(provider, model, httpStatus, fullErrorMessage) } @@ -353,7 +367,14 @@ class OllamaAdapter( val rawThinking = messageMap["thinking"] as? 
String // Reasoning models (gpt-oss) val rawToolCalls = extractOllamaToolCalls(messageMap) - if (rawContent.isBlank() && rawToolCalls.isNotEmpty()) { + val toolsWereRequested = requestBody.containsKey("tools") + val nativeToolCalls = if (toolsWereRequested) { + parseNativeOllamaToolCalls(rawToolCalls).also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] calls=${calls.size}" } + } + } else null + + if (nativeToolCalls == null && rawContent.isBlank() && rawToolCalls.isNotEmpty()) { rawContent = convertToolCallsToCanonicalJson(rawToolCalls) logger.info { "$logPrefix [TOOL_CALLS_NORMALIZED] Converted Ollama message.tool_calls to canonical JSON content " + @@ -394,7 +415,8 @@ class OllamaAdapter( cost = 0.0, // Free local execution finishReason = doneReason, rawResponse = response, - thinking = finalThinking + thinking = finalThinking, + nativeToolCalls = nativeToolCalls ) } catch (e: Exception) { @@ -476,6 +498,12 @@ class OllamaAdapter( source = source ) + if (requestBody.containsKey("tools") && + httpStatus == 400 && + errorBody.contains("does not support tools", ignoreCase = true) + ) { + throw pl.jclab.refio.core.llm.ToolsNotSupportedException(errorMessage) + } throw LLMErrorMapper.fromHttpStatus(provider, model, httpStatus ?: 500, errorMessage) } @@ -647,7 +675,14 @@ class OllamaAdapter( var rawContent = rawContentBuilder.toString() val rawThinking = rawThinkingBuilder.takeIf { it.isNotEmpty() }?.toString() - if (rawContent.isBlank() && rawToolCalls.isNotEmpty()) { + val toolsWereRequestedStream = requestBody.containsKey("tools") + val streamNativeToolCalls = if (toolsWereRequestedStream) { + parseNativeOllamaToolCalls(rawToolCalls).also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] (stream) calls=${calls.size}" } + } + } else null + + if (streamNativeToolCalls == null && rawContent.isBlank() && rawToolCalls.isNotEmpty()) { rawContent = convertToolCallsToCanonicalJson(rawToolCalls) logger.info { "$logPrefix [TOOL_CALLS_NORMALIZED] 
Converted Ollama streamed message.tool_calls to canonical JSON content " + @@ -677,7 +712,8 @@ class OllamaAdapter( cost = 0.0, // Free local execution finishReason = finalDoneReason, rawResponse = syntheticResponse, - thinking = finalThinking + thinking = finalThinking, + nativeToolCalls = streamNativeToolCalls ) } catch (e: CancellationException) { // Guardrail-triggered abort — log and rethrow as-is, do NOT wrap. @@ -811,6 +847,39 @@ class OllamaAdapter( return Pair("", rawThinking) } + private fun buildOllamaToolsArray(tools: List): List> { + return tools.map { tool -> + mapOf( + "type" to "function", + "function" to mapOf( + "name" to tool.name, + "description" to tool.description, + "parameters" to tool.parametersJsonSchema, + ) + ) + } + } + + private fun parseNativeOllamaToolCalls(rawCalls: List>): List { + return rawCalls.mapNotNull { call -> + @Suppress("UNCHECKED_CAST") + val function = call["function"] as? Map ?: return@mapNotNull null + val name = function["name"] as? String ?: return@mapNotNull null + val argsRaw = function["arguments"] + val argumentsJson = when (argsRaw) { + is String -> argsRaw + is Map<*, *> -> gson.toJson(argsRaw) + null -> "{}" + else -> gson.toJson(argsRaw) + } + NativeToolCall( + id = java.util.UUID.randomUUID().toString(), + name = normalizeToolName(name), + argumentsJson = argumentsJson, + ) + } + } + private fun extractOllamaToolCalls(messageMap: Map): List> { @Suppress("UNCHECKED_CAST") val toolCalls = messageMap["tool_calls"] as? 
List> ?: return emptyList() @@ -860,6 +929,22 @@ class OllamaAdapter( } } + private fun isToolsNotSupportedError(httpStatus: Int, errorMessage: String): Boolean { + if (httpStatus != 400 && httpStatus != 422) return false + val lower = errorMessage.lowercase() + val mentionsTooling = + lower.contains("\"tools\"") || + lower.contains("tool calling") || + lower.contains("function calling") + val unsupportedShape = + lower.contains("not supported") || + lower.contains("unsupported") || + lower.contains("unknown field") || + lower.contains("unknown parameter") || + lower.contains("unexpected") + return mentionsTooling && unsupportedShape + } + override suspend fun close() { client.close() } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenAIAdapter.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenAIAdapter.kt index ed240d78..2f3dada2 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenAIAdapter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenAIAdapter.kt @@ -8,8 +8,12 @@ import pl.jclab.refio.core.llm.LLMMessage import pl.jclab.refio.core.llm.LLMResponse import pl.jclab.refio.core.llm.LLMUsage import pl.jclab.refio.core.llm.ModelConfig +import pl.jclab.refio.core.llm.NativeToolCall import pl.jclab.refio.core.llm.StreamChunk +import pl.jclab.refio.core.llm.ToolSchemaSanitizer +import pl.jclab.refio.core.llm.ToolsNotSupportedException import pl.jclab.refio.core.llm.toModelConfig +import pl.jclab.refio.core.tools.base.ToolSchema import pl.jclab.refio.core.services.ConfigService.Companion.DEFAULT_CONTEXT_SIZE import pl.jclab.refio.core.utils.GsonInstance.gson import io.ktor.client.* @@ -160,6 +164,25 @@ class OpenAIAdapter( ) } + @Suppress("UNCHECKED_CAST") + val tools = transformed["tools"] as? List> + if (tools != null) { + transformed["tools"] = tools.map { tool -> + val function = tool["function"] as? Map + if ((tool["type"] as? 
String) == "function" && function != null) { + mapOf( + "type" to "function", + "name" to (function["name"] as? String ?: ""), + "description" to (function["description"] as? String ?: ""), + "parameters" to (function["parameters"] as? Map<*, *> ?: emptyMap()), + "strict" to true + ) + } else { + tool + } + } + } + // Add reasoning parameter for reasoning models based on thinking flag // thinking: Boolean or String (low/medium/high) // - false (Boolean) → effort: "low" @@ -266,7 +289,24 @@ class OpenAIAdapter( content = extractTextFromOutputItems(output) } - if (content.isBlank()) { + val toolCalls = output.mapNotNull { item -> + if (item["type"] != "function_call") return@mapNotNull null + val id = (item["call_id"] as? String) + ?: (item["id"] as? String) + ?: return@mapNotNull null + val name = item["name"] as? String ?: return@mapNotNull null + val arguments = item["arguments"] as? String ?: "{}" + mapOf( + "id" to id, + "type" to "function", + "function" to mapOf( + "name" to name, + "arguments" to arguments + ) + ) + } + + if (content.isBlank() && toolCalls.isEmpty()) { // No silent fallback — let caller see the malformed structure. (Previously // probed top-level "text" field; that path hid bugs in Responses API // integration. See REFACTOR.md §1.) @@ -292,9 +332,10 @@ class OpenAIAdapter( mapOf( "message" to mapOf( "role" to role, - "content" to content + "content" to content, + "tool_calls" to toolCalls ), - "finish_reason" to status + "finish_reason" to if (toolCalls.isNotEmpty()) "tool_calls" else status ) ) @@ -495,6 +536,19 @@ class OpenAIAdapter( } } + // Add native tools if requested and model supports function calling + @Suppress("UNCHECKED_CAST") + val nativeTools = kwargs["native_tools"] as? 
List + if (!nativeTools.isNullOrEmpty()) { + baseParams["tools"] = if (definition?.apiFormat == pl.jclab.refio.core.llm.ApiFormat.RESPONSES) { + buildResponsesToolsArray(nativeTools) + } else { + buildOpenAIToolsArray(nativeTools) + } + baseParams["tool_choice"] = "auto" + logger.info { "[OPENAI][NATIVE_TOOLS] Sending ${nativeTools.size} tool schemas" } + } + with(OpenAICompatibleHelpers) { baseParams.addCommonKwargs(kwargs) } // Apply format transformation if needed @@ -525,11 +579,62 @@ class OpenAIAdapter( // Stream aborted by a guardrail (see core/llm/streaming/) — must propagate // so the caller can see StreamAbortedException instead of RefioError.LLMError. throw e + } catch (e: ToolsNotSupportedException) { + throw e } catch (e: Exception) { throw LLMErrorMapper.fromThrowable(provider, model, timeoutMs, e) } } + private fun buildOpenAIToolsArray(tools: List): List> = + tools.map { rawTool -> + val tool = ToolSchemaSanitizer.forOpenAI(rawTool).tool + mapOf( + "type" to "function", + "function" to mapOf( + "name" to tool.name, + "description" to tool.description, + "parameters" to tool.parametersJsonSchema, + ) + ) + } + + private fun buildResponsesToolsArray(tools: List): List> = + tools.map { rawTool -> + val sanitized = ToolSchemaSanitizer.forOpenAI(rawTool) + if (!sanitized.strict) { + logger.warn { + "[OPENAI][NATIVE_TOOLS] Tool '${rawTool.name}' is not strict-compatible; " + + "sending strict=false. Reasons: ${sanitized.strictIncompatibilities.joinToString("; ")}" + } + } + val tool = sanitized.tool + mapOf( + "type" to "function", + "name" to tool.name, + "description" to tool.description, + "parameters" to tool.parametersJsonSchema, + "strict" to sanitized.strict + ) + } + + private fun parseNativeOpenAIToolCalls(rawToolCalls: Any?): List { + @Suppress("UNCHECKED_CAST") + val toolCalls = rawToolCalls as? List> ?: return emptyList() + return toolCalls.mapNotNull { call -> + val id = call["id"] as? 
String ?: return@mapNotNull null + @Suppress("UNCHECKED_CAST") + val function = call["function"] as? Map ?: return@mapNotNull null + val name = function["name"] as? String ?: return@mapNotNull null + val argsString = function["arguments"] as? String ?: "{}" + NativeToolCall( + id = id, + name = ToolCallContentNormalizer.normalizeToolName(name), + argumentsJson = argsString + ) + } + } + private suspend fun executeStandard( apiKey: String, requestBody: Map, @@ -603,6 +708,9 @@ class OpenAIAdapter( source = source ) + if (requestBody.containsKey("tools") && isToolsNotSupportedError(httpStatus, errorMessage)) { + throw ToolsNotSupportedException(fullErrorMessage) + } throw LLMErrorMapper.fromHttpStatus(provider, model, httpStatus, fullErrorMessage) } @@ -652,16 +760,24 @@ class OpenAIAdapter( @Suppress("UNCHECKED_CAST") val message = choice["message"] as? Map ?: emptyMap() val content = message["content"] as? String ?: "" - val normalizedToolCallsJson = if (content.isBlank()) { - ToolCallContentNormalizer.fromOpenAiToolCalls(message["tool_calls"]) + val finishReason = choice["finish_reason"] as? String + + val toolsWereRequested = requestBody.containsKey("tools") + val nativeToolCalls: List? 
= if (toolsWereRequested) { + parseNativeOpenAIToolCalls(message["tool_calls"]).also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] calls=${calls.size}" } + } + } else null + + val finalContent = if (nativeToolCalls == null && content.isBlank()) { + val normalizedToolCallsJson = ToolCallContentNormalizer.fromOpenAiToolCalls(message["tool_calls"]) + if (normalizedToolCallsJson != null) { + logger.info { "$logPrefix [TOOL_CALLS_NORMALIZED] Converted OpenAI tool_calls to canonical JSON content" } + } + normalizedToolCallsJson ?: content } else { - null - } - val finalContent = normalizedToolCallsJson ?: content - if (normalizedToolCallsJson != null) { - logger.info { "$logPrefix [TOOL_CALLS_NORMALIZED] Converted OpenAI tool_calls to canonical JSON content" } + content } - val finishReason = choice["finish_reason"] as? String logger.info { "$logPrefix Response processed: model=$responseModel, " + @@ -676,7 +792,8 @@ class OpenAIAdapter( provider = provider, cost = cost, finishReason = finishReason, - rawResponse = response + rawResponse = response, + nativeToolCalls = nativeToolCalls ) } catch (e: Exception) { @@ -852,7 +969,12 @@ class OpenAIAdapter( } } - if (contentBuilder.isEmpty()) { + val toolsWereRequested = requestBody.containsKey("tools") + val streamNativeToolCalls = toolCallAccumulator.toNativeToolCalls(toolsWereRequested)?.also { calls -> + logger.info { "$logPrefix [NATIVE_TOOLS_RESPONSE] (stream) calls=${calls.size}" } + } + + if (streamNativeToolCalls == null && contentBuilder.isEmpty()) { val normalizedToolCallsJson = toolCallAccumulator.toCanonicalJson() if (normalizedToolCallsJson != null) { contentBuilder.append(normalizedToolCallsJson) @@ -929,7 +1051,8 @@ class OpenAIAdapter( provider = provider, cost = cost, finishReason = finalFinishReason, - rawResponse = syntheticResponse + rawResponse = syntheticResponse, + nativeToolCalls = streamNativeToolCalls ) } catch (e: Exception) { @@ -965,6 +1088,7 @@ class OpenAIAdapter( var 
finalFinishReason: String? = null var finalUsage: LLMUsage? = null var finalRawResponse: Map? = null + val responseOutputItems = linkedMapOf>() val endpoint = getEndpoint(definition) try { @@ -993,6 +1117,9 @@ class OpenAIAdapter( subtaskId = subtaskId, source = source ) + if (requestBody.containsKey("tools") && isToolsNotSupportedError(httpStatus ?: 500, errorMessage)) { + throw ToolsNotSupportedException(errorMessage) + } throw IllegalStateException(errorMessage) } @@ -1040,6 +1167,38 @@ class OpenAIAdapter( } } + "response.output_item.added", + "response.output_item.done" -> { + @Suppress("UNCHECKED_CAST") + val item = (eventData["item"] as? Map)?.toMutableMap() + val index = (eventData["output_index"] as? Number)?.toInt() + if (item != null && index != null) { + responseOutputItems[index] = item + } + } + + "response.function_call_arguments.delta" -> { + val index = (eventData["output_index"] as? Number)?.toInt() + val delta = eventData["delta"] as? String + if (index != null && delta != null) { + val item = responseOutputItems.getOrPut(index) { + mutableMapOf("type" to "function_call") + } + val existing = item["arguments"] as? String ?: "" + item["arguments"] = existing + delta + (eventData["item_id"] as? String)?.let { item["id"] = it } + } + } + + "response.function_call_arguments.done" -> { + @Suppress("UNCHECKED_CAST") + val item = (eventData["item"] as? Map)?.toMutableMap() + val index = (eventData["output_index"] as? Number)?.toInt() + if (item != null && index != null) { + responseOutputItems[index] = item + } + } + "response.completed" -> { @Suppress("UNCHECKED_CAST") finalRawResponse = eventData["response"] as? 
Map @@ -1085,12 +1244,15 @@ class OpenAIAdapter( ) } + val streamedOutputItems = responseOutputItems.toSortedMap().values.map { it.toMap() } val rawResponse = finalRawResponse ?: buildSyntheticResponsesPayload( content = contentBuilder.toString(), finishReason = finalFinishReason, - usage = usage + usage = usage, + outputItems = streamedOutputItems ) + val transformedResponse = transformResponseFromResponses(rawResponse) val responseJson = gson.toJson(rawResponse) val cost = estimateCost(usage) @@ -1132,7 +1294,14 @@ class OpenAIAdapter( provider = provider, cost = cost, finishReason = finalFinishReason, - rawResponse = transformResponseFromResponses(rawResponse) + rawResponse = transformedResponse, + nativeToolCalls = run { + @Suppress("UNCHECKED_CAST") + val choices = transformedResponse["choices"] as? List> ?: emptyList() + @Suppress("UNCHECKED_CAST") + val message = choices.firstOrNull()?.get("message") as? Map ?: emptyMap() + if (requestBody.containsKey("tools")) parseNativeOpenAIToolCalls(message["tool_calls"]) else null + } ) } catch (e: Exception) { val latencyMs = (System.currentTimeMillis() - startTime).toInt() @@ -1276,15 +1445,14 @@ class OpenAIAdapter( private fun buildSyntheticResponsesPayload( content: String, finishReason: String?, - usage: LLMUsage + usage: LLMUsage, + outputItems: List> = emptyList() ): Map { val status = finishReason ?: "completed" - - return mapOf( - "id" to "synthetic_${UUID.randomUUID()}", - "model" to model, - "status" to status, - "output" to listOf( + val finalOutput = if (outputItems.isNotEmpty()) { + outputItems + } else { + listOf( mapOf( "type" to "message", "role" to "assistant", @@ -1296,7 +1464,14 @@ class OpenAIAdapter( ) ) ) - ), + ) + } + + return mapOf( + "id" to "synthetic_${UUID.randomUUID()}", + "model" to model, + "status" to status, + "output" to finalOutput, "usage" to mapOf( "input_tokens" to usage.inputTokens, "output_tokens" to usage.outputTokens, @@ -1398,6 +1573,28 @@ class OpenAIAdapter( ) } + private 
fun isToolsNotSupportedError(httpStatus: Int, errorMessage: String): Boolean { + if (httpStatus != 400 && httpStatus != 422) return false + val lower = errorMessage.lowercase() + val mentionsTooling = + lower.contains("tools") || + lower.contains("tool_choice") || + lower.contains("tool calling") || + lower.contains("function calling") || + lower.contains("function '") || + lower.contains("schema for function") + val unsupportedShape = + lower.contains("not supported") || + lower.contains("unsupported") || + lower.contains("unknown parameter") || + lower.contains("unexpected parameter") || + lower.contains("invalid parameter") || + lower.contains("invalid schema") || + lower.contains("invalid_function_parameters") || + lower.contains("additionalproperties") + return mentionsTooling && unsupportedShape + } + override suspend fun close() { if (ownsHttpClient) { client.close() diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenRouterAdapter.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenRouterAdapter.kt index e7a2b365..27fd29d3 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenRouterAdapter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/OpenRouterAdapter.kt @@ -148,7 +148,7 @@ class OpenRouterAdapter( ModelConfig( id = modelId, - name = "$modelName (via OpenRouter)", + name = modelName, provider = "openrouter", capabilities = capabilities, maxContext = contextLength, diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/ToolCallContentNormalizer.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/ToolCallContentNormalizer.kt index 59a4061e..3d8583ee 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/ToolCallContentNormalizer.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/ToolCallContentNormalizer.kt @@ -1,5 +1,6 @@ package pl.jclab.refio.core.llm.adapters +import pl.jclab.refio.core.llm.NativeToolCall import 
pl.jclab.refio.core.utils.GsonInstance.gson internal object ToolCallContentNormalizer { @@ -10,6 +11,7 @@ internal object ToolCallContentNormalizer { internal class OpenAiStreamingToolCallAccumulator { private data class CallBuffer( + var id: String? = null, var name: String? = null, val argumentsBuilder: StringBuilder = StringBuilder() ) @@ -29,6 +31,9 @@ internal object ToolCallContentNormalizer { val index = (toolCall["index"] as? Number)?.toInt() ?: fallbackIndex val buffer = callsByIndex.getOrPut(index) { CallBuffer() } + val id = toolCall["id"] as? String + if (!id.isNullOrBlank()) buffer.id = id + @Suppress("UNCHECKED_CAST") val function = toolCall["function"] as? Map ?: continue val name = function["name"] as? String @@ -48,6 +53,21 @@ internal object ToolCallContentNormalizer { } } + fun toNativeToolCalls(toolsWereRequested: Boolean): List? { + if (!toolsWereRequested) return null + return callsByIndex + .toSortedMap() + .values + .mapNotNull { buffer -> + val name = buffer.name ?: return@mapNotNull null + NativeToolCall( + id = buffer.id ?: java.util.UUID.randomUUID().toString(), + name = name, + argumentsJson = buffer.argumentsBuilder.toString().ifEmpty { "{}" } + ) + } + } + fun toCanonicalJson(): String? 
{ val namedCalls = callsByIndex .toSortedMap() diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/AgentTurnLoop.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/AgentTurnLoop.kt index 03920cc3..921d9e5e 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/AgentTurnLoop.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/AgentTurnLoop.kt @@ -25,6 +25,7 @@ import pl.jclab.refio.core.services.AgentTurnLoop.UserMessageStrategy import pl.jclab.refio.core.services.monitoring.GlobalMetrics import pl.jclab.refio.core.services.monitoring.OperationInfo import pl.jclab.refio.core.services.turn.ToolCallParser +import pl.jclab.refio.core.services.turn.TrivialTaskDetector import pl.jclab.refio.core.services.turn.TurnEventListener import pl.jclab.refio.core.services.turn.TurnPrompt import pl.jclab.refio.core.services.turn.GuardianContext @@ -47,6 +48,14 @@ import pl.jclab.refio.core.services.turn.TurnSubagentValidator import pl.jclab.refio.core.services.turn.TurnToolExecutor import pl.jclab.refio.core.services.turn.ToolRejectedException import pl.jclab.refio.core.tools.base.ToolRegistry +import pl.jclab.refio.core.tools.base.ToolSchema +import pl.jclab.refio.core.llm.ModelDefinitions +import pl.jclab.refio.core.llm.NativeToolCall +import pl.jclab.refio.core.llm.NativeToolsFallbackTracker +import pl.jclab.refio.core.llm.ToolsNotSupportedException +import pl.jclab.refio.core.llm.parseNativeToolsMode +import pl.jclab.refio.core.llm.shouldUseNativeTools +import pl.jclab.refio.core.db.ToolCallData import pl.jclab.refio.core.logging.dualLogger import java.util.* import java.util.concurrent.CancellationException @@ -113,7 +122,8 @@ class AgentTurnLoop( private val workingMemoryIntegration: WorkingMemoryIntegration? = null, private val pendingUserMessageQueue: PendingUserMessageQueue? = null, private val agentEventBus: pl.jclab.refio.core.agents.events.AgentEventBus? = null, - private val hookService: pl.jclab.refio.core.services.hooks.HookService? 
= null + private val hookService: pl.jclab.refio.core.services.hooks.HookService? = null, + private val toolPermissionsService: ToolPermissionsService? = null ) { private val json = Json { ignoreUnknownKeys = true; prettyPrint = false } @@ -400,6 +410,7 @@ class AgentTurnLoop( // NOTE: Nudges are skipped when the model previously executed tool calls — plain text // after successful tool usage is treated as intentional completion, not format loss. var plainTextNudgeCount = 0 + var lastPlainTextContent: String? = null var totalTokensIn = 0 var totalTokensOut = 0 var totalCost = 0.0 @@ -458,6 +469,36 @@ class AgentTurnLoop( ) val responseFormat = turnLLMCaller.resolveResponseFormat(mode, effectiveProvider) + // Resolve native tools mode once per turn (not per iteration — model/config don't change mid-turn) + val initialNativeToolSchemas: List? = run { + val svc = toolPermissionsService ?: return@run null + val nativeModeRaw = configService.getTyped(ConfigKeys.NATIVE_TOOLS_MODE, taskId) + val nativeToolsMode = parseNativeToolsMode(nativeModeRaw) + val modelDef = ModelDefinitions.getDefinition(effectiveProvider, effectiveModel) + if (shouldUseNativeTools(nativeToolsMode, modelDef, effectiveModel, NativeToolsFallbackTracker.getFallbackSet())) { + val modeSchemas = toolRegistry.getToolSchemas(mode, svc, taskId) + // Subagent profiles must see ONLY their allowed/disallowed tools in the native + // `tools` array — otherwise the model calls tools the harness then rejects with + // "Tool 'X' is not available to the subagent". The prompt's + // is already filtered via resolveToolDescriptionsForProfile; this aligns the + // native channel. 
+ val filtered = turnPromptBuilder.filterNativeToolSchemasByProfile(modeSchemas, profileOverrides) + if (filtered.size != modeSchemas.size) { + logger.info { + "[NATIVE_TOOLS] Filtered schemas for profile '${profileOverrides?.subagentName ?: "?"}': " + + "${modeSchemas.size} → ${filtered.size}" + } + } + filtered + } else { + null + } + } + var activeNativeToolSchemas = initialNativeToolSchemas + activeNativeToolSchemas?.let { schemas -> + logger.info { "[NATIVE_TOOLS] Enabled for taskId=$taskId, mode=$mode, schemas=${schemas.size}" } + } + // Wire turn state updater so TurnToolExecutor can set WAITING_FOR_PERMISSION turnToolExecutor.turnStateUpdater = { phase -> updateTurnState { copy(phase = phase) } @@ -507,13 +548,36 @@ class AgentTurnLoop( OperationInfo.TurnBuildingPrompt(iteration, turnPromptBuilder.getHistorySize(taskId)) ) + // Trivial-task harness guard: when the user prompt is "create file X with Y" + // and we're on the very first iteration, narrow the tool schema to write tools + // only. Stops weak models from burning 3+ turns on file_search / read_directory + // pre-flight reads. Subagent profiles already had their tools filtered, so we + // only narrow the top-level (DEFAULT profile) call. 
+ val schemasSnapshot = activeNativeToolSchemas + val iterationNativeToolSchemas = if ( + iteration == 1 + && profileOverrides?.subagentName == null + && schemasSnapshot != null + ) { + TrivialTaskDetector.maybeRestrictForIteration1( + schemas = schemasSnapshot, + userInput = userMessageStrategy.getUserMessage(taskId), + iteration = iteration, + modeName = mode.name + ) + } else { + schemasSnapshot + } + + val useNativeTools = iterationNativeToolSchemas != null + // Auto-compact if context window is filling if (config.enableAutoCompaction && conversationCompactor != null) { val maxTokens = tokenEstimator.getSafeTokenLimit(effectiveProvider, effectiveModel) val tempPrompt = buildPrompt( taskId, mode, iteration, maxIterations, userContextRefs, runProfile, profileOverrides, - writeToolsExecutedInTurn + writeToolsExecutedInTurn, useNativeTools ) val (fits, estimated) = tokenEstimator.checkFits(tempPrompt, maxTokens, provider = effectiveProvider) @@ -527,10 +591,10 @@ class AgentTurnLoop( } } - val prompt = buildPrompt( + var prompt = buildPrompt( taskId, mode, iteration, maxIterations, userContextRefs, runProfile, profileOverrides, - writeToolsExecutedInTurn + writeToolsExecutedInTurn, useNativeTools ) // Call LLM @@ -540,31 +604,64 @@ class AgentTurnLoop( val llmCallStartNanos = System.nanoTime() // Mutable so the empty-content recovery path below can re-bind it after pulling // a JSON envelope out of the `thinking` field (qwen3 / Ollama edge case). - var llmResponse = if (config.maxRetries > 0 && llmRetryHandler != null) { + suspend fun callModelWithPrompt( + currentPrompt: TurnPrompt, + nativeSchemas: List? 
+ ) = if (config.maxRetries > 0 && llmRetryHandler != null) { + val thinkingRequested = configService.getTyped(ConfigKeys.GENERAL_THINKING_ENABLED, taskId) llmRetryHandler.callWithRetry( provider = effectiveProvider, model = effectiveModel, - messages = prompt.messages, - systemPrompt = prompt.systemPrompt, + messages = currentPrompt.messages, + systemPrompt = currentPrompt.systemPrompt, taskId = taskId, source = "AgentTurnLoop", maxRetries = config.maxRetries, baseDelayMs = config.retryBackoffMs, responseFormat = responseFormat, + thinking = turnLLMCaller.resolveThinkingEnabled(effectiveProvider, effectiveModel, thinkingRequested), + reasoningEffort = profileOverrides?.reasoningEffort, + noEgressEnabled = configService.getTyped(ConfigKeys.GENERAL_NO_EGRESS_ENABLED, taskId), stream = effectiveStreamCallback != null, - onChunk = effectiveStreamCallback + onChunk = effectiveStreamCallback, + kwargs = nativeSchemas?.let { mapOf("native_tools" to it) } ?: emptyMap() ) } else { turnLLMCaller.callLLM( taskId = taskId, mode = mode, - prompt = prompt, + prompt = currentPrompt, streamCallback = effectiveStreamCallback, model = effectiveModel, provider = effectiveProvider, - profileOverrides = profileOverrides + profileOverrides = profileOverrides, + nativeToolSchemas = nativeSchemas ) } + + var llmResponse: pl.jclab.refio.core.llm.LLMResponse + while (true) { + try { + llmResponse = callModelWithPrompt(prompt, if (activeNativeToolSchemas != null) iterationNativeToolSchemas else null) // once the fallback below has cleared activeNativeToolSchemas, the retry must go out WITHOUT native schemas; resending them would only trigger the same ToolsNotSupportedException again + break + } catch (e: ToolsNotSupportedException) { + val modelKey = effectiveModel ?: "unknown" + NativeToolsFallbackTracker.markFallback(modelKey, e.message ?: "provider error") + if (activeNativeToolSchemas == null) { + throw e + } + logger.warn { + "[NATIVE_TOOLS_FALLBACK] taskId=$taskId, model=$modelKey — " + + "rebuilding prompt and retrying on JSON path" + } + activeNativeToolSchemas = null + prompt = buildPrompt( + taskId, mode, iteration, maxIterations, + userContextRefs, runProfile, profileOverrides, + writeToolsExecutedInTurn,
false + ) + } + } val llmDurationMs = (System.nanoTime() - llmCallStartNanos) / 1_000_000 // Emit LLMCallCompleted for Session Trace panel / cost analytics @@ -597,6 +694,20 @@ class AgentTurnLoop( totalTokensOut += llmResponse.usage.outputTokens totalCost += llmResponse.cost + // Persist per-iteration metrics on the task row so HistoryPanel + // and live stats bar can show running totals without aggregating + // from message metadata. + try { + taskRepository.incrementMetrics( + id = taskId, + tokensIn = llmResponse.usage.inputTokens, + tokensOut = llmResponse.usage.outputTokens, + costUsd = llmResponse.cost + ) + } catch (e: Exception) { + logger.warn(e) { "[AGENT_TURN_LOOP] Failed to increment task metrics for $taskId" } + } + // Populate per-session metrics for Session Trace footer / cost analytics GlobalMetrics.forAgent(taskId).recordTokens( tokensIn = llmResponse.usage.inputTokens, @@ -613,13 +724,15 @@ class AgentTurnLoop( durationMs = llmDurationMs ) - if (mode != TaskMode.CHAT && llmResponse.content.isBlank()) { + if (mode != TaskMode.CHAT && llmResponse.content.isBlank() && llmResponse.nativeToolCalls == null) { // Fallback 1: recover JSON from the thinking field. Some Ollama setups // (qwen3 with think=true defaulted) emit the JSON envelope inside `thinking` // while `content` stays empty. We accept recovery if thinking *looks like* // a JSON envelope — either it parses to tool calls, OR its trimmed form // starts with `{` (a final-response envelope without `actions`). The // downstream pipeline handles both shapes. + // Note: when nativeToolCalls != null, empty content is normal — tool calls + // come via the native API channel, not as text content. val thinking = llmResponse.thinking val thinkingTrimmed = thinking?.trim().orEmpty() val looksLikeEnvelope = thinkingTrimmed.startsWith("{") @@ -674,7 +787,7 @@ class AgentTurnLoop( content = "Your previous reply contained empty content in structured JSON mode. 
" + "Generate the full JSON envelope again from scratch. " + "Do not continue or patch the previous output. Reply with JSON only: " + - "{\"actions\":[{\"tool\":\"NAME\",\"arguments\":{...}}]," + + "{\"actions\":[{\"tool\":\"NAME\",\"args\":{...}}]," + "\"response\":\"...\",\"intent\":\"implementation\"}. " + "No prose, no markdown fences.", toolCalls = null, @@ -709,15 +822,56 @@ class AgentTurnLoop( } } - // Check if model invoked tools - val contentForExtraction = toolCallParser.preprocessContent(llmResponse.content, taskId) - val jsonEnvelopeInspection = toolCallParser.inspectJsonEnvelope(contentForExtraction) - val toolCalls = toolCallParser.extractToolCalls(contentForExtraction, mode, profileOverrides) - val looksLikeJsonResponse = - jsonEnvelopeInspection.hasJsonEnvelope || contentForExtraction.trim().startsWith("[") + // Check if model invoked tools — two paths: + // 1. Native path: llmResponse.nativeToolCalls != null (set by adapter when tools were requested) + // 2. JSON-in-text path: classic ToolCallParser extraction from content + val nativeCalls = llmResponse.nativeToolCalls + val contentForExtraction: String + val toolCalls: List + val looksLikeJsonResponse: Boolean + val jsonEnvelopeInspection: pl.jclab.refio.core.services.turn.ToolCallParser.JsonEnvelopeInspection + + if (nativeCalls != null) { + // Native function-calling path — skip JSON-in-text parsing entirely + logger.info { "[NATIVE_TOOLS_PATH] taskId=$taskId, iteration=$iteration, calls=${nativeCalls.size}" } + contentForExtraction = llmResponse.content + toolCalls = nativeCalls.map { native -> + ToolCallData(id = native.id, name = native.name, arguments = native.argumentsJson) + } + looksLikeJsonResponse = false + jsonEnvelopeInspection = toolCallParser.inspectJsonEnvelope("") + } else if (activeNativeToolSchemas != null && isJsonEnvelopeFallback(llmResponse.content)) { + // Native tools were requested but the model emitted a JSON envelope in text + // instead of native tool_calls. 
Treat this as a native-calling failure: + // mark the model in the session fallback cache so subsequent iterations use + // the JSON-in-text path, and parse the envelope we already have. + val modelKey = effectiveModel ?: "unknown" + NativeToolsFallbackTracker.markFallback( + modelKey, + "model ignored native tool_calls and emitted JSON envelope in text" + ) + logger.warn { + "[NATIVE_TOOLS_FALLBACK] taskId=$taskId, model=$modelKey — parsing envelope " + + "from text content; future iterations will use JSON-in-text path" + } + activeNativeToolSchemas = null + contentForExtraction = toolCallParser.preprocessContent(llmResponse.content, taskId) + jsonEnvelopeInspection = toolCallParser.inspectJsonEnvelope(contentForExtraction) + toolCalls = toolCallParser.extractToolCalls(contentForExtraction, mode, profileOverrides) + looksLikeJsonResponse = + jsonEnvelopeInspection.hasJsonEnvelope || contentForExtraction.trim().startsWith("[") + } else { + // Classic JSON-in-text path + contentForExtraction = toolCallParser.preprocessContent(llmResponse.content, taskId) + jsonEnvelopeInspection = toolCallParser.inspectJsonEnvelope(contentForExtraction) + toolCalls = toolCallParser.extractToolCalls(contentForExtraction, mode, profileOverrides) + looksLikeJsonResponse = + jsonEnvelopeInspection.hasJsonEnvelope || contentForExtraction.trim().startsWith("[") + } - // Check for truncated response with incomplete JSON + // Check for truncated response with incomplete JSON (JSON-in-text path only) val isTruncatedWithIncompleteJson = + nativeCalls == null && llmResponse.finishReason == "length" && jsonEnvelopeInspection.hasJsonEnvelope && !jsonEnvelopeInspection.isComplete && @@ -751,12 +905,26 @@ class AgentTurnLoop( // read-only loops, transient HTTP errors etc.; those were all removed // because they confused the model more than they helped. - // Save assistant message with tool calls + // Save assistant message with tool calls. 
+ // The persisted `content` depends on the path; tool calls themselves are always + // stored structurally in `toolCalls` for execution and adapter mapping back to + // native tool_use on later turns. + // JSON-in-text path: persist the model's raw content unchanged. + // + // Native path: preserve original LLM text for UI display (e.g. "I'll create + // the file…"). Tool calls are stored structurally in toolCallsJson so the + // LLM history is correctly reconstructed independently of the content field. + // Fall back to a canonical {response, actions} envelope (so the UI can render a "Plan" bubble) only when the model produced no text at all. logger.info { "[TOOL_CALLS] taskId=$taskId, count=${toolCalls.size}" } + val assistantContent = if (nativeCalls != null) { + llmResponse.content.takeIf { it.isNotBlank() } ?: buildActionsEnvelopeJson(toolCalls) + } else { + llmResponse.content + } chatMessageRepository.create( taskId = taskId, role = MessageRole.ASSISTANT, - content = llmResponse.content, + content = assistantContent, thinking = turnResponseProcessor.resolveAssistantThinking(llmResponse), toolCalls = toolCalls, tokensIn = llmResponse.usage.inputTokens, @@ -1131,25 +1299,128 @@ class AgentTurnLoop( // contradict the intentional simplification in TurnGuardrails. val hasIncompleteJsonEnvelope = jsonEnvelopeInspection.hasJsonEnvelope && !jsonEnvelopeInspection.isComplete - // Skip nudge when the model previously produced valid JSON with tool calls - // and now returns plain text — this is an intentional completion, not a - // format lapse. Only nudge when the model never succeeded with JSON format - // (usedTools is empty) or when the envelope is structurally incomplete - // (truncated response that needs regeneration). - val modelPreviouslyUsedTools = usedTools.isNotEmpty() + // Detect "effectively empty" JSON envelope: model returned a complete object + // like `{}` or `{"response":""}` with no actions/subtasks and no prose.
+ // Seen with MiniMax under native-tools + response_format=json_object conflict + // (fixed separately in TurnLLMCaller). Without this guard the turn exits as + // success=true with 0 actions and the user sees nothing happen. + val isEffectivelyEmptyEnvelope = + jsonEnvelopeInspection.hasJsonEnvelope && + jsonEnvelopeInspection.isComplete && + toolCalls.isEmpty() && + nativeCalls == null && + isEmptyJsonEnvelope(contentForExtraction) + // Native-tools mode: model emitted a tool call in text instead of via the + // native tool_calls channel. Two observed patterns: + // - pseudo-XML: ..., ... + // - inline JSON: {"name":"foo","arguments":{...}} or {"tool_calls":[...]} + // Parser extracts nothing from these (shape isn't {actions:[...]}), so + // without this guard the turn exits as success=true with 0 actions. + // Observed with glm-5, glm-5.1, glm-4.7 on Z.AI. + val nativeTextEmbeddedToolCall = + activeNativeToolSchemas != null && + nativeCalls == null && + toolCalls.isEmpty() && + looksLikeTextEmbeddedToolCall(contentForExtraction) + // Native-tools mode: model emitted a short intent announcement ("Let me + // first check...", "I'll create the game...") without any tool call, on + // iteration 1. User asked for work to be done, model is stalling. Nudge + // once before accepting prose as a terminal answer. + val nativeIntentAnnouncement = + activeNativeToolSchemas != null && + nativeCalls == null && + toolCalls.isEmpty() && + iteration == 1 && + looksLikeIntentAnnouncement(contentForExtraction) + // In the JSON-envelope path, AGENT mode never uses plain text as a + // terminal signal — completion is `{"actions": []}`. So ANY blank/prose + // reply (no envelope) is a format lapse, regardless of whether the model + // previously produced valid envelopes. We used to skip the nudge when + // `usedTools` was non-empty (assuming prose = "I'm done"), but weaker + // models routinely emit a tool call in turn 1 and then lapse into prose + // like "File doesn't exist. 
I'll create X..." in turn 2 without ever + // re-emitting the envelope. That produced silent success with no action. + val isRepeatedPlainText = + !looksLikeJsonResponse && + contentForExtraction.isNotBlank() && + lastPlainTextContent?.trim() == contentForExtraction.trim() + // When native function-calling is active and the model returned prose + // without any tool_calls, that is a legitimate final answer — not a + // format lapse. The JSON-envelope contract only applies to the legacy + // JSON-in-text path. Skipping the guard here lets informational answers + // ("Co to za projekt?") terminate cleanly in AGENT mode instead of being + // nudged into a JSON envelope the model was never asked to emit. + val nativeToolsActive = activeNativeToolSchemas != null + val requiresFormatRetry = - mode == TaskMode.AGENT && + nativeCalls == null && + mode == TaskMode.AGENT && contentForExtraction.isNotBlank() && plainTextNudgeCount < 2 && iteration < maxIterations && - (hasIncompleteJsonEnvelope || (!looksLikeJsonResponse && !modelPreviouslyUsedTools)) + !isRepeatedPlainText && + ( + isEffectivelyEmptyEnvelope || + nativeTextEmbeddedToolCall || + nativeIntentAnnouncement || + (!nativeToolsActive && (hasIncompleteJsonEnvelope || !looksLikeJsonResponse)) + ) + + // Hard-fail when nudges are exhausted (or model is repeating itself). + // Returning prose as `success=true` would let the UI claim the task is + // done while the model has neither executed a tool this turn nor emitted + // a proper terminal envelope. Does not apply in native tool-calling mode. 
+ val shouldHardFailFormat = + !nativeToolsActive && + nativeCalls == null && + mode == TaskMode.AGENT && + contentForExtraction.isNotBlank() && + !looksLikeJsonResponse && + !hasIncompleteJsonEnvelope && + (plainTextNudgeCount >= 2 || isRepeatedPlainText) + + if (shouldHardFailFormat) { + logger.error { + "[FORMAT_UNRECOVERABLE] taskId=$taskId, iteration=$iteration: " + + "model kept returning plain text after $plainTextNudgeCount nudge(s) " + + "(repeated=$isRepeatedPlainText, toolsUsedSoFar=${usedTools.size}). Failing turn." + } + val result = TurnResult( + success = false, + response = "The model kept replying with plain text instead of the required JSON " + + "envelope and never produced a tool call. Nudges were exhausted. " + + "This usually means the selected model cannot follow the structured format " + + "— try a model tuned for tool use (e.g. qwen3.5:9b, llama3.1) or enable native " + + "function-calling.", + iterations = iteration, + tokensIn = totalTokensIn, + tokensOut = totalTokensOut, + cost = totalCost, + toolsUsed = usedTools.distinct() + ) + return turnFinalizer.completeTurn( + taskId, + result, + listener, + runId, + parentRunId, + depth, + persistAssistantMessage = true, + metadata = subagentMetadata, + agentName = persistAgentName, + agentDepth = persistAgentDepth, + ) + } if (requiresFormatRetry) { + lastPlainTextContent = contentForExtraction plainTextNudgeCount++ - val retryReason = if (hasIncompleteJsonEnvelope) { - "LLM returned incomplete JSON envelope" - } else { - "LLM returned plain text without JSON structure" + val retryReason = when { + isEffectivelyEmptyEnvelope -> "LLM returned empty JSON envelope (no actions, no response)" + nativeTextEmbeddedToolCall -> "LLM emitted tool call in text content instead of native tool_calls channel" + nativeIntentAnnouncement -> "LLM announced intent in prose without emitting a native tool call" + hasIncompleteJsonEnvelope -> "LLM returned incomplete JSON envelope" + else -> "LLM returned plain text 
without JSON structure" } logger.warn { "[FORMAT_RETRY_NUDGE] taskId=$taskId, iteration=$iteration: " + @@ -1174,17 +1445,29 @@ class AgentTurnLoop( chatMessageRepository.create( taskId = taskId, role = MessageRole.SYSTEM, - content = if (hasIncompleteJsonEnvelope) { - "Your previous reply contained incomplete JSON. Generate the full JSON envelope again from scratch. " + - "Do not continue or patch the previous output. Reply with JSON only: " + - "{\"actions\":[{\"tool\":\"NAME\",\"arguments\":{...}}]," + - "\"response\":\"...\",\"intent\":\"implementation\"}. " + - "No prose, no markdown fences." - } else { - "Reply with JSON only: " + - "{\"actions\":[{\"tool\":\"NAME\",\"arguments\":{...}}]," + - "\"response\":\"...\",\"intent\":\"implementation\"}. " + - "No prose, no markdown fences." + content = when { + nativeTextEmbeddedToolCall -> + "Your previous reply embedded a tool call inside the text content " + + "(e.g. ... or JSON in prose). Those are " + + "ignored. Tools must be invoked through the native tool_calls / " + + "tool_use channel of this API — emit them as structured tool_calls, " + + "not as text. Retry now using the native channel." + nativeIntentAnnouncement -> + "You announced an intent (\"let me check\", \"I'll create...\") but did " + + "not invoke any tool. Do not narrate — call the tool directly via " + + "the native tool_calls channel. If the task is already complete, " + + "reply with the final answer instead of an intent announcement." + hasIncompleteJsonEnvelope -> + "Your previous reply contained incomplete JSON. Generate the full JSON envelope again from scratch. " + + "Do not continue or patch the previous output. Reply with JSON only: " + + "{\"actions\":[{\"tool\":\"NAME\",\"args\":{...}}]," + + "\"response\":\"...\",\"intent\":\"implementation\"}. " + + "No prose, no markdown fences." + else -> + "Reply with JSON only: " + + "{\"actions\":[{\"tool\":\"NAME\",\"args\":{...}}]," + + "\"response\":\"...\",\"intent\":\"implementation\"}. 
" + + "No prose, no markdown fences." }, toolCalls = null, agentName = persistAgentName, @@ -1193,7 +1476,7 @@ class AgentTurnLoop( continue } - if (mode == TaskMode.AGENT && hasIncompleteJsonEnvelope) { + if (nativeCalls == null && mode == TaskMode.AGENT && hasIncompleteJsonEnvelope) { logger.error { "[MALFORMED_JSON_ENVELOPE] taskId=$taskId, iteration=$iteration: " + "assistant returned incomplete JSON after retries exhausted" @@ -1289,7 +1572,7 @@ class AgentTurnLoop( logger.info { "[TURN_COMPLETE] taskId=$taskId, iterations=$iteration" } val textResponse = toolCallParser.extractTextResponse(llmResponse.content) - turnResponseProcessor.tryCreatePlanSubtasks(taskId, mode, executionMode, llmResponse) + turnResponseProcessor.tryCreatePlanSubtasks(taskId, mode, executionMode, llmResponse, runProfile) chatMessageRepository.create( taskId = taskId, @@ -1466,7 +1749,8 @@ class AgentTurnLoop( userContextRefs: List = emptyList(), runProfile: TurnRunProfile = TurnRunProfile.DEFAULT, profileOverrides: TurnProfileOverrides? = null, - writeToolsExecutedInTurn: Int = 0 + writeToolsExecutedInTurn: Int = 0, + useNativeTools: Boolean = false ): TurnPrompt { val turnPrompt = turnPromptBuilder.buildPrompt( taskId = taskId, @@ -1476,7 +1760,8 @@ class AgentTurnLoop( userContextRefs = userContextRefs, runProfile = runProfile, profileOverrides = profileOverrides, - writeToolsExecutedInTurn = writeToolsExecutedInTurn + writeToolsExecutedInTurn = writeToolsExecutedInTurn, + nativeToolsActive = useNativeTools ) // Build PromptSnapshot for UI inspection @@ -1599,6 +1884,112 @@ class AgentTurnLoop( return false } + /** + * Serialize tool calls into the canonical `{response, actions}` JSON envelope used + * by the JSON-in-text path and by the UI's plan-bubble renderer. Called on the native + * function-calling path so the persisted assistant message has the same shape regardless + * of which transport the model actually used. 
+ */ + private fun buildActionsEnvelopeJson(toolCalls: List): String { + val actions = toolCalls.map { tc -> + val argsMap: Map = runCatching { + @Suppress("UNCHECKED_CAST") + pl.jclab.refio.core.utils.GsonInstance.gson + .fromJson(tc.arguments, Map::class.java) as? Map + }.getOrNull() ?: emptyMap() + mapOf( + "tool" to tc.name, + "args" to argsMap + ) + } + val envelope = mapOf( + "response" to "", + "actions" to actions + ) + return pl.jclab.refio.core.utils.GsonInstance.gson.toJson(envelope) + } + + /** + * Returns true when `content` parses as a JSON object with no usable payload: + * no non-empty `actions`/`tool_calls`/`subtasks`/`steps` array, and no non-blank `response`/`content`/`answer`/`text` string. + * Such responses are a silent no-op — we nudge the model to emit a real envelope. + */ + private fun isEmptyJsonEnvelope(content: String): Boolean { + val trimmed = content.trim().removePrefix("```json").removePrefix("```").removeSuffix("```").trim() + if (!trimmed.startsWith("{")) return false + return try { + val element = kotlinx.serialization.json.Json.parseToJsonElement(trimmed) + val obj = element as? kotlinx.serialization.json.JsonObject ?: return false + val hasActions = listOf("actions", "tool_calls", "subtasks", "steps").any { key -> + (obj[key] as? kotlinx.serialization.json.JsonArray)?.isNotEmpty() == true + } + if (hasActions) return false + val hasText = listOf("response", "content", "answer", "text").any { key -> + val v = obj[key] as?
kotlinx.serialization.json.JsonPrimitive + v != null && v.isString && v.content.isNotBlank() + } + !hasText + } catch (_: Exception) { + false + } + } + + /** + * Heuristic: does `content` look like a `{response, actions}` JSON envelope the model + * emitted in text instead of using native tool_calls? Used to trigger the native→JSON + * fallback path without waiting for a provider-level [ToolsNotSupportedException]. + */ + private fun isJsonEnvelopeFallback(content: String): Boolean { + val trimmed = content.trim() + if (!trimmed.startsWith("{") && !trimmed.startsWith("```")) return false + // Fast path: presence of the distinctive keys. Avoids parsing JSON on every turn. + val body = if (trimmed.startsWith("```")) trimmed.removePrefix("```").removePrefix("json").trim() else trimmed + return body.contains("\"actions\"") && (body.contains("\"tool\"") || body.contains("\"response\"")) + } + + /** + * Detect tool calls embedded in text content (not via native tool_calls channel). + * Matches two observed patterns from weak models under native-tools mode: + * - pseudo-XML: ..., ..., + * - inline JSON tool shape: {"name":"foo","arguments":{...}} or {"tool_calls":[...]} + * Parser can't dispatch these (shape isn't {actions:[...]}) so we must nudge. + */ + private fun looksLikeTextEmbeddedToolCall(content: String): Boolean { + val trimmed = content.trim() + if (trimmed.isBlank()) return false + // Pseudo-XML tags (tolerant of trailing punctuation like `` from glm-5) + if (trimmed.contains(" 300) return false + val lower = trimmed.lowercase() + val openers = listOf( + "let me ", "let's ", "i'll ", "i will ", "i am going to ", "i'm going to ", + "first, i", "first i", "checking ", "i need to ", "i should ", "i'll first" + ) + return openers.any { lower.startsWith(it) } + } + /** * Get last user message from history.
*/ diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/ConfigYamlApplier.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/ConfigYamlApplier.kt index 7eb41307..df20c6f7 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/ConfigYamlApplier.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/ConfigYamlApplier.kt @@ -86,6 +86,16 @@ class ConfigYamlApplier( } } + yamlConfig.general?.let { g -> + applyKey(ConfigKeys.FORMAT_MARKDOWN.key, g.formatMarkdown?.toString(), "format markdown") + applyKey(ConfigKeys.STREAMING_ENABLED.key, g.streamingEnabled?.toString(), "streaming enabled") + applyKey(ConfigKeys.ADVANCED_VIEW.key, g.advancedView?.toString(), "advanced view") + applyKey(ConfigKeys.GENERAL_THINKING_ENABLED.key, g.thinkingEnabled?.toString(), "thinking enabled") + applyKey(ConfigKeys.GENERAL_NO_EGRESS_ENABLED.key, g.noEgressEnabled?.toString(), "no egress enabled") + applyKey(ConfigKeys.GENERAL_EXECUTION_MODE.key, g.executionMode, "execution mode") + applyKey(ConfigKeys.NATIVE_TOOLS_MODE.key, g.nativeToolsMode, "native tools mode") + } + yamlConfig.providers?.let { p -> applyKey(ConfigKeys.PROVIDER_OLLAMA_ENDPOINT.key, p.ollama?.endpoint, "Ollama endpoint") applyKey(ConfigKeys.PROVIDER_ANTHROPIC_API_KEY.key, p.anthropic?.apiKey, "Anthropic API key") diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/ContextService.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/ContextService.kt index bf9bfa25..54cb2dec 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/ContextService.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/ContextService.kt @@ -180,6 +180,13 @@ class ContextService( // 4. Get subtasks val subtasks = transaction { subtaskRepository.findByTaskId(taskId) } + // 4a. Lazily rebuild WORKING_MEMORY from persisted subtasks. 
Working memory lives + // in-memory only, so after a plugin/app restart (or when the user reopens an old + // conversation) the in-memory map is empty even though the underlying data — + // subtask params + result — is still in the database. hasEntries() guards against + // re-running the extraction on every turn within a live session. + workingMemoryService?.rebuildFromSubtasks(taskId = taskId, subtasks = subtasks) + // 4. Get conversation history (if requested) val rawConversationHistory = if (CONTEXT_INCLUDE_CONVERSATION_HISTORY) { val allMessages = transaction { chatMessageRepository.findByTaskId(taskId) } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/LLMRetryHandler.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/LLMRetryHandler.kt index 28473506..afcd6ddd 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/LLMRetryHandler.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/LLMRetryHandler.kt @@ -61,8 +61,12 @@ class LLMRetryHandler( maxRetries: Int = 3, baseDelayMs: Long = 1000, responseFormat: Map? = null, + thinking: Boolean = false, + reasoningEffort: String? = null, + noEgressEnabled: Boolean = false, stream: Boolean = false, - onChunk: StreamCallback? = null + onChunk: StreamCallback? = null, + kwargs: Map = emptyMap() ): LLMResponse { var lastException: Exception? 
= null @@ -76,8 +80,12 @@ class LLMRetryHandler( taskId = taskId, source = source, responseFormat = responseFormat, + thinking = thinking, + reasoningEffort = reasoningEffort, + noEgressEnabled = noEgressEnabled, stream = stream, - onChunk = onChunk + onChunk = onChunk, + kwargs = kwargs ) } catch (e: Exception) { lastException = e diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/ParallelToolExecutor.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/ParallelToolExecutor.kt index 2c83efef..8d1f6a2b 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/ParallelToolExecutor.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/ParallelToolExecutor.kt @@ -179,6 +179,8 @@ class ParallelToolExecutor( subtaskId = subtaskId, filePaths = listOf(path) ) + // Intentionally not linking to subtask here — parallel path is for READ_ONLY tools; + // writes go through ToolExecutor/TurnToolExecutor which do the link. } catch (e: Exception) { logger.warn(e) { "[SNAPSHOT] Failed to create snapshot for ${toolCall.name}" } } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/PromptCache.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/PromptCache.kt index 7e0ecf72..032cc156 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/PromptCache.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/PromptCache.kt @@ -166,7 +166,18 @@ class PromptCache( TaskMode.AGENT -> PromptType.SYSTEM_AGENT else -> throw IllegalArgumentException("Invalid mode for cache: $mode") } - return promptsService.getSystemPrompt(type) + // Supply blank placeholders for all declared variables. The cached `systemPrompt` + // field is informational (metrics only); actual rendering happens in TurnPromptBuilder + // with the real response_contract / tool_descriptions resolved per-call. 
+ return promptsService.getSystemPrompt( + type = type, + variables = mapOf( + "tool_descriptions" to "", + "tool_selection_matrix" to "", + "response_contract" to "", + "multi_agent_section" to "" + ) + ) } } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/PromptsService.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/PromptsService.kt index 38632543..f7b906be 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/PromptsService.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/PromptsService.kt @@ -7,8 +7,10 @@ import pl.jclab.refio.core.db.repositories.PromptsRepository import pl.jclab.refio.core.prompts.PromptRegistry import pl.jclab.refio.core.prompts.PromptTemplate import pl.jclab.refio.core.logging.dualLogger +import java.util.concurrent.atomic.AtomicBoolean private val logger = dualLogger("PromptsService") +private val defaultsInitialized = AtomicBoolean(false) /** * Service for managing prompts with {{variable}} substitution. @@ -65,6 +67,13 @@ class PromptsService( * System prompts are now loaded from MD files - no DB seeding needed. */ fun initializeDefaults() { + // Guard against redundant seeding: each CoreApiRouter construction used + // to re-run this against the shared SQLite DB, which caused three + // concurrent writers at startup and deadlocks. + if (!defaultsInitialized.compareAndSet(false, true)) { + logger.debug { "Default prompts already initialized in this process; skipping" } + return + } logger.info { "Initializing default prompts" } initializeBuiltinSlashPrompts() @@ -73,6 +82,21 @@ class PromptsService( logger.info { "Default prompts initialized" } } + /** + * Resolve a named prompt fragment (no DB layer, no [PromptType] mapping). + * Used for sub-fragments like `response-contract-json` / `response-contract-native` + * that are included into larger system prompts via `{{variable}}` substitution. + * + * Hierarchy: project file > user file > builtin. Returns empty string if not found. 
+ */ + fun getFragment(name: String): String { + promptRegistry.getProjectFile(name)?.let { return it.content } + promptRegistry.getUserFile(name)?.let { return it.content } + promptRegistry.getBuiltin(name)?.let { return it.content } + logger.warn { "Prompt fragment not found in any layer: $name" } + return "" + } + /** * Get system prompt for given type with variable substitution. * Uses 3-layer resolution: project file > DB isCustom > user file > builtin. diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/SessionStatsCalculator.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/SessionStatsCalculator.kt new file mode 100644 index 00000000..a610b7f3 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/SessionStatsCalculator.kt @@ -0,0 +1,64 @@ +package pl.jclab.refio.core.services + +import pl.jclab.refio.api.models.Message + +/** + * Aggregate stats computed from a session's message list. + * + * - durationMs: wall-clock from first to last message + * - generationMs: sum of LLM latency + tool execution time across messages + * - tokensIn / tokensOut / costUsd: sums from message metrics + */ +data class SessionStats( + val durationMs: Long, + val generationMs: Long, + val tokensIn: Int, + val tokensOut: Int, + val costUsd: Double +) { + val isEmpty: Boolean get() = + durationMs == 0L && generationMs == 0L && tokensIn == 0 && tokensOut == 0 && costUsd == 0.0 + + companion object { + val EMPTY = SessionStats(0L, 0L, 0, 0, 0.0) + } +} + +object SessionStatsCalculator { + fun compute(messages: List): SessionStats { + if (messages.isEmpty()) return SessionStats.EMPTY + + var first = Long.MAX_VALUE + var last = Long.MIN_VALUE + var generationMs = 0L + var tokensIn = 0 + var tokensOut = 0 + var costUsd = 0.0 + + messages.forEach { msg -> + if (msg.createdAt < first) first = msg.createdAt + if (msg.createdAt > last) last = msg.createdAt + + val metrics = msg.metrics + if (metrics != null) { + generationMs += metrics.latencyMs.toLong() + 
metrics.toolExecutionTimeMs.toLong() + tokensIn += metrics.inputTokens + tokensOut += metrics.outputTokens + costUsd += metrics.costUsd + } else { + tokensIn += msg.tokensIn ?: 0 + tokensOut += msg.tokensOut ?: 0 + costUsd += msg.costUsd ?: 0.0 + } + } + + val duration = if (first == Long.MAX_VALUE) 0L else (last - first).coerceAtLeast(0L) + return SessionStats( + durationMs = duration, + generationMs = generationMs, + tokensIn = tokensIn, + tokensOut = tokensOut, + costUsd = costUsd + ) + } +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/SnapshotService.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/SnapshotService.kt index bee77540..a25ee18a 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/SnapshotService.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/SnapshotService.kt @@ -1,5 +1,6 @@ package pl.jclab.refio.core.services +import pl.jclab.refio.core.db.repositories.SnapshotGroupRepository import pl.jclab.refio.core.db.repositories.SnapshotRepository import pl.jclab.refio.core.db.Snapshot import pl.jclab.refio.core.logging.dualLogger @@ -17,63 +18,67 @@ import kotlin.io.path.* */ class SnapshotService( private val snapshotRepository: SnapshotRepository, + private val snapshotGroupRepository: SnapshotGroupRepository, private val projectRoot: Path ) { private val logger = dualLogger("SnapshotService") /** - * Create snapshots for specified files. + * Create snapshots for specified files. Only files that exist on disk are + * captured — for create-file operations, nothing is snapshotted. * - * @param taskId Task ID - * @param subtaskId Subtask ID (used as snapshot identifier) - * @param filePaths List of relative file paths to snapshot - * @return Snapshot ID (same as subtask_id) + * @return group id of the created snapshot group, or null when no file was + * actually snapshotted (no existing files among [filePaths]). 
*/ fun createSnapshot( taskId: String, - subtaskId: String, + subtaskId: String?, filePaths: List - ): String { - for (relPath in filePaths) { - val fullPath = projectRoot.resolve(relPath).normalize() + ): String? { + data class Captured(val relPath: String, val content: String, val hash: String) - // Skip if file doesn't exist (for create operations) - if (!fullPath.exists()) { - continue - } + val captured = filePaths.mapNotNull { relPath -> + val fullPath = projectRoot.resolve(relPath).normalize() + if (!fullPath.exists()) return@mapNotNull null - // Read content val content = try { fullPath.readText(StandardCharsets.UTF_8) } catch (e: Exception) { - // Binary file - read as bytes and decode as latin-1 String(fullPath.readBytes(), charset("ISO-8859-1")) } + Captured(relPath, content, sha256Hash(content)) + } - // Calculate hash - val contentHash = sha256Hash(content) + if (captured.isEmpty()) { + logger.info { + "No files to snapshot (all paths missing): taskId=$taskId, subtaskId=$subtaskId, paths=$filePaths" + } + return null + } - // Create snapshot record (repository handles compression) + val group = snapshotGroupRepository.create(taskId = taskId, subtaskId = subtaskId) + + for (file in captured) { snapshotRepository.create( taskId = taskId, - subtaskId = subtaskId, - filePath = relPath, - content = content, - contentHash = contentHash + groupId = group.id, + filePath = file.relPath, + content = file.content, + contentHash = file.hash ) } - return subtaskId + return group.id } /** - * Get all files in a snapshot. + * Get all files in a snapshot group. 
* - * @param snapshotId Snapshot ID (subtask_id) + * @param snapshotId Snapshot group id * @return Map of file_path to content */ fun getSnapshot(snapshotId: String): Map { - val snapshots = snapshotRepository.findBySubtaskId(snapshotId) + val snapshots = snapshotRepository.findByGroupId(snapshotId) return snapshots.associate { snapshot -> val content = snapshotRepository.decompressContent(snapshot) @@ -82,14 +87,10 @@ class SnapshotService( } /** - * Get content of specific file from snapshot. - * - * @param snapshotId Snapshot ID - * @param filePath Relative file path - * @return File content or null if not found + * Get content of specific file from snapshot group. */ fun getFileContent(snapshotId: String, filePath: String): String? { - val snapshots = snapshotRepository.findBySubtaskId(snapshotId) + val snapshots = snapshotRepository.findByGroupId(snapshotId) val snapshot = snapshots.find { it.filePath == filePath } ?: return null return snapshotRepository.decompressContent(snapshot) @@ -165,20 +166,16 @@ class SnapshotService( } /** - * List all snapshots for a task, grouped by subtask_id. - * - * @param taskId Task ID - * @return List of snapshot info + * List all snapshots for a task, grouped by snapshot group id. 
*/ fun listSnapshotsForTask(taskId: String): List { val snapshots = snapshotRepository.findByTaskId(taskId) - // Group by subtask_id - val grouped = snapshots.groupBy { it.subtaskId } + val grouped = snapshots.groupBy { it.groupId } - return grouped.map { (subtaskId, snapshotList) -> + return grouped.map { (groupId, snapshotList) -> SnapshotInfo( - snapshotId = subtaskId ?: "unknown", + snapshotId = groupId, createdAt = snapshotList.first().createdAt, files = snapshotList.map { snapshot -> FileInfo( @@ -210,8 +207,7 @@ class SnapshotService( // Delete snapshots var deleted = 0 for (snapshotId in toDelete) { - // Delete all snapshots with this subtask_id - val snapshots = snapshotRepository.findBySubtaskId(snapshotId) + val snapshots = snapshotRepository.findByGroupId(snapshotId) for (snapshot in snapshots) { if (snapshotRepository.delete(snapshot.id)) { deleted++ diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/ToolExecutor.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/ToolExecutor.kt index 3f0b972b..e619020f 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/ToolExecutor.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/ToolExecutor.kt @@ -511,14 +511,17 @@ class ToolExecutor( logger.info { "Creating snapshot before ${toolCall.name}: ${filePaths.size} file(s)" } - // Create snapshot (returns subtaskId as snapshotId) val snapshotId = snapshotService.createSnapshot( taskId = subtask.taskId, subtaskId = subtask.id, filePaths = filePaths ) - // Link snapshot to subtask + if (snapshotId == null) { + logger.info { "No existing files to snapshot for ${toolCall.name} (${filePaths.joinToString()}); skipping link" } + return + } + subtaskRepository.linkSnapshot(subtask.id, snapshotId) logger.info { "Snapshot created and linked: $snapshotId for ${filePaths.joinToString()}" } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/TurnLoopConfig.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/TurnLoopConfig.kt index 
a0af6dd9..5a8dc79e 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/TurnLoopConfig.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/TurnLoopConfig.kt @@ -16,6 +16,7 @@ import java.time.Duration * @property maxIterations Maximum number of loop iterations * @property warningThreshold When to show iteration warning * @property parallelReadTools Enable parallel execution for READ_ONLY tools + * @property maxParallelReadTools Cap on concurrent READ_ONLY tool execution within a single turn (chunks larger batches) * @property enableSnapshots Enable file snapshots before write operations * @property toolTimeout Timeout for single tool execution * @property enableAutoCompaction Enable automatic conversation compaction @@ -45,6 +46,7 @@ data class TurnLoopConfig( // Tool execution val parallelReadTools: Boolean, + val maxParallelReadTools: Int, val enableSnapshots: Boolean, val toolTimeout: Duration, @@ -91,6 +93,7 @@ data class TurnLoopConfig( maxIterations = 50, warningThreshold = 20, parallelReadTools = true, + maxParallelReadTools = 3, enableSnapshots = false, toolTimeout = Duration.ofSeconds(30), enableAutoCompaction = true, @@ -123,6 +126,7 @@ data class TurnLoopConfig( maxIterations = 100, warningThreshold = 30, parallelReadTools = true, + maxParallelReadTools = 3, enableSnapshots = true, toolTimeout = Duration.ofMinutes(2), enableAutoCompaction = true, diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/analysis/HtmlLanguageAnalyzer.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/analysis/HtmlLanguageAnalyzer.kt index eff50f49..fadcc261 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/analysis/HtmlLanguageAnalyzer.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/analysis/HtmlLanguageAnalyzer.kt @@ -48,7 +48,14 @@ class HtmlLanguageAnalyzer : ExtensionLanguageAnalyzer( scriptSources .mapNotNull { it.takeIf { src -> !src.startsWith("http", ignoreCase = true) && !src.startsWith("//") } } - .mapNotNull { 
src -> filePath.parent?.resolve(src)?.normalize() } + .mapNotNull { src -> + // Strip query string (?v=10) and fragment (#anchor), reject scheme-qualified + // URLs (javascript:, data:). Wrap in runCatching because Windows rejects + // additional chars (* < > | ") that can appear in generated src attributes. + val clean = src.substringBefore('?').substringBefore('#').trim() + if (clean.isBlank() || clean.contains(':')) return@mapNotNull null + runCatching { filePath.parent?.resolve(clean)?.normalize() }.getOrNull() + } .filter { Files.exists(it) && Files.isRegularFile(it) } .forEach { resolved -> runCatching { diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/context/ContextFormatter.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/context/ContextFormatter.kt index 4190c532..a831960c 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/context/ContextFormatter.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/context/ContextFormatter.kt @@ -11,25 +11,6 @@ private val logger = dualLogger("ContextFormatter") private const val CONVERSATION_SUMMARY_METADATA_TYPE = ConversationContextBuilder.CONVERSATION_SUMMARY_METADATA_TYPE -// RECENT_WORK limits -private const val RECENT_WORK_BUDGET_TIER_1 = 12_000 -private const val RECENT_WORK_BUDGET_TIER_2 = 8_000 -private const val RECENT_WORK_BUDGET_TIER_3 = 5_000 -private const val RECENT_WORK_BUDGET_TIER_4 = 3_500 -private const val RECENT_WORK_BUDGET_TIER_5 = 3_000 -private const val RECENT_WORK_BUDGET_TIER_6 = 2_500 -private const val RECENT_WORK_FULL_LIMIT_TIER_1 = 10 -private const val RECENT_WORK_FULL_LIMIT_TIER_2 = 8 -private const val RECENT_WORK_FULL_LIMIT_TIER_3 = 6 -private const val RECENT_WORK_FULL_LIMIT_TIER_4 = 5 -private const val RECENT_WORK_FULL_LIMIT_DEFAULT = 4 -private const val RECENT_WORK_DETAILED_LIMIT_TIER_1 = 10 -private const val RECENT_WORK_DETAILED_LIMIT_TIER_2 = 8 -private const val RECENT_WORK_DETAILED_LIMIT_TIER_3 = 6 -private const val 
RECENT_WORK_DETAILED_LIMIT_TIER_4 = 5 -private const val RECENT_WORK_DETAILED_LIMIT_TIER_5 = 4 -private const val RECENT_WORK_DETAILED_LIMIT_DEFAULT = 3 - // CONVERSATION_HISTORY limits // We no longer hard-cap by message count (see buildCompressedConversationSection for // the rationale — Bug 2B). The per-message truncation budget is derived from a soft @@ -40,6 +21,11 @@ private const val CONVERSATION_SOFT_MAX_MESSAGES_FOR_PER_MESSAGE_CAP = 60 /** * Configuration for RECENT_WORK section generation. + * + * `fullDataLimit` is read from [ConfigKeys.RECENT_WORK_FULL_DATA_LIMIT] for backwards + * compatibility but no longer drives the FULL/DETAILED/SUMMARY tier split — the adaptive + * builder tries FULL for every step and only compresses older entries when the token + * budget runs out. The field stays so existing `config.yaml` files keep loading cleanly. */ data class RecentWorkConfig( val fullDataLimit: Int = 2, @@ -367,8 +353,6 @@ class ContextFormatter( val parts = mutableListOf() parts.add("") - val fullDataLimit = calculateFullDataLimit(executedSteps.size, budgetTokens, config.fullDataLimit) - val detailedLimit = calculateDetailedLimit(budgetTokens) val compressionConfig = ToolResultCompressionConfig( detailedMaxChars = config.detailedMaxLength, summaryMaxChars = config.summaryMaxLength @@ -376,8 +360,6 @@ class ContextFormatter( val entries = buildAdaptiveRecentWork( steps = executedSteps, - fullLimit = fullDataLimit, - detailedLimit = detailedLimit, budgetTokens = budgetTokens, config = config, compressionConfig = compressionConfig @@ -818,37 +800,8 @@ class ContextFormatter( // Private helper methods // =========================== - private fun calculateFullDataLimit(stepsCount: Int, budgetTokens: Int, baseLimit: Int): Int { - if (stepsCount <= 0) return 0 - val safeBase = baseLimit.coerceAtLeast(1) - val budgetLimit = when { - budgetTokens >= RECENT_WORK_BUDGET_TIER_1 -> RECENT_WORK_FULL_LIMIT_TIER_1 - budgetTokens >= RECENT_WORK_BUDGET_TIER_2 -> 
RECENT_WORK_FULL_LIMIT_TIER_2 - budgetTokens >= RECENT_WORK_BUDGET_TIER_3 -> RECENT_WORK_FULL_LIMIT_TIER_3 - budgetTokens >= RECENT_WORK_BUDGET_TIER_4 -> RECENT_WORK_FULL_LIMIT_TIER_4 - else -> RECENT_WORK_FULL_LIMIT_DEFAULT - } - // `baseLimit` (from config) acts as a floor: the user's minimum guaranteed - // number of FULL entries. Budget decides how high we go above it — with a - // 131k-token context window we want the larger tier-based value, not the - // 5-entry config default capping everything. - val effective = maxOf(safeBase, budgetLimit) - return minOf(stepsCount, effective) - } - - private fun calculateDetailedLimit(budgetTokens: Int): Int = when { - budgetTokens >= RECENT_WORK_BUDGET_TIER_1 -> RECENT_WORK_DETAILED_LIMIT_TIER_1 - budgetTokens >= RECENT_WORK_BUDGET_TIER_2 -> RECENT_WORK_DETAILED_LIMIT_TIER_2 - budgetTokens >= RECENT_WORK_BUDGET_TIER_3 -> RECENT_WORK_DETAILED_LIMIT_TIER_3 - budgetTokens >= RECENT_WORK_BUDGET_TIER_4 -> RECENT_WORK_DETAILED_LIMIT_TIER_4 - budgetTokens >= RECENT_WORK_BUDGET_TIER_6 -> RECENT_WORK_DETAILED_LIMIT_TIER_5 - else -> RECENT_WORK_DETAILED_LIMIT_DEFAULT - } - private fun buildAdaptiveRecentWork( steps: List, - fullLimit: Int, - detailedLimit: Int, budgetTokens: Int, config: RecentWorkConfig, compressionConfig: ToolResultCompressionConfig @@ -857,7 +810,6 @@ class ContextFormatter( val entries = mutableListOf>() // (original index, entry) val reversedSteps = steps.asReversed() - val detailedStart = fullLimit + detailedLimit // Strict budget handling: most recent tools first, with graceful fallback. // Steps that don't fit at any compression level are dropped; a one-line @@ -866,13 +818,14 @@ class ContextFormatter( // loop based on what actually ended up in `entries`. var tokensUsed = 0 + // Budget-driven compression: try FULL for every step, let the fallback loop + // demote older entries to DETAILED/SUMMARY only when tokens run out. 
If the + // section budget has headroom, the agent sees complete tool output — which + // for write tools means the full diff, and for reads means the full file + // content. Preemptive tier-based compression (the prior behavior) wasted + // headroom on sessions with a handful of steps and plenty of budget. val candidates = reversedSteps.mapIndexed { index, step -> - val baseLevel = when { - index < fullLimit -> CompressionLevel.FULL - index < detailedStart -> CompressionLevel.DETAILED - else -> CompressionLevel.SUMMARY - } - Triple(index, step, baseLevel) + Triple(index, step, CompressionLevel.FULL) } // Always keep the latest executed step uncompressed (FULL) for maximum fidelity. diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/context/WorkingMemoryService.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/context/WorkingMemoryService.kt index 51975466..270bc2d1 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/context/WorkingMemoryService.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/context/WorkingMemoryService.kt @@ -1,7 +1,11 @@ package pl.jclab.refio.core.services.context import pl.jclab.refio.core.config.ConfigKeys +import pl.jclab.refio.core.db.Subtask +import pl.jclab.refio.core.db.SubtaskKind +import pl.jclab.refio.core.db.TaskStatus import pl.jclab.refio.core.services.analysis.CodeElements +import pl.jclab.refio.core.utils.GsonInstance import java.time.Instant import java.util.concurrent.ConcurrentHashMap @@ -29,6 +33,100 @@ class WorkingMemoryService( private val entriesByTask = ConcurrentHashMap>() private val entriesBySession = ConcurrentHashMap>() + /** + * True when [taskId] already has in-memory entries — caller can skip a rebuild. + * See [rebuildFromSubtasks] for the rebuild entry point. 
+ */ + fun hasEntries(taskId: String): Boolean { + val entries = entriesByTask[taskId] ?: return false + return entries.isNotEmpty() + } + + /** + * Reconstruct working memory for [taskId] from its persisted [subtasks] history. + * + * Working memory entries live in-memory only (see [entriesByTask]) and are lost + * across process restarts. But the underlying data — tool name, args, output — + * is already durable in the `subtasks` table. This method walks that history + * and re-runs [extractKnowledge] / [recordEntries] as if the tools had executed + * in this process, producing an equivalent section for prompt building. + * + * Idempotency: skips the rebuild when in-memory entries already exist for + * [taskId] so repeated turns don't pay the cost twice. Pass `force = true` to + * override (useful in tests or on cache invalidation). + * + * Fidelity notes: + * - `metadata` from the original [ToolResult] is NOT persisted in subtasks, so + * rebuilt entries use only `args` + `output` via the fallback paths in each + * `buildXxxEntries` helper. Structurally the resulting entries match what + * [recordEntries] produced live — just without the occasional metadata-derived + * field (e.g. `status_code`, `added_lines`). + * - `iteration` is taken from [Subtask.orderIndex]; the working memory's decay + * math (see [effectiveImportance]) is monotonic in iteration so this is a + * correct proxy even if it doesn't match the exact turn number. + * - FAILED subtasks are skipped: their `result` typically holds an + * "Error: ..." envelope that would produce misleading "api_failure" entries + * unrelated to real API failures. + */ + fun rebuildFromSubtasks( + taskId: String, + subtasks: List, + sessionId: String? 
= null, + force: Boolean = false + ) { + if (!force && hasEntries(taskId)) return + if (subtasks.isEmpty()) return + + val gson = GsonInstance.gson + for (subtask in subtasks) { + if (subtask.status == TaskStatus.FAILED) continue + val toolName = subtaskKindToToolName(subtask.kind) ?: continue + val output = subtask.result ?: continue + if (output.isBlank()) continue + + val args: Map = parseParamsJson(subtask.paramsJson, gson) + + val entries = extractKnowledge( + toolName = toolName, + args = args, + output = output, + iteration = subtask.orderIndex, + metadata = null, + codeElementsProvider = null, + originId = subtask.id + ) + if (entries.isNotEmpty()) { + recordEntries(taskId, entries, sessionId) + } + } + } + + private fun parseParamsJson(paramsJson: String?, gson: com.google.gson.Gson): Map { + if (paramsJson.isNullOrBlank()) return emptyMap() + return try { + @Suppress("UNCHECKED_CAST") + gson.fromJson(paramsJson, Map::class.java) as? Map ?: emptyMap() + } catch (_: Exception) { + emptyMap() + } + } + + /** + * Map a [SubtaskKind] back to the tool name accepted by [extractKnowledge]. + * + * Most kinds are direct lowercase inversions of the enum name (READ_FILE → + * read_file) so they round-trip cleanly. The synthetic kinds PLAN_STEP, + * PROJECT_ANALYSIS and KNOWLEDGE_BASE don't correspond to tool-call executions + * and are intentionally excluded: they would land in the generic fallback and + * pollute working memory with empty "tool_activity" rows. + */ + private fun subtaskKindToToolName(kind: SubtaskKind): String? = when (kind) { + SubtaskKind.PLAN_STEP, + SubtaskKind.PROJECT_ANALYSIS, + SubtaskKind.KNOWLEDGE_BASE -> null + else -> kind.name.lowercase() + } + fun recordEntries(taskId: String, entries: List, sessionId: String? 
= null) { if (entries.isEmpty()) return val taskEntries = entriesByTask.computeIfAbsent(taskId) { ConcurrentHashMap() } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/NativeToolsPrompt.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/NativeToolsPrompt.kt new file mode 100644 index 00000000..e9b34be6 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/NativeToolsPrompt.kt @@ -0,0 +1,36 @@ +package pl.jclab.refio.core.services.turn + +/** + * Override text for {{tool_descriptions}} when native function-calling is active. + * + * The model receives actual tool schemas via the API `tools` parameter instead of + * the verbose text list. This replaces only the section in the + * system-plan.md / system-agent.md templates; all other guidance stays intact. + */ +fun nativeToolsDescriptionOverride(): String = + "Tools are provided via the native function-calling API — their schemas are attached " + + "to this request. Invoke them via the standard tool_use mechanism (native tool_calls). " + + "Do NOT wrap tool calls inside a JSON envelope in your text response; " + + "use ONLY the native tool_calls channel." + +internal fun buildIterationInfoString(current: Int, max: Int, @Suppress("UNUSED_PARAMETER") writeToolsExecutedInTurn: Int = 0): String { + val remaining = max - current + + val warning = when { + remaining <= 3 -> "⚠️ CRITICAL: Only $remaining iterations left! Prioritize essential actions and prepare to conclude." + remaining <= 7 -> "⚠️ WARNING: $remaining iterations remaining. Plan efficiently and focus on core objectives." + remaining <= 12 -> "Note: $remaining iterations remaining. Consider pacing your tool usage." 
+ else -> "" + } + + return if (warning.isNotEmpty()) { + """ + +Current iteration: $current / $max +${warning} + + """.trimIndent() + } else { + "" + } +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/ToolCallParser.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/ToolCallParser.kt index 810202c2..09a1f4f2 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/ToolCallParser.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/ToolCallParser.kt @@ -46,12 +46,21 @@ class ToolCallParser( /** * Main entry point - parses LLM content into ToolCallData list. + * + * No cheap pre-guard here: some models (e.g. glm-4.7-flash) emit a prose preamble + * BEFORE the JSON envelope. A `startsWith("{")` check would incorrectly bail out + * and force the loop into plain-text finalization while `extractJsonWithStrategies` + * (Strategy 2 brace-matching) would have found the envelope just fine. The prior + * WARN noise this guard was meant to silence has already been downgraded to DEBUG + * at the end of `extractJsonWithStrategies`. */ fun extractToolCalls( content: String, mode: TaskMode, profileOverrides: TurnProfileOverrides? = null ): List { + if (content.isBlank()) return emptyList() + // Try JSON format first (preferred) val jsonToolCalls = extractToolCallsFromJson(content, mode) if (jsonToolCalls.isNotEmpty()) { @@ -59,6 +68,14 @@ class ToolCallParser( return filterToolCallsByProfile(jsonToolCalls, profileOverrides) } + // Models without native tool_calls support are routed to JSON-mode via the + // `response-contract-json` system prompt. We do NOT recover tool calls from + // XML pseudo-tags any more — that path encouraged garbage output formats and + // hid the real failure (a registry entry whose `supportsFunctionCalling` flag + // is wrong). If a model emits XML, fix the model definition to flip + // `supportsFunctionCalling = false`; the harness will then send it the JSON + // contract instead. 
+ // Fallback to legacy TOOL_CALL format val legacyToolCalls = extractToolCallsLegacy(content) if (legacyToolCalls.isNotEmpty()) { @@ -286,11 +303,32 @@ class ToolCallParser( val stepsElement = jsonElement["steps"] as? JsonArray val subtasksElement = jsonElement["subtasks"] as? JsonArray + // Some models without proper native tool_calls (GLM, MiniMax) emit a + // bare OpenAI-tool-call object inline in markdown: + // {"name": "read_file", "arguments": {"path": "x"}} + // No envelope, no wrapper array. Treat that shape as a single-element + // actions array if `name` matches a registered tool. + val bareToolName = (jsonElement["name"] as? JsonPrimitive)?.content + val isBareToolCall = bareToolName != null + && actionsElement == null + && toolCallsElement == null + && stepsElement == null + && subtasksElement == null + && toolRegistry.hasTool(bareToolName) + val toolCalls = when { actionsElement != null -> extractToolCallsFromActionsArray(actionsElement) toolCallsElement != null -> extractToolCallsFromActionsArray(toolCallsElement) stepsElement != null -> extractToolCallsFromActionsArray(stepsElement) subtasksElement != null -> extractToolCallsFromSubtasksArray(subtasksElement) + isBareToolCall -> { + logger.warn { + "[TOOL_CALL_JSON] Recovering bare tool-call object (no envelope) for '$bareToolName' — " + + "model is not following the JSON contract; consider flipping " + + "supportsFunctionCalling=false in ModelDefinitions." + } + extractToolCallsFromActionsArray(JsonArray(listOf(jsonElement))) + } else -> emptyList() } @@ -559,7 +597,10 @@ class ToolCallParser( logger.debug { "[EXTRACT_JSON] Strategy 5 (repair) failed: ${e.message}" } } - logger.warn { "[EXTRACT_JSON] All strategies failed, content preview: ${trimmed.take(200)}..." } + // Downgraded from WARN to DEBUG: the pre-guard in extractToolCalls() already filters + // obvious prose, so reaching this point means the content *looked* JSON-ish but + // couldn't be parsed by any of the 5 strategies. 
That's noise, not an operator alert. + logger.debug { "[EXTRACT_JSON] All strategies failed, content preview: ${trimmed.take(200)}..." } return null } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TrivialTaskDetector.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TrivialTaskDetector.kt new file mode 100644 index 00000000..81c5fba2 --- /dev/null +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TrivialTaskDetector.kt @@ -0,0 +1,123 @@ +package pl.jclab.refio.core.services.turn + +import pl.jclab.refio.core.tools.base.ToolSchema +import pl.jclab.refio.core.logging.dualLogger + +private val logger = dualLogger("TrivialTaskDetector") + +/** + * Cheap heuristic guard for the "create one file with explicit name" prompt shape. + * + * The system prompt has a STEP 4 rule "NO EXPLORATION FOR TRIVIAL TASKS" telling the + * model to skip file_search / read_directory / read_file when the user already gave + * a clear filename and content brief. Some models ignore that rule and burn 3+ + * iterations on pre-flight reads (gpt-5.4-mini in session 10, glm-* in PLAN mode). + * + * This detector enforces the rule at the harness level: when the user prompt + * looks like a one-shot "create file X.html with Y" request, we restrict the + * native tool schemas advertised on iteration 1 to write tools only. The model + * then physically cannot call read_file/grep_search/etc., so it goes straight + * to advance_code_editing or create_new_file. + * + * Iterations 2+ still get the full tool set, in case the write tool failed and + * the model genuinely needs to read something. + * + * Detection is intentionally conservative — false negatives are fine (no harm, + * just no enforcement), but false positives would break legitimate exploration. + */ +object TrivialTaskDetector { + + /** + * Tools the harness allows on iteration 1 of a detected trivial create-file task. 
+ */ + private val WRITE_TOOLS = setOf( + "advance_code_editing", + "create_new_file", + "code_editing", + "multi_edit", + "multi_line_editor", + ) + + /** + * Imperative verbs that mark a task as a write-this-file request rather than + * an analysis or question. We require both a verb AND an explicit filename. + */ + private val CREATE_VERBS = listOf( + "create", "write", "make", "generate", "build", "produce", + "stwórz", "stworz", "napisz", "wygeneruj", "zrób", "zrob" + ) + + /** + * Matches a bare filename anywhere in the prompt: `name.ext` where ext is one + * of common code/markup extensions and name has no spaces. Backticks/quotes + * around the filename are accepted. + */ + private val FILENAME_PATTERN = Regex( + """[`'"]?([A-Za-z0-9_./-]+\.(?:html?|md|json|ya?ml|txt|css|js|mjs|ts|tsx|jsx|kt|kts|py|rs|go|java|sh|toml|xml|svg|csv))[`'"]?""", + RegexOption.IGNORE_CASE + ) + + /** + * Returns true if the prompt looks like "create exactly one file named X with Y". + * + * Conditions (all must hold): + * 1. Contains at least one create-verb + * 2. Mentions exactly ONE filename (multiple filenames suggest multi-file work) + * 3. Prompt body length under 2000 chars (longer prompts are usually multi-step) + */ + fun isSingleFileCreateTask(userInput: String): DetectionResult { + val normalized = userInput.trim() + if (normalized.isEmpty() || normalized.length > 2000) { + return DetectionResult.NotTrivial + } + val lower = normalized.lowercase() + val verbHit = CREATE_VERBS.firstOrNull { lower.contains(it) } + ?: return DetectionResult.NotTrivial + + val filenames = FILENAME_PATTERN.findAll(normalized) + .map { it.groupValues[1] } + .toSet() + if (filenames.size != 1) { + return DetectionResult.NotTrivial + } + val filename = filenames.first() + return DetectionResult.SingleFileCreate(verb = verbHit, filename = filename) + } + + /** + * Filter native tool schemas down to write-only set when the prompt is a + * single-file create. 
Returns the original list when nothing matches so + * callers can chain unconditionally. + */ + fun maybeRestrictForIteration1( + schemas: List, + userInput: String?, + iteration: Int, + modeName: String + ): List { + if (iteration != 1 || userInput == null) return schemas + val detection = isSingleFileCreateTask(userInput) + if (detection !is DetectionResult.SingleFileCreate) return schemas + + val restricted = schemas.filter { it.name in WRITE_TOOLS } + if (restricted.isEmpty()) { + // Mode permissions stripped all write tools (e.g. PLAN mode is read-only). + // Trivial-task hint doesn't apply in read-only modes; fall back to full set. + return schemas + } + if (restricted.size == schemas.size) return schemas + + logger.info { + "[TRIVIAL_TASK_GUARD] Detected single-file create task " + + "(verb='${detection.verb}', file='${detection.filename}', mode=$modeName) — " + + "restricting iteration 1 tools: ${schemas.size} → ${restricted.size} " + + "(allowed=${restricted.joinToString(",") { it.name }})" + } + return restricted + } + + sealed interface DetectionResult { + object NotTrivial : DetectionResult + data class SingleFileCreate(val verb: String, val filename: String) : DetectionResult + } +} diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnLLMCaller.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnLLMCaller.kt index ff0ec31b..978abd98 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnLLMCaller.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnLLMCaller.kt @@ -9,9 +9,11 @@ import pl.jclab.refio.core.db.TaskMode import pl.jclab.refio.core.llm.LLMClient import pl.jclab.refio.core.llm.LLMMessage import pl.jclab.refio.core.llm.LLMResponse +import pl.jclab.refio.core.llm.ModelDefinitions import pl.jclab.refio.core.config.ConfigKeys import pl.jclab.refio.core.services.ConfigService import pl.jclab.refio.core.services.TurnLoopConfig +import pl.jclab.refio.core.tools.base.ToolSchema import 
pl.jclab.refio.core.logging.dualLogger private val logger = dualLogger("TurnLLMCaller") @@ -47,7 +49,8 @@ class TurnLLMCaller( streamCallback: StreamCallback? = null, model: String? = null, provider: String? = null, - profileOverrides: TurnProfileOverrides? = null + profileOverrides: TurnProfileOverrides? = null, + nativeToolSchemas: List? = null ): LLMResponse { val (modelId, providerName) = resolveModelSelection( mode = mode, @@ -59,8 +62,10 @@ class TurnLLMCaller( logger.info { "[CALL_LLM] Final model selection: $providerName/$modelId" } - val responseFormat = resolveResponseFormat(mode, providerName) - val thinkingEnabled = configService.getTyped(ConfigKeys.GENERAL_THINKING_ENABLED, taskId) + val nativeToolsActive = !nativeToolSchemas.isNullOrEmpty() + val responseFormat = if (nativeToolsActive) null else resolveResponseFormat(mode, providerName) + val thinkingRequested = configService.getTyped(ConfigKeys.GENERAL_THINKING_ENABLED, taskId) + val thinkingEnabled = resolveThinkingEnabled(providerName, modelId, thinkingRequested) val noEgressEnabled = configService.getTyped(ConfigKeys.GENERAL_NO_EGRESS_ENABLED, taskId) val reasoningEffortOverride = profileOverrides?.reasoningEffort if (reasoningEffortOverride != null) { @@ -70,6 +75,13 @@ class TurnLLMCaller( } } + val extraKwargs: Map = if (nativeToolsActive) { + logger.info { "[NATIVE_TOOLS] Requesting: model=$modelId, tools=${nativeToolSchemas!!.size}, mode=$mode (response_format suppressed)" } + mapOf("native_tools" to nativeToolSchemas!!) 
+ } else { + emptyMap() + } + return withContext(Dispatchers.IO) { llmClient.complete( provider = providerName, @@ -83,7 +95,8 @@ class TurnLLMCaller( reasoningEffort = reasoningEffortOverride, noEgressEnabled = noEgressEnabled, stream = streamCallback != null, - onChunk = streamCallback + onChunk = streamCallback, + kwargs = extraKwargs ) } } @@ -132,6 +145,29 @@ class TurnLLMCaller( return ModelSelection(configModel.first, configModel.second) } + /** + * Gate the user's `thinking` request against the resolved model's capability. + * + * Some models advertised by upstream APIs (notably non-reasoning OpenAI Chat Completions + * variants) reject the `thinking` parameter with HTTP 400 `Unknown parameter: 'thinking'`. + * We treat [ModelDefinition.supportsThinking] as authoritative: when the registry knows + * the model and says it can't think, we silently force `thinking = false`. + * + * Unknown models (no registry entry) pass through unchanged so locally-installed Ollama + * / LM Studio / generic-OpenAI models keep working without per-id whitelisting. + */ + fun resolveThinkingEnabled(provider: String, model: String, requested: Boolean): Boolean { + if (!requested) return false + val definition = ModelDefinitions.getDefinition(provider, model) + if (definition != null && !definition.supportsThinking) { + logger.info { + "[CALL_LLM] Disabling thinking for $provider/$model — model definition declares supportsThinking=false" + } + return false + } + return true + } + fun resolveResponseFormat(mode: TaskMode, provider: String?): Map? 
{ val isLocalProvider = provider.equals("ollama", ignoreCase = true) || provider.equals("lmstudio", ignoreCase = true) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnPromptBuilder.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnPromptBuilder.kt index 974a0fcd..9fbf0e1a 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnPromptBuilder.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnPromptBuilder.kt @@ -88,7 +88,8 @@ class TurnPromptBuilder( userContextRefs: List, runProfile: TurnRunProfile, profileOverrides: TurnProfileOverrides?, - writeToolsExecutedInTurn: Int = 0 + writeToolsExecutedInTurn: Int = 0, + nativeToolsActive: Boolean = false ): TurnPrompt { // Build system prompt based on mode/profile val baseSystemPrompt = resolveSystemPrompt( @@ -98,7 +99,8 @@ class TurnPromptBuilder( maxIterations = maxIterations, runProfile = runProfile, profileOverrides = profileOverrides, - writeToolsExecutedInTurn = writeToolsExecutedInTurn + writeToolsExecutedInTurn = writeToolsExecutedInTurn, + nativeToolsActive = nativeToolsActive ) // Resolve context profile for subagents @@ -352,15 +354,24 @@ $filteredContextPrompt fun buildPlanSystemPrompt( mode: TaskMode, taskId: String, - toolDescriptionsOverride: String? = null + toolDescriptionsOverride: String? = null, + nativeToolsActive: Boolean = false, + profileOverrides: TurnProfileOverrides? 
= null ): String { // Auto-detect compact mode based on model context size syncCompactMode(mode, taskId) + val responseContract = resolveResponseContract(nativeToolsActive) + val multiAgentSection = resolveMultiAgentSection(mode, taskId, profileOverrides) + if (toolDescriptionsOverride != null) { return promptsService.getSystemPrompt( type = PromptType.SYSTEM_PLAN, - variables = mapOf("tool_descriptions" to toolDescriptionsOverride) + variables = mapOf( + "tool_descriptions" to toolDescriptionsOverride, + "response_contract" to responseContract, + "multi_agent_section" to multiAgentSection + ) ) } @@ -371,7 +382,11 @@ $filteredContextPrompt logger.info { "[PLAN_PROMPT] Using ${if (cached.fromCache) "CACHED" else "NEW"} prompt (~${cached.tokenEstimate} tokens)" } return promptsService.getSystemPrompt( type = PromptType.SYSTEM_PLAN, - variables = mapOf("tool_descriptions" to cached.toolDescriptions) + variables = mapOf( + "tool_descriptions" to cached.toolDescriptions, + "response_contract" to responseContract, + "multi_agent_section" to multiAgentSection + ) ) } } @@ -390,11 +405,64 @@ $filteredContextPrompt type = PromptType.SYSTEM_PLAN, variables = mapOf( "tool_descriptions" to toolDescriptions, - "tool_selection_matrix" to toolSelectionMatrix + "tool_selection_matrix" to toolSelectionMatrix, + "response_contract" to responseContract, + "multi_agent_section" to multiAgentSection ) ) } + /** + * Resolve which response-contract fragment to inject based on active tool-calling mode. + * Native path gets a terse "use native tool_calls, reply in prose" contract; + * JSON-in-text path gets the full envelope + examples. + */ + private fun resolveResponseContract(nativeToolsActive: Boolean): String { + val name = if (nativeToolsActive) "response-contract-native" else "response-contract-json" + return promptsService.getFragment(name) + } + + /** + * Resolve the section. 
Returns the full delegation-guidance block when the + * caller can actually invoke subagents — empty string otherwise. + * + * Rationale: subagents (and profiles without `invoke_subagent`) don't need ~2500 tokens of + * "when to delegate" rules — they can't delegate in the first place. Same for subagents + * at depth>=1 where deeper delegation is discouraged. + * + * @param mode TaskMode (PLAN vs AGENT — different fragment files with slightly different tone) + * @param taskId task id for mode-level tool resolution + * @param profileOverrides subagent profile overrides (null for main run) + */ + fun resolveMultiAgentSection( + mode: TaskMode, + taskId: String, + profileOverrides: TurnProfileOverrides? + ): String { + if (!hasInvokeSubagentAvailable(mode, taskId, profileOverrides)) { + return "" + } + val fragmentName = when (mode) { + TaskMode.PLAN -> "multi-agent-plan" + TaskMode.AGENT -> "multi-agent-agent" + TaskMode.CHAT -> return "" + } + return promptsService.getFragment(fragmentName) + } + + private fun hasInvokeSubagentAvailable( + mode: TaskMode, + taskId: String, + profileOverrides: TurnProfileOverrides? + ): Boolean { + val tools = if (profileOverrides != null) { + resolveToolsForProfile(mode, taskId, profileOverrides) + } else { + toolDescriptionBuilder.getToolsForMode(mode, taskId) + } + return tools.any { it.name.equals("invoke_subagent", ignoreCase = true) } + } + /** * System prompt for AGENT mode (all tools). */ @@ -404,7 +472,9 @@ $filteredContextPrompt currentIteration: Int = 0, maxIterations: Int, toolDescriptionsOverride: String? = null, - writeToolsExecutedInTurn: Int = 0 + writeToolsExecutedInTurn: Int = 0, + nativeToolsActive: Boolean = false, + profileOverrides: TurnProfileOverrides? 
= null ): String { // Auto-detect compact mode based on model context size syncCompactMode(mode, taskId) @@ -431,7 +501,9 @@ $filteredContextPrompt type = PromptType.SYSTEM_AGENT, variables = mapOf( "tool_descriptions" to toolDescriptions, - "tool_selection_matrix" to toolSelectionMatrix + "tool_selection_matrix" to toolSelectionMatrix, + "response_contract" to resolveResponseContract(nativeToolsActive), + "multi_agent_section" to resolveMultiAgentSection(mode, taskId, profileOverrides) ) ) @@ -456,10 +528,20 @@ $iterationInfo currentIteration: Int, maxIterations: Int, toolDescriptions: String, - writeToolsExecutedInTurn: Int = 0 + writeToolsExecutedInTurn: Int = 0, + nativeToolsActive: Boolean = false ): String { val basePrompt = overrides.systemPromptOverride - ?: buildAgentSystemPrompt(mode, taskId, currentIteration, maxIterations, toolDescriptions, writeToolsExecutedInTurn) + ?: buildAgentSystemPrompt( + mode = mode, + taskId = taskId, + currentIteration = currentIteration, + maxIterations = maxIterations, + toolDescriptionsOverride = toolDescriptions, + writeToolsExecutedInTurn = writeToolsExecutedInTurn, + nativeToolsActive = nativeToolsActive, + profileOverrides = overrides + ) val iterationInfo = buildIterationInfo(currentIteration, maxIterations, writeToolsExecutedInTurn) return buildString { @@ -472,13 +554,21 @@ $iterationInfo } appendLine() appendLine("") - appendLine("When using tools, respond ONLY with JSON object:") - appendLine("""{"actions":[{"tool":"exact_tool_name","arguments":{"param":"value"}}],"response":"","intent":"implementation|analysis|response"}""") - appendLine("Use only tool names from and exact parameter names from schemas.") - appendLine("""Optional field: "thinking":"short reasoning" when it adds value.""") - appendLine("""Never wrap calls in helper tools like {"tool":"run", ...}.""") - appendLine("""Never nest tool payloads like {"tool":"some_tool","arguments":{"tool":"other","arguments":{...}}}.""") - appendLine("""If no tools are 
needed, respond with {"actions":[],"response":"your answer","intent":"response"}.""") + if (nativeToolsActive) { + appendLine("Native function-calling is active for this subagent.") + appendLine("Invoke tools through the provider's native tool_call / tool_use channel.") + appendLine("Do not emit a JSON envelope in text content.") + appendLine("Use only tool names from and exact parameter names from attached schemas.") + appendLine("When finished, respond with plain text only.") + } else { + appendLine("When using tools, respond ONLY with JSON object:") + appendLine("""{"actions":[{"tool":"exact_tool_name","arguments":{"param":"value"}}],"response":"","intent":"implementation|analysis|response"}""") + appendLine("Use only tool names from and exact parameter names from schemas.") + appendLine("""Optional field: "thinking":"short reasoning" when it adds value.""") + appendLine("""Never wrap calls in helper tools like {"tool":"run", ...}.""") + appendLine("""Never nest tool payloads like {"tool":"some_tool","arguments":{"tool":"other","arguments":{...}}}.""") + appendLine("""If no tools are needed, respond with {"actions":[],"response":"your answer","intent":"response"}.""") + } appendLine("") if (iterationInfo.isNotBlank()) { appendLine() @@ -497,14 +587,50 @@ $iterationInfo maxIterations: Int, runProfile: TurnRunProfile, profileOverrides: TurnProfileOverrides?, - writeToolsExecutedInTurn: Int = 0 + writeToolsExecutedInTurn: Int = 0, + nativeToolsActive: Boolean = false ): String { // Sync compact mode early — before resolving tool descriptions for any profile. // Without this, subagent tool descriptions are built with compactMode=false // even on small-context models (e.g. Ollama 32k), wasting ~1.5k tokens. 
syncCompactMode(mode, taskId) - val toolDescriptionsOverride = resolveToolDescriptionsForProfile(mode, taskId, profileOverrides) + // Native function-calling: reuse the .md templates but substitute {{tool_descriptions}} + // with a short note (schemas are attached via the API `tools` parameter) and select the + // native response-contract fragment. All other guidance stays intact. + if (nativeToolsActive && runProfile != TurnRunProfile.SUBAGENT) { + val nativeNote = nativeToolsDescriptionOverride() + return when (mode) { + TaskMode.PLAN -> buildPlanSystemPrompt( + mode = mode, + taskId = taskId, + toolDescriptionsOverride = nativeNote, + nativeToolsActive = true, + profileOverrides = profileOverrides + ) + TaskMode.AGENT -> buildAgentSystemPrompt( + mode = mode, + taskId = taskId, + currentIteration = currentIteration, + maxIterations = maxIterations, + toolDescriptionsOverride = nativeNote, + writeToolsExecutedInTurn = writeToolsExecutedInTurn, + nativeToolsActive = true, + profileOverrides = profileOverrides + ) + TaskMode.CHAT -> buildChatSystemPrompt() + } + } + + val toolDescriptionsOverride = if (nativeToolsActive) { + // Subagent path with native tools: still goes through buildSubagentSystemPrompt, which + // produces a block — replace the tool list with a short note. + "Tools are available via the native function-calling API. " + + "Call tools using the standard tool_use mechanism. " + + "When you have completed the task, respond with plain text — no JSON envelope." 
+ } else { + resolveToolDescriptionsForProfile(mode, taskId, profileOverrides) + } if (runProfile == TurnRunProfile.SUBAGENT && profileOverrides?.systemPromptOverride != null) { return buildSubagentSystemPrompt( @@ -514,20 +640,29 @@ $iterationInfo currentIteration = currentIteration, maxIterations = maxIterations, toolDescriptions = toolDescriptionsOverride.orEmpty(), - writeToolsExecutedInTurn = writeToolsExecutedInTurn + writeToolsExecutedInTurn = writeToolsExecutedInTurn, + nativeToolsActive = nativeToolsActive ) } return when (mode) { TaskMode.CHAT -> buildChatSystemPrompt() - TaskMode.PLAN -> buildPlanSystemPrompt(mode, taskId, toolDescriptionsOverride) + TaskMode.PLAN -> buildPlanSystemPrompt( + mode = mode, + taskId = taskId, + toolDescriptionsOverride = toolDescriptionsOverride, + nativeToolsActive = false, + profileOverrides = profileOverrides + ) TaskMode.AGENT -> buildAgentSystemPrompt( mode = mode, taskId = taskId, currentIteration = currentIteration, maxIterations = maxIterations, toolDescriptionsOverride = toolDescriptionsOverride, - writeToolsExecutedInTurn = writeToolsExecutedInTurn + writeToolsExecutedInTurn = writeToolsExecutedInTurn, + nativeToolsActive = false, + profileOverrides = profileOverrides ) } } @@ -596,6 +731,25 @@ ${warning} return baseTools.filter { isToolAllowedByProfile(it.name, profileOverrides) } } + /** + * Filter native tool schemas by subagent profile (allowedTools / disallowedTools). + * + * Without this, the model receives all mode-permitted schemas via the native + * function-calling API and will happily emit tool_calls the harness then rejects + * (e.g. api-documenter calling `think` when the definition lists only file ops). + * The filtered list must match what the subagent's system prompt says is available. + */ + fun filterNativeToolSchemasByProfile( + schemas: List, + profileOverrides: TurnProfileOverrides? 
+ ): List { + if (profileOverrides == null) return schemas + if (profileOverrides.allowedTools.isNullOrEmpty() && profileOverrides.disallowedTools.isNullOrEmpty()) { + return schemas + } + return schemas.filter { isToolAllowedByProfile(it.name, profileOverrides) } + } + private fun isToolAllowedByProfile(toolName: String, profileOverrides: TurnProfileOverrides): Boolean { val normalizedName = toolName.lowercase() val allowed = profileOverrides.allowedTools?.map { it.lowercase() }?.toSet() diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnResponseProcessor.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnResponseProcessor.kt index 644a1d8d..ce1196da 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnResponseProcessor.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnResponseProcessor.kt @@ -1,6 +1,7 @@ package pl.jclab.refio.core.services.turn import kotlinx.serialization.json.Json +import pl.jclab.refio.core.api.TurnRunProfile import pl.jclab.refio.core.db.ExecutionMode import pl.jclab.refio.core.db.TaskMode import pl.jclab.refio.core.db.TaskStatus @@ -67,9 +68,28 @@ class TurnResponseProcessor( taskId: String, mode: TaskMode, executionMode: ExecutionMode, - llmResponse: LLMResponse + llmResponse: LLMResponse, + runProfile: TurnRunProfile = TurnRunProfile.DEFAULT ): Int { if (mode != TaskMode.PLAN) return 0 + // Subagents must never leave orphan PLANNED subtasks. A subagent final turn + // that happens to emit a {response, actions} envelope as prose (qwen3.5 failure + // mode, session 2) would otherwise create a PLANNED subtask that never executes + // — the subagent loop already ended. Silently skip.
+ if (runProfile == TurnRunProfile.SUBAGENT) { + logger.debug { "[PLAN] Skipping PLANNED subtask creation for SUBAGENT run profile (no orphans)" } + return 0 + } + + // Quick guard: on the native function-calling path the final assistant turn is + // typically plain prose (markdown summary), not a JSON envelope. Running JsonExtractor + // on prose wastes cycles and (more annoyingly) logs ERROR-level "All extraction + // strategies failed" noise. Only attempt extraction when content visibly starts + // with `{`, `[` or a fenced block. + val trimmedContent = llmResponse.content.trim() + if (!trimmedContent.startsWith("{") && !trimmedContent.startsWith("```") && !trimmedContent.startsWith("[")) { + return 0 + } val planData = try { JsonExtractor.extractAndParsePlanningResponse(llmResponse.content) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnToolExecutor.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnToolExecutor.kt index 6d6e695e..c56b48c5 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnToolExecutor.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/TurnToolExecutor.kt @@ -65,6 +65,62 @@ class TurnToolExecutor( name.equals("invoke_subagent", ignoreCase = true) || name.equals("delegate_to_strong_model", ignoreCase = true) + /** + * Detect whether the current tool call repeats a recent one with identical + * arguments. Emits an in-band nudge that ships WITH the tool result so the + * agent sees the warning on the next turn without a separate system message. + * + * Strategy: compare against the previous 3 SUCCESS subtasks of the same task. + * We use exact string equality on `paramsJson`, which is safe because both + * the current call and the stored subtask serialise arguments through the + * same Gson instance — so identical args produce identical JSON. + * + * Returns null when no repeat is detected; the caller drops the nudge from + * `outputWithWarnings` in that case.
+     */
+    private fun buildRepeatedCallNudge(
+        taskId: String,
+        currentSubtaskId: String,
+        toolName: String,
+        argumentsJson: String
+    ): String? {
+        // Reflective tools (`think`, `memory`, `tasks`) are legitimately repeated with
+        // similar args, so never nudge on them; tool names compare case-insensitively.
+        val noisyTools = setOf("think", "memory", "tasks")
+        if (noisyTools.any { it.equals(toolName, ignoreCase = true) }) return null
+        if (argumentsJson.isBlank()) return null
+
+        val recent = try {
+            transaction { subtaskRepository.findByTaskId(taskId) }
+                .asReversed()
+                .asSequence()
+                .filter { it.id != currentSubtaskId }
+                .filter { it.status == TaskStatus.SUCCESS }
+                .take(3)
+                .toList()
+        } catch (e: Exception) {
+            logger.debug { "[REPEATED_CALL_NUDGE] query failed, skipping: ${e.message}" }
+            return null
+        }
+
+        val match = recent.firstOrNull {
+            it.kind.name.equals(toolName, ignoreCase = true) &&
+                it.paramsJson == argumentsJson
+        } ?: return null
+
+        logger.info {
+            "[REPEATED_CALL] tool=$toolName taskId=$taskId currentSubtask=$currentSubtaskId " +
+                "matchesPriorSubtask=${match.id} orderIndex=${match.orderIndex}"
+        }
+
+        return buildString {
+            appendLine("[⚠ progressive hint — possible loop]")
+            appendLine("You just ran `$toolName` with arguments identical to a prior successful call in this task (subtask ${match.id}).")
+            appendLine("If the previous result did not advance the task, repeating it will not help either.")
+            appendLine("Options: (1) change the arguments materially, (2) try a different tool — see for alternatives, (3) if stuck on the same problem for 3+ attempts, call `delegate_to_strong_model` with a concrete summary of what failed, or (4) ask the user with `ask_user` when the goal itself is ambiguous.")
+        }.trim()
+    }
+
 companion object { /** Max raw output size (chars) to preserve in-context for DATA_PRODUCING tools */ const val DATA_PRODUCING_RAW_OUTPUT_BUFFER = 16_000 @@ -206,30 +262,44 @@ class TurnToolExecutor( } if (readOnlyIndexed.isNotEmpty() && writeIndexed.isEmpty()) { -
logger.info { - "[PARALLEL] Executing ${readOnlyIndexed.size} READ_ONLY in parallel (no WRITE tools in batch)" + // Cap concurrency: weak local models occasionally batch 7+ reads in one turn + // (gpt-5.4-mini in particular). Running them all in parallel doesn't help — + // the bottleneck is the next LLM call, not the reads. Chunk into windows of + // `maxParallelReadTools` so we keep parallelism gains without unbounded fan-out. + val cap = config.maxParallelReadTools.coerceAtLeast(1) + if (readOnlyIndexed.size > cap) { + logger.warn { + "[PARALLEL_CAPPED] ${readOnlyIndexed.size} READ_ONLY tools in batch — " + + "capping concurrency at $cap (chunked execution)" + } + } else { + logger.info { + "[PARALLEL] Executing ${readOnlyIndexed.size} READ_ONLY in parallel (no WRITE tools in batch)" + } } - val readOnlyResults = readOnlyIndexed.map { (originalIndex, toolCall) -> - async { - val subtaskId = subtaskIds[toolCall.id]!! - val result = executeSingleTool( - taskId = taskId, - toolCall = toolCall, - subtaskId = subtaskId, - listener = listener, - iteration = iteration, - _config = config, - mode = mode, - executionMode = executionMode, - runId = runId, - depth = depth, - profileOverrides = profileOverrides, - _subtaskIds = subtaskIds - ) - originalIndex to (toolCall to result) - } - }.awaitAll() + val readOnlyResults = readOnlyIndexed.chunked(cap).flatMap { chunk -> + chunk.map { (originalIndex, toolCall) -> + async { + val subtaskId = subtaskIds[toolCall.id]!! 
+ val result = executeSingleTool( + taskId = taskId, + toolCall = toolCall, + subtaskId = subtaskId, + listener = listener, + iteration = iteration, + _config = config, + mode = mode, + executionMode = executionMode, + runId = runId, + depth = depth, + profileOverrides = profileOverrides, + _subtaskIds = subtaskIds + ) + originalIndex to (toolCall to result) + } + }.awaitAll() + } val writeResults = writeIndexed.map { (originalIndex, toolCall) -> if (GlobalMetrics.isCancelled()) { @@ -242,7 +312,10 @@ class TurnToolExecutor( val params = TurnJsonUtils.parseJsonToMap(toolCall.arguments) val path = params["path"]?.toString() if (path != null) { - snapshotService.createSnapshot(taskId, subtaskId, listOf(path)) + val snapshotId = snapshotService.createSnapshot(taskId, subtaskId, listOf(path)) + if (snapshotId != null) { + subtaskRepository.linkSnapshot(subtaskId, snapshotId) + } } } catch (e: Exception) { logger.warn(e) { "[SNAPSHOT] Failed to create snapshot for ${toolCall.name}" } @@ -492,7 +565,22 @@ class TurnToolExecutor( ?.takeIf { it.isNotEmpty() } ?.joinToString(prefix = "[next-step hints]\n- ", separator = "\n- ") - val outputWithWarnings = listOfNotNull(rawOutput, hintsBlock).joinToString("\n\n") + // Progressive nudge: if the agent just ran an identical tool call (same + // name + arguments) that also succeeded, it's burning turns on a loop — + // common failure mode for small local models (qwen/gemma) which keep + // retrying `advance_code_editing` with the same description hoping for + // different output. Detect by comparing against the previous subtask's + // paramsJson; nudge the agent toward a different approach in-band so it + // arrives with the tool result rather than via a round-trip. 
+ val repeatedCallNudge = buildRepeatedCallNudge( + taskId = taskId, + currentSubtaskId = subtaskId, + toolName = toolCall.name, + argumentsJson = toolCall.arguments + ) + + val outputWithWarnings = listOfNotNull(rawOutput, hintsBlock, repeatedCallNudge) + .joinToString("\n\n") val isInvokeSubagent = toolCall.name.equals("invoke_subagent", ignoreCase = true) val isDelegateStrong = toolCall.name.equals("delegate_to_strong_model", ignoreCase = true) val isMemoryGetSubtaskOutput = toolCall.name.equals("memory", ignoreCase = true) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/providers/SystemEnvironmentPromptProvider.kt b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/providers/SystemEnvironmentPromptProvider.kt index e7b75d01..87565a2b 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/services/turn/providers/SystemEnvironmentPromptProvider.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/services/turn/providers/SystemEnvironmentPromptProvider.kt @@ -40,48 +40,36 @@ class SystemEnvironmentPromptProvider( val isWindows = osName.lowercase(Locale.ROOT).contains("windows") val isMac = osName.lowercase(Locale.ROOT).contains("mac") val shell = detectShell(isWindows) - val pathSep = File.pathSeparator - val fileSep = File.separator val cwd = projectRoot?.toAbsolutePath()?.toString() ?: System.getProperty("user.dir") - val home = System.getProperty("user.home") - val platformHint = when { - isWindows -> buildString { - append("**CRITICAL: This is a Windows PowerShell environment.**\n") - append("SHELL SYNTAX RULES (violations will cause command failures):\n") - append("- Do NOT use `&&` to chain commands — use `;` in PowerShell\n") - append("- Do NOT use heredocs (`<<'EOF'`) — not supported in PowerShell\n") - append("- Do NOT use `/dev/null` — use `NUL` or `Out-Null`\n") - append("- Do NOT pass complex Python one-liners via `python -c \"...\"` — ") - append("PowerShell mangles quotes. 
Write a .py file and run it instead.\n") - append("- Use `where` not `which` to locate binaries\n") - append("- Paths use backslashes: `dir\\file.py` not `dir/file.py`\n") - append("PREFERRED PATTERN: Write scripts to files, then execute them.") - } - isMac -> "macOS — BSD userland (not GNU). Some flags differ from Linux (e.g. `sed -i ''`, `find` predicates). `xargs -r` is unavailable." - else -> "Linux — GNU userland. Standard POSIX + GNU extensions available." - } + + // Split probed tools into have/lack lists. A single "have: a,b,c" line is ~4× + // cheaper in tokens than a per-tool "a: yes\nb: no\n..." table that previously + // cost ~600 tokens by itself — and a flat list is what the model actually needs + // (it wants to know what it can invoke, not each tool's boolean individually). val toolStatus = probeTools(isWindows) + val have = toolStatus.filter { it.second }.map { it.first } + val lack = toolStatus.filter { !it.second }.map { it.first } return buildString { append("\n") - append("os: $osName $osVersion ($osArch)\n") - append("shell: $shell\n") - append("working_directory: $cwd\n") - append("user_home: $home\n") - append("path_separator: \"$pathSep\"\n") - append("file_separator: \"$fileSep\"\n") - append("available_tools:\n") - for ((tool, available) in toolStatus) { - append(" - $tool: ${if (available) "yes" else "no"}\n") - } + append("os: $osName $osVersion ($osArch); shell: $shell; cwd: $cwd\n") + if (have.isNotEmpty()) append("have: ${have.joinToString(",")}\n") + if (lack.isNotEmpty()) append("lack: ${lack.joinToString(",")}\n") + append("Prefer Refio tools over raw shell; if a command is not in `have`, do not assume it is installed.\n") - append("IMPORTANT: Pick shell commands and flags that match the OS above. ") - append("Do NOT assume a tool exists unless it is listed as `yes` in available_tools. 
") - append("When uncertain, prefer cross-platform alternatives or use Refio tools instead of raw shell.\n") - - append("\n\n\n") - append(platformHint) - append("\n\n") + // Platform-specific rules: only emit when non-trivial. Linux/GNU defaults + // don't need a block — most shell idioms work out of the box. Mac BSD and + // Windows PowerShell have real divergences that burn the agent if omitted, + // so those sections stay. + when { + isWindows -> { + append("PowerShell: use `;` not `&&` to chain; no heredocs; `NUL` not `/dev/null`; `where` not `which`; backslash paths. For non-trivial Python, write a .py file rather than `python -c \"...\"` (quote mangling).\n") + } + isMac -> { + append("macOS BSD userland: `sed -i ''` (not `-i`), `xargs -r` unavailable, `find` predicate set differs from GNU.\n") + } + // Linux/other: omit — POSIX + GNU defaults are the model's baseline. + } append("") } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/session/MessageDispatcher.kt b/core/src/main/kotlin/pl/jclab/refio/core/session/MessageDispatcher.kt index c70b55b6..6e87cbce 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/session/MessageDispatcher.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/session/MessageDispatcher.kt @@ -134,7 +134,12 @@ class MessageDispatcher( id = if (index == 0) coreMsg.id else "${coreMsg.id}:tc$index", taskId = coreMsg.taskId, createdAt = coreMsg.createdAt, - toolCallInfo = enriched + toolCallInfo = enriched, + // Attach token/cost metrics only to the first tool-call bubble + // so SessionStatsCalculator doesn't double-count. 
+ tokensIn = if (index == 0) coreMsg.tokensIn else null, + tokensOut = if (index == 0) coreMsg.tokensOut else null, + cost = if (index == 0) coreMsg.cost else null ) } } @@ -153,15 +158,19 @@ class MessageDispatcher( taskId = coreMsg.taskId, createdAt = coreMsg.createdAt, toolCallInfo = toolCallInfo, - metadata = coreMsg.metadata + metadata = coreMsg.metadata, + tokensIn = coreMsg.tokensIn, + tokensOut = coreMsg.tokensOut, + cost = coreMsg.cost ) ) } - // Completely empty - filter out - if (coreMsg.metadata.isNullOrBlank()) { - return@flatMap emptyList() - } + // Empty assistant with no usable tool-call info — drop it. + // Native tool-call envelopes (content="", toolCallsJson populated) are + // handled above; anything falling through here would render as an empty + // "Assistant" bubble next to the real tool bubble. + return@flatMap emptyList() } val toolDisplay = ToolMessageDisplayResolver.resolve( @@ -258,11 +267,48 @@ class MessageDispatcher( } else { logger.debug { "[MESSAGES] Unchanged, skipping update (count=${allMessages.size})" } } + + refreshActiveSessionMetrics(currentSession.id) } catch (e: Exception) { logger.error(e) { "Failed to load messages" } } } + /** + * Refresh tokens/cost/status on the active session from the DB so the + * Session Debug Report and any other consumer of `activeSession` reflect + * AgentTurnLoop's per-iteration `incrementMetrics` writes. 
+ */ + private fun refreshActiveSessionMetrics(sessionId: String) { + val current = stateManager.getActiveSession() ?: return + if (current.id != sessionId) return + try { + val task = projectRouter.taskRouter.getTask(sessionId) ?: return + val refreshedStatus = runCatching { + pl.jclab.refio.api.models.TaskStatus.valueOf(task.status) + }.getOrDefault(current.status) + + if (current.tokensIn == task.tokensIn && + current.tokensOut == task.tokensOut && + current.costUsd == task.costUsd && + current.status == refreshedStatus && + current.updatedAt == task.updatedAt + ) return + + stateManager.updateSession( + current.copy( + tokensIn = task.tokensIn, + tokensOut = task.tokensOut, + costUsd = task.costUsd, + status = refreshedStatus, + updatedAt = task.updatedAt + ) + ) + } catch (e: Exception) { + logger.warn(e) { "Failed to refresh active session metrics for $sessionId" } + } + } + /** * Parse ToolCallDisplayInfo from metadata JSON or toolCallsJson. * Prioritizes metadata (already formatted) over toolCallsJson (raw tool calls). @@ -441,7 +487,10 @@ class MessageDispatcher( taskId: String, createdAt: Long, toolCallInfo: ToolCallDisplayInfo, - metadata: String? = ToolCallDisplayInfo.toMetadataJson(toolCallInfo) + metadata: String? = ToolCallDisplayInfo.toMetadataJson(toolCallInfo), + tokensIn: Int? = null, + tokensOut: Int? = null, + cost: Double? 
= null ): Message { return Message( id = id, @@ -450,7 +499,10 @@ class MessageDispatcher( content = "", toolCallInfo = toolCallInfo, createdAt = createdAt, - metadata = metadata + metadata = metadata, + tokensIn = tokensIn, + tokensOut = tokensOut, + costUsd = cost ) } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/session/SessionLifecycleService.kt b/core/src/main/kotlin/pl/jclab/refio/core/session/SessionLifecycleService.kt index e924f320..dabaa7c9 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/session/SessionLifecycleService.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/session/SessionLifecycleService.kt @@ -3,6 +3,7 @@ package pl.jclab.refio.core.session import pl.jclab.refio.core.session.SessionStateManager import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.Job import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.launch import kotlinx.coroutines.withContext @@ -32,6 +33,17 @@ class SessionLifecycleService( private val logger = dualLogger("SessionLifecycleService") private var selectedMode: TaskMode = TaskMode.CHAT + // Tracks the initial last-session-restore coroutine so that user-driven + // lifecycle ops (createSession/switchSession/loadSession) can wait for it + // to finish before mutating session state. Without this, restore can race + // with a fast "New Session" click and overwrite the freshly-created session + // (visible right after plugin startup as the old chat reappearing). + private var initializeJob: Job? = null + + private suspend fun awaitInitialization() { + initializeJob?.join() + } + fun getSelectedMode(): TaskMode = selectedMode @Suppress("UNUSED_PARAMETER") @@ -40,7 +52,7 @@ class SessionLifecycleService( subtaskTracker: SubtaskTracker, _executionMonitor: ExecutionMonitor ) { - scope.launchSafe { + initializeJob = scope.launchSafeJob { loadUIState() try { @@ -97,6 +109,7 @@ class SessionLifecycleService( mode: TaskMode, executionMode: ExecutionMode? 
= null ): Session { + awaitInitialization() val totalTimeStart = System.currentTimeMillis() logger.info { "[PERF] createSession START: name='$name', mode=$mode" } @@ -175,6 +188,7 @@ class SessionLifecycleService( } suspend fun switchSession(sessionId: String, messageDispatcher: MessageDispatcher, subtaskTracker: SubtaskTracker) { + awaitInitialization() try { saveCurrentSessionState() @@ -229,6 +243,7 @@ class SessionLifecycleService( subtaskTracker: SubtaskTracker, _executionMonitor: ExecutionMonitor ) { + awaitInitialization() modeSwitchMutex.lock() try { logger.info { "Loading session: $sessionId" } @@ -844,6 +859,9 @@ private fun CoroutineScope.launchSafe(block: suspend () -> Unit) { this.launch(Dispatchers.IO) { block() } } +private fun CoroutineScope.launchSafeJob(block: suspend () -> Unit): Job = + this.launch(Dispatchers.IO) { block() } + private fun parseExecutionModeValue(value: String?): ExecutionMode { if (value.isNullOrBlank()) { return ExecutionMode.INTERACTIVE diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/base/Tool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/base/Tool.kt index 097ad99a..cab31386 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/base/Tool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/base/Tool.kt @@ -78,6 +78,46 @@ interface Tool { } } +/** + * Provider-agnostic tool schema for native function-calling APIs. + * + * Each adapter converts this to its provider-specific format: + * - OpenAI/Ollama: {"type":"function","function":{"name":...,"description":...,"parameters":...}} + * - Anthropic: {"name":...,"description":...,"input_schema":...} + * + * parametersJsonSchema must be a valid JSON Schema draft-7 subset accepted by all providers: + * {"type":"object","properties":{...},"required":[...]} + */ +data class ToolSchema( + val name: String, + val description: String, + val parametersJsonSchema: Map, +) + +/** + * Convert a Tool into a provider-agnostic schema using Tool.getParameterSchema(). 
+ * + * Wraps bare-properties maps in {"type":"object","properties":...} so all providers + * accept the result. Tools returning emptyMap() become a no-parameter schema. + */ +fun Tool.toToolSchema(): ToolSchema { + val schema = getParameterSchema() + val resolved: Map = when { + schema.isEmpty() -> mapOf("type" to "object", "properties" to emptyMap()) + !schema.containsKey("type") -> mapOf( + "type" to "object", + "properties" to schema, + "required" to emptyList() + ) + else -> schema + } + return ToolSchema( + name = name, + description = description, + parametersJsonSchema = resolved, + ) +} + /** * Origin of a tool — where it was registered from. */ @@ -164,7 +204,17 @@ data class ChangeSummary( val replacements: Int? = null, /** Whether the file was newly created (vs. modified in place). */ val created: Boolean = false -) +) { + /** + * True when an edit ran but produced no content change (identical before/after + * content on an existing file). The tool still reports success=true (no I/O error), + * but this flag lets callers and the agent distinguish a genuine edit from a + * silent no-op — typically the LLM editor returned unchanged content, a search + * pattern did not match, or the edit_description was too vague to apply. + */ + val noop: Boolean + get() = !created && addedLines == 0 && removedLines == 0 +} /** * Result of tool execution diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/base/ToolRegistry.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/base/ToolRegistry.kt index 14f5f03d..64dd277e 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/base/ToolRegistry.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/base/ToolRegistry.kt @@ -161,6 +161,18 @@ class ToolRegistry { return permissionsService.filterAvailableTools(getAllTools(), taskMode, taskId) } + /** + * Get tool schemas for native function-calling API. + * Returns the same set of tools that would appear in the system prompt for this mode. 
+ */ + fun getToolSchemas( + taskMode: TaskMode, + permissionsService: ToolPermissionsService, + taskId: String? = null + ): List { + return getAvailableTools(taskMode, permissionsService, taskId).map { it.toToolSchema() } + } + /** * Check if tool exists * diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/AdvanceCodeEditingTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/AdvanceCodeEditingTool.kt index 068e63bd..513cd870 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/AdvanceCodeEditingTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/AdvanceCodeEditingTool.kt @@ -64,7 +64,9 @@ class AdvanceCodeEditingTool( "(b) rewrites covering >50% of an existing file, (c) structurally broken files. " + "Generates content via a dedicated LLM call so the agent's own response stays small — " + "avoid stuffing large `content` payloads into `create_new_file`, which inflates the agent " + - "response and risks streaming timeouts. " + + "response and risks streaming timeouts. Returns a diff/change summary so you usually do not need " + + "to re-read the file immediately after writing; if the tool result gets summarized or truncated, " + + "recover the full raw output with memory(action=\"get_subtask_output\", subtask_id=...). " + "For small targeted edits prefer code_editing or multi_line_editor." override val mode = ToolMode.WRITE override val category = ToolCategory.FILE_PRODUCING @@ -382,22 +384,33 @@ class AdvanceCodeEditingTool( ToolResult( success = true, output = buildString { - if (fileExists) { - appendLine("File edited successfully: $pathStr") + if (changeSummary.noop) { + // LLM returned content identical to the existing file — no change applied. + // Surface this explicitly so the agent notices (instead of seeing a bland + // "File edited successfully" with an empty diff and concluding all is fine). 
+ appendLine("⚠ No changes applied: generated content is identical to the existing file ($pathStr).") + appendLine("Likely causes: the edit_description was too vague for the model to act on, the change is already present, or the model refused and returned the file unchanged.") + appendLine("Next step: refine edit_description with concrete before/after snippets, or switch to code_editing / multi_line_editor with an exact string to match.") + appendLine("If you need to inspect the current file state, use read_file(path=\"$pathStr\"). If this tool result was summarized, recover the full raw diff/output with memory(action=\"get_subtask_output\", subtask_id=\"\"). Do NOT retry advance_code_editing with the same description.") + appendLine("Model: $model, Tokens: ${usage.inputTokens}/${usage.outputTokens}, Cost: $${"%.4f".format(cost)}") } else { - appendLine("File created successfully: $pathStr") + if (fileExists) { + appendLine("File edited successfully: $pathStr") + } else { + appendLine("File created successfully: $pathStr") + } + if (fileExists) { + appendLine("Size: $fileSize bytes → $newFileSize bytes") + } else { + appendLine("Size: $newFileSize bytes") + } + appendLine("Model: $model, Tokens: ${usage.inputTokens}/${usage.outputTokens}, Cost: $${"%.4f".format(cost)}") + // Wrap diff in markdown code block for proper UI rendering + appendLine("Diff:") + appendLine("```diff") + diff.lines().forEach { line -> appendLine(line) } + append("```") } - if (fileExists) { - appendLine("Size: $fileSize bytes → $newFileSize bytes") - } else { - appendLine("Size: $newFileSize bytes") - } - appendLine("Model: $model, Tokens: ${usage.inputTokens}/${usage.outputTokens}, Cost: $${"%.4f".format(cost)}") - // Wrap diff in markdown code block for proper UI rendering - appendLine("Diff:") - appendLine("```diff") - diff.lines().forEach { line -> appendLine(line) } - append("```") }, bytesRead = originalContent.toByteArray().size, bytesWritten = newContent.toByteArray().size, @@ -410,6 
+423,7 @@ class AdvanceCodeEditingTool( "diff_lines" to diff.lines().size, "added_lines" to addedLines, "removed_lines" to removedLines, + "noop" to changeSummary.noop, "edit_description" to editDescription, "model" to model, "provider" to provider, @@ -517,14 +531,20 @@ class AdvanceCodeEditingTool( ), "edit_description" to mapOf( "type" to "string", - "description" to "What to edit or create, in natural language." + "description" to "Natural-language description of the change. " + + "Provide this OR the (old_string, new_string) pair — not both." + ), + "old_string" to mapOf( + "type" to "string", + "description" to "Exact substring to replace. Use together with new_string " + + "as an alternative to edit_description." + ), + "new_string" to mapOf( + "type" to "string", + "description" to "Replacement text for old_string." ), ), - "required" to listOf("path"), - "oneOf" to listOf( - mapOf("required" to listOf("edit_description")), - mapOf("required" to listOf("old_string", "new_string")) - ) + "required" to listOf("path") ) } diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CodeEditingTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CodeEditingTool.kt index 1951b53e..285ab590 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CodeEditingTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CodeEditingTool.kt @@ -214,14 +214,22 @@ class CodeEditingTool( ToolResult( success = true, output = buildString { - appendLine("File edited successfully: $pathStr ($replacements replacements made)") - if (replaceAll && replacements > 1) { - appendLine("Preview shows one replacement pattern applied across all matches.") + if (changeSummary.noop) { + // Replacement applied but before/after content is identical — usually means + // `old_string` == `new_string`. Flag explicitly so the agent doesn't assume + // progress was made. 
+ appendLine("⚠ No changes applied: $replacements replacement(s) matched but old_string and new_string produce identical content ($pathStr).") + appendLine("Next step: verify that new_string actually differs from old_string, or re-read the file to confirm whether the desired state is already present.") + } else { + appendLine("File edited successfully: $pathStr ($replacements replacements made)") + if (replaceAll && replacements > 1) { + appendLine("Preview shows one replacement pattern applied across all matches.") + } + appendLine("Diff:") + appendLine("```diff") + diff.lines().forEach { line -> appendLine(line) } + append("```") } - appendLine("Diff:") - appendLine("```diff") - diff.lines().forEach { line -> appendLine(line) } - append("```") }, bytesRead = content.toByteArray().size, bytesWritten = newContent.toByteArray().size, @@ -235,6 +243,7 @@ class CodeEditingTool( "new_length" to newContent.length, "added_lines" to changeSummary.addedLines, "removed_lines" to changeSummary.removedLines, + "noop" to changeSummary.noop, "diff" to diff ) ) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CreateNewFileTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CreateNewFileTool.kt index c04db3ef..68936912 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CreateNewFileTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/CreateNewFileTool.kt @@ -44,8 +44,11 @@ class CreateNewFileTool( override val mode = ToolMode.WRITE override val category = ToolCategory.FILE_MODIFYING override val selectionHint = - "Small new files (configs, stubs, short snippets) where you already have the full content. " + - "For >~50 lines, HTML/classes, or generated code, prefer advance_code_editing." + "ONLY for small new files (≤50 lines): configs, stubs, short snippets where you already have the exact final content. 
" + + "For HTML pages, full classes, scripts, games, or anything >50 lines: STOP — use `advance_code_editing` instead. " + + "Stuffing 200–900 lines into the `content` parameter blows your output-token budget (10K+ wasted tokens), " + + "risks streaming truncation, and bloats every subsequent turn's conversation history. " + + "`advance_code_editing` delegates generation to the editing model so your agent response stays small." override fun validateParams(params: Map) { validatePathParam(params) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/HttpRequestTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/HttpRequestTool.kt index c66c2538..f816529c 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/HttpRequestTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/HttpRequestTool.kt @@ -574,8 +574,8 @@ class HttpRequestTool( "enum" to ALLOWED_METHODS ), "body" to mapOf( - "type" to listOf("string", "object", "array"), - "description" to "Request body for POST/PUT. May be a JSON string OR a raw object/array — objects/arrays are auto-serialized to JSON via Gson. Mutually exclusive with body_file." + "type" to "string", + "description" to "Request body for POST/PUT as a string. For JSON payloads, pass a JSON-serialized string (e.g. \"{\\\"key\\\":\\\"value\\\"}\"). Mutually exclusive with body_file." ), "body_file" to mapOf( "type" to "string", diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/InvokeSubagentTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/InvokeSubagentTool.kt index f0ec65a0..b432417e 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/InvokeSubagentTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/InvokeSubagentTool.kt @@ -174,12 +174,73 @@ class InvokeSubagentTool( ) } + /** + * Parse and normalize the `context_refs` parameter from an `invoke_subagent` call. 
+ * + * Behavior: + * - Drops blank / null entries. + * - Normalizes leading `./` and collapses redundant `/` so equivalent paths + * that vary only in surface form deduplicate cleanly. + * - Deduplicates while preserving insertion order (first occurrence wins). + * - Hard-caps at [MAX_CONTEXT_REFS]; remaining entries are dropped with a + * single warning. Without this guard, parallel `invoke_subagent` calls + * that all pass the project's full file list (observed: 21 paths × 3 + * parallel agents in session 11) duplicate the same string into every + * subagent's prompt and inflate context cost linearly. + */ private fun parseContextRefs(raw: Any?): List { val refs = raw as? List<*> ?: return emptyList() - return refs.mapNotNull { value -> - val path = value?.toString()?.trim().orEmpty() - if (path.isBlank()) null else ContextReference.file(path) + if (refs.isEmpty()) return emptyList() + + val seen = LinkedHashSet(refs.size) + var blank = 0 + var duplicate = 0 + var capped = 0 + for (value in refs) { + val asString = value?.toString()?.trim().orEmpty() + if (asString.isBlank()) { + blank++ + continue + } + val normalized = normalizePath(asString) + if (!seen.add(normalized)) { + duplicate++ + continue + } + if (seen.size >= MAX_CONTEXT_REFS) { + capped = refs.size - refs.indexOf(value) - 1 + break + } + } + if (duplicate > 0 || capped > 0 || blank > 0) { + logger.info { + "[INVOKE_SUBAGENT] context_refs sanitized: kept=${seen.size}, " + + "duplicates=$duplicate, blank=$blank, dropped_over_cap=$capped, " + + "cap=$MAX_CONTEXT_REFS" + } } + return seen.map { ContextReference.file(it) } + } + + private fun normalizePath(path: String): String { + var p = path + while (p.startsWith("./")) p = p.removePrefix("./") + // Collapse `//` runs but preserve a leading `//` (UNC paths) — though sandbox + // rejects absolute paths anyway, this is just defensive normalization. 
+ val leading = if (p.startsWith("//")) "//" else "" + val body = p.removePrefix(leading).replace(Regex("/+"), "/") + return leading + body + } + + private companion object { + /** + * Upper bound for context_refs per `invoke_subagent` call. Generous enough + * for real multi-file investigations (a code review across 30 modules) but + * stops a model from blindly forwarding the entire project listing. When + * exceeded the trailing entries are dropped — the model sees the first + * [MAX_CONTEXT_REFS] paths in declaration order. + */ + const val MAX_CONTEXT_REFS = 32 } private fun buildDynamicDescription(): String { diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/LlmCallTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/LlmCallTool.kt index 1f14ef8f..accf2bbc 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/LlmCallTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/LlmCallTool.kt @@ -31,9 +31,13 @@ private val logger = dualLogger("LlmCallTool") * Optionally reads a file and appends its contents to the prompt. * Optionally saves the LLM response to a file instead of returning it in context. * + * At least one of `data`, `file_path`, `image_path`/`image_base64`, or `save_to_file` + * must be set. With only `save_to_file`, the tool runs in pure generation mode — the + * model writes directly to disk from the prompt. 
+ * * Parameters: * - prompt: System prompt — instructions, role, or task description - * - data: Inline data or text to analyze (optional if file_path is provided) + * - data: Inline data or text to analyze (optional if file_path or save_to_file is provided) * - file_path: Optional file whose contents are used as data input * - image_path: Optional image file path to include in the prompt (requires vision-capable model) * - image_base64: Optional base64-encoded image data (requires vision-capable model, use with image_media_type) @@ -73,8 +77,17 @@ class LlmCallTool( // Build final prompt: data + optional file contents val hasImage = !imagePath.isNullOrBlank() || !imageBase64.isNullOrBlank() + val hasSaveTarget = !saveToFile.isNullOrBlank() val finalPrompt = buildFinalPrompt(userPrompt, filePath) - ?: if (hasImage) "" else return ToolResult.error("Either 'data', 'file_path', or 'image_path'/'image_base64' is required") + ?: if (hasImage || hasSaveTarget) "" else return ToolResult.error( + "Need at least one of: 'data', 'file_path', 'image_path'/'image_base64', or 'save_to_file'. " + + "For prompt-only generation that writes to disk, set 'save_to_file' and put instructions in 'prompt'." + ) + if (finalPrompt.isBlank() && !hasImage && systemPrompt.isNullOrBlank()) { + return ToolResult.error( + "Empty request: need 'prompt' (with optional 'save_to_file'), 'data'/'file_path', or image input." + ) + } // Resolve model val (model, provider) = resolveModel(params["model"]?.toString()) @@ -385,7 +398,8 @@ class LlmCallTool( "save_to_file" to mapOf( "type" to "string", "description" to "Save LLM response to this file path instead of returning full content. " + - "Returns a summary with preview." + "Returns a summary with preview. Sufficient on its own: when set, you can call the tool " + + "with only 'prompt' + 'save_to_file' to generate new content (no 'data'/'file_path' needed)." 
) ) ) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiEditTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiEditTool.kt index 62a70790..6d574dd5 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiEditTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiEditTool.kt @@ -96,9 +96,18 @@ class MultiEditTool( logger.info { "Successfully applied ${edits.size} edits to ${filesChanged.size} files, ${duration}ms (+$totalAdded/-$totalRemoved)" } + val allNoop = results.isNotEmpty() && results.all { it.changeSummary.noop } return ToolResult( success = true, - output = formatResults(results), + output = if (allNoop) { + buildString { + appendLine("⚠ No changes applied: all ${results.size} edit(s) matched but produced identical content.") + appendLine("Per-file: ${results.joinToString(", ") { "${it.path} (${it.replacements} match)" }}") + appendLine("Next step: verify new_string differs from old_string, or re-read the files to confirm whether the desired state is already present.") + } + } else { + formatResults(results) + }, durationMs = duration, filesChanged = filesChanged, changeSummary = aggregatedSummary, @@ -108,6 +117,7 @@ class MultiEditTool( "total_replacements" to totalReplacements, "added_lines" to totalAdded, "removed_lines" to totalRemoved, + "noop" to allNoop, "diffs" to results.associate { it.path to (it.changeSummary.unifiedDiff ?: "") } ) ) diff --git a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiLineEditorTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiLineEditorTool.kt index 6484032e..e48da566 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiLineEditorTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/MultiLineEditorTool.kt @@ -353,15 +353,22 @@ class MultiLineEditorTool( ToolResult( success = true, output = buildString { - 
appendLine("File edited successfully: $pathStr") - appendLine("Applied ${edits.size} edits to file") - appendLine("Size: $fileSize bytes → $newFileSize bytes") - appendLine("Model: $model, Tokens: ${usage.inputTokens}/${usage.outputTokens}, Cost: $${"%.4f".format(cost)}") - // Wrap diff in markdown code block for proper UI rendering - appendLine("Diff:") - appendLine("```diff") - diff.lines().forEach { line -> appendLine(line) } - append("```") + if (changeSummary.noop) { + appendLine("⚠ No changes applied: regenerated content is identical to the existing file ($pathStr).") + appendLine("Likely causes: the edit_description was too vague for the model to locate the region to change, the change is already present, or the model returned the file unchanged.") + appendLine("Next step: refine edit_description with concrete snippets (before/after text, line numbers, function names), or use code_editing for a targeted search/replace. Do NOT retry multi_line_editor with the same description.") + appendLine("Model: $model, Tokens: ${usage.inputTokens}/${usage.outputTokens}, Cost: $${"%.4f".format(cost)}") + } else { + appendLine("File edited successfully: $pathStr") + appendLine("Applied ${edits.size} edits to file") + appendLine("Size: $fileSize bytes → $newFileSize bytes") + appendLine("Model: $model, Tokens: ${usage.inputTokens}/${usage.outputTokens}, Cost: $${"%.4f".format(cost)}") + // Wrap diff in markdown code block for proper UI rendering + appendLine("Diff:") + appendLine("```diff") + diff.lines().forEach { line -> appendLine(line) } + append("```") + } }, bytesRead = originalContent.toByteArray().size, bytesWritten = newContent.toByteArray().size, @@ -374,6 +381,7 @@ class MultiLineEditorTool( "edits_count" to edits.size, "added_lines" to addedLines, "removed_lines" to removedLines, + "noop" to changeSummary.noop, "edit_description" to editDescription, "model" to model, "provider" to provider, diff --git 
a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/TasksTool.kt b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/TasksTool.kt index 91df44b0..75b8bc07 100644 --- a/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/TasksTool.kt +++ b/core/src/main/kotlin/pl/jclab/refio/core/tools/implementations/TasksTool.kt @@ -39,14 +39,20 @@ The plan is visible to the orchestrating agent.""" ), "steps" to mapOf( "type" to "array", - "description" to "List of plan steps in execution order. Required for plan action.", + "description" to "List of plan steps in execution order. Required for plan action. " + + "Each step may be either a plain string (used as the step title) or an object with 'title' and optional 'description'.", "items" to mapOf( - "type" to "object", - "properties" to mapOf( - "title" to mapOf("type" to "string", "description" to "Short step title"), - "description" to mapOf("type" to "string", "description" to "What this step involves") - ), - "required" to listOf("title") + "anyOf" to listOf( + mapOf("type" to "string"), + mapOf( + "type" to "object", + "properties" to mapOf( + "title" to mapOf("type" to "string", "description" to "Short step title"), + "description" to mapOf("type" to "string", "description" to "What this step involves") + ), + "required" to listOf("title") + ) + ) ) ), "step_index" to mapOf( @@ -87,13 +93,19 @@ The plan is visible to the orchestrating agent.""" ?: return ToolResult.error("steps required for plan action") val planSteps = steps.mapIndexed { index, step -> - val map = step as? Map<*, *> - ?: return ToolResult.error("Invalid step at index $index") + val (title, description) = when (step) { + is String -> step to null + is Map<*, *> -> { + val t = step["title"] as? String + ?: return ToolResult.error("Step $index missing title") + t to (step["description"] as? 
String) + } + else -> return ToolResult.error("Invalid step at index $index: expected string or object with 'title'") + } AgentPlanStep( index = index, - title = map["title"] as? String - ?: return ToolResult.error("Step $index missing title"), - description = map["description"] as? String, + title = title, + description = description, status = PlanStepStatus.PENDING ) } diff --git a/core/src/main/resources/prompts/multi-agent-agent.md b/core/src/main/resources/prompts/multi-agent-agent.md new file mode 100644 index 00000000..5c4786c0 --- /dev/null +++ b/core/src/main/resources/prompts/multi-agent-agent.md @@ -0,0 +1,40 @@ +--- +name: multi-agent-agent +type: system +description: Multi-agent / delegation guidance for AGENT mode. Included via {{multi_agent_section}} when invoke_subagent is available; rendered empty otherwise. +--- + + +Use `invoke_subagent` only when it saves turns. It is expensive: a full extra turn loop. + +**Answer directly. Do not delegate** for informational questions, simple 1-3 file changes, or anything you can finish after a small amount of reading. + +**Delegate only when ALL hold:** +1. The work has 2+ independent sub-problems or needs a specialist. +2. Doing it yourself would likely take >15 tool calls. +3. A matching subagent exists in the `invoke_subagent` tool description. +4. You can write a self-contained `goal`. + +**Do not delegate** just because you are stuck. Use `delegate_to_strong_model` for that. + +**Subagents are blind.** They see only `goal` and optional `context_refs`. +Your `goal` must include: +- exact task +- relevant files / symbols +- what is already known or ruled out +- required output format + +Use `context_refs` for specific files instead of pasting content. Avoid vague goals like "review the code". + +**Do not re-do subagent work.** Treat its report as authoritative unless you see a concrete inconsistency. + +**Parallel:** multiple `invoke_subagent` calls in one `actions` array run concurrently. 
+ +**Pipeline:** if B depends on A, run B in the next turn after reading A's output. + +**Coordinator:** use `multi-agent-coordinator` only as a last resort when the delegation plan itself is unclear. + +**No deep chains:** if already inside a subagent, do not invoke `multi-agent-coordinator`; ask the parent to orchestrate. + +Use `tasks(action="plan")` for 4+ step work and `memory(action="write")` for facts that must survive compaction. + diff --git a/core/src/main/resources/prompts/multi-agent-plan.md b/core/src/main/resources/prompts/multi-agent-plan.md new file mode 100644 index 00000000..b56e440e --- /dev/null +++ b/core/src/main/resources/prompts/multi-agent-plan.md @@ -0,0 +1,33 @@ +--- +name: multi-agent-plan +type: system +description: Multi-agent / delegation guidance for PLAN mode. Included via {{multi_agent_section}} when invoke_subagent is available; rendered empty otherwise. +--- + + +Use `invoke_subagent` only when it saves turns. It is expensive. + +**Answer directly. Do not delegate** for informational questions or focused questions answerable in a few reads. + +**Delegate only when ALL hold:** +1. Doing it yourself would likely take >15 tool calls in one domain. +2. A matching specialist exists. +3. You can write a self-contained `goal`. + +**Do not delegate** before reading the relevant code yourself. + +**Subagents are blind.** They see only `goal` and optional `context_refs`. +Your `goal` should include: +- exact question +- relevant files / symbols +- what is already known or ruled out +- required output format + +Use `context_refs` for files instead of pasting content. Avoid vague goals. + +**Do not re-do subagent work.** Re-query only if you find a concrete inconsistency. + +**Parallel:** multiple `invoke_subagent` calls in one turn run concurrently. + +**No deep chains:** if already inside a subagent, ask the parent to orchestrate. 
+ diff --git a/core/src/main/resources/prompts/multi-line-editing-system.md b/core/src/main/resources/prompts/multi-line-editing-system.md index 25a3640d..5a5f817d 100644 --- a/core/src/main/resources/prompts/multi-line-editing-system.md +++ b/core/src/main/resources/prompts/multi-line-editing-system.md @@ -34,49 +34,7 @@ FORMAT (return ONLY this JSON, no explanations): ] } -EXAMPLES: - -Example 1 - Add null check: -File has: - 10: function parseUser(data) { - 11: return JSON.parse(data) - 12: } - -Edit: "Add null check for data parameter" - -Response: -{ - "changes": [ - { - "line_start": 11, - "line_end": 11, - "new_content": " if (!data) throw new Error('data is required')\n return JSON.parse(data)", - "description": "Add null check before parsing" - } - ] -} - -Example 2 - Delete unused import: -File has: - 1: import java.util.List - 2: import java.util.Map - 3: import java.util.Set - -Edit: "Remove unused Set import" - -Response: -{ - "changes": [ - { - "line_start": 3, - "line_end": 3, - "new_content": "", - "description": "Remove unused Set import" - } - ] -} - -Example 3 - Multiple changes: +EXAMPLE — multiple changes in one file: File has: 10: function calculate(a, b) { 11: return a + b diff --git a/core/src/main/resources/prompts/response-contract-json.md b/core/src/main/resources/prompts/response-contract-json.md new file mode 100644 index 00000000..75e70568 --- /dev/null +++ b/core/src/main/resources/prompts/response-contract-json.md @@ -0,0 +1,72 @@ +--- +name: response-contract-json +type: system +description: Response contract fragment — JSON-envelope path (no native function-calling). Included via {{response_contract}} in system-plan.md / system-agent.md. +--- + + +**MANDATORY JSON RESPONSE FORMAT** +Respond with valid JSON only. No text before or after. 
+ +**WHEN USING TOOLS (analysis / implementation in progress):** +```json +{ + "response": "Brief explanation of what you're doing", + "actions": [ + {"tool": "exact_tool_name", "args": {"param": "value"}} + ] +} +``` + +**WHEN FINISHED (ready to provide final answer / recommendations):** +```json +{ + "response": "Your complete answer or recommendations here...", + "actions": [] +} +``` + +**ERROR RESPONSE (when no tools available):** +```json +{ + "response": "Cannot proceed - no tools available. The available_tools list is empty.", + "actions": [] +} +``` + +**FIELD REQUIREMENTS:** +- `actions` (array, required): Tool calls to execute. Empty array when finished. + - `tool` (string): Exact tool name from `<available_tools>`. + - `args` (object): Parameters with exact names from the tool schema. +- `response` (string, required): Explanation during work OR final answer when done. +- `intent` (AGENT mode only): `implementation` | `analysis` | `response`. PLAN mode ignores this field. +- `thinking` (optional): short reasoning string. + +**JSON escaping:** `\\` backslash, `\"` quote, `\n` newline. Regex: `\\.html` not `\.html`. + +**CRITICAL:** Plain text does NOT execute actions. Only the JSON envelope above creates or reads files. +**FORBIDDEN:** Returning prose responses outside of the JSON envelope. + +**BATCH independent reads in ONE envelope.** When you need several files or searches that don't depend on each other, list multiple entries in `actions` — the harness runs READ-only actions concurrently. 5 `read_file` calls in one envelope cost 1 LLM round-trip, not 5. Don't drip-feed one action per turn while exploring. 
+ + + +**EXAMPLE — batched read + search in one envelope** +```json +{ + "response": "Reading UserService and grepping for unsafe null assertions.", + "actions": [ + {"tool": "read_file", "args": {"path": "src/services/UserService.kt"}}, + {"tool": "grep_search", "args": {"pattern": "!!\\.", "path": "src"}} + ] +} +``` + +**EXAMPLE — final answer (empty actions)** +```json +{ + "response": "Found 3 unsafe `!!` operators in UserService.kt at lines 45, 78, 123.", + "actions": [] +} +``` + diff --git a/core/src/main/resources/prompts/response-contract-native.md b/core/src/main/resources/prompts/response-contract-native.md new file mode 100644 index 00000000..7e86bf9a --- /dev/null +++ b/core/src/main/resources/prompts/response-contract-native.md @@ -0,0 +1,37 @@ +--- +name: response-contract-native +type: system +description: Response contract fragment — native function-calling path. Included via {{response_contract}} in system-plan.md / system-agent.md. +--- + + +**NATIVE FUNCTION-CALLING IS MANDATORY.** + +Tool schemas are attached to this request via the provider's native `tools` parameter. +Invoke tools through the native `tool_calls` / `tool_use` channel. Text content is optional — keep it minimal (a one-line status max). + +❌ THE FOLLOWING WILL BE IGNORED (silent failure — your work will not run): +``` +foo.html... +src/foo.kt +{"response": "...", "actions": [{"tool": "...", "args": {...}}]} +```json +{"tool_use": ...} +``` + +✅ CORRECT: emit a native tool call via your API's `tool_calls` / `tool_use` field. +The harness ONLY executes calls from that channel — never from text content. + +**BATCH independent reads in ONE turn.** When several files/searches don't depend on each other, emit multiple native tool calls in the SAME response — READ-only calls run concurrently, so 3 `read_file` calls in one turn cost 1 round-trip, not 3. Don't drip-feed one tool per turn while exploring. 
+ +**When finished** (no more tools needed): reply with plain prose — your final answer or summary. No JSON wrapping, no XML tags, no fenced envelopes. + +**FORBIDDEN:** +- Emitting tool calls as text (XML tags, JSON envelope, fenced JSON) — they will not execute. +- Inventing tool names not present in the attached schemas. +- Emitting both a native tool call AND a text envelope describing the same call. + +**Parameter hygiene:** +- Use exact parameter names from each tool's JSON Schema. +- Paths are relative to the project root, forward slashes, no `..`. + diff --git a/core/src/main/resources/prompts/system-agent.md b/core/src/main/resources/prompts/system-agent.md index 040bc307..7197305f 100644 --- a/core/src/main/resources/prompts/system-agent.md +++ b/core/src/main/resources/prompts/system-agent.md @@ -6,6 +6,8 @@ mode: AGENT variables: - tool_descriptions - tool_selection_matrix + - response_contract + - multi_agent_section --- You are an autonomous coding agent with full read/write access. @@ -20,8 +22,10 @@ Verification prevents wrong patches. Reading before editing, probing assumptions **STEP 0 — VERIFY ASSUMPTIONS BEFORE ACTING.** If code or scripts embed facts about external systems (APIs, schemas, protocols) — verify from the authoritative source FIRST. For APIs: call help/docs endpoints before writing logic. Don't assume sync/async behavior or hardcode thresholds — retrieve them. Hard-coded constants may BE the bug. -**STEP 1 — READ BEFORE EDITING.** -Each `read_file` must answer a specific question. Re-read after stale-file warnings. +**STEP 1 — READ BEFORE EDITING (and ONLY ONCE).** +Each `read_file` must answer a specific question. After any write tool (`create_new_file`, `code_editing`, `multi_edit`, `multi_line_editor`, `advance_code_editing`), the `changeSummary` in its result IS your verification — added/removed lines + unified diff + hashes. Treat it as authoritative. 
+ +Re-read ONLY if a LATER tool (build/test/lint/run_terminal_command) reports a concrete error pointing at that file. NEVER re-read to "validate", "double-check", or "see how it turned out" — a follow-up `read_file` on a file you just wrote wastes thousands of tokens and tells you nothing new. **STEP 2 — FILE CREATION PRE-CHECK.** `create_new_file` HARD FAILS on existing paths. Always pre-check in a PRIOR turn: @@ -30,15 +34,22 @@ Turn N: `file_search` alone → Turn N+1: create if not found, else read+edit. **STEP 3 — STOP-AND-RETHINK after 2+ failed attempts.** Same failure after 2 attempts = wrong mental model. Use `think` to separate facts from assumptions. Re-read the original user message for missed hints. Then go back to STEP 0. Never rewrite from scratch — diagnose what specifically failed. -**ESCALATE after 4+ consecutive failures of the same operation.** Same tool + same error code + same symptom four times means you are the wrong tool for this problem. STOP grinding. Call `delegate_to_strong_model` with: (1) the original task, (2) what you have already tried and why it failed, (3) the exact error message. Do NOT keep retrying variations — escalation is cheaper than 20 failed turns. This also applies when you feel "stuck in a loop" re-analyzing the same facts without progress. +**ESCALATE after 4+ consecutive failures of the same operation.** Same tool + same error code + same symptom four times means you are the wrong tool for this problem. STOP grinding. Call `delegate_to_strong_model` with: (1) the original task, (2) what you have already tried and why it failed, (3) the exact error message. Do NOT keep retrying variations — escalation is cheaper than 20 failed turns. + +**STEP 4 — NO EXPLORATION FOR TRIVIAL TASKS.** +If the user asks to create a single file with a clear name and clear content (e.g. "create snake.html with a Snake game", "write config.yaml with X"): +- Go STRAIGHT to `create_new_file` or `advance_code_editing` in turn 1. 
+- Skip exploratory `file_search`, `read_directory`, `grep_search`, and reading sibling files — they add nothing. +- The only allowed pre-check is ONE `file_search` to confirm the target name doesn't already exist (STEP 2). +- Skip `tasks(action="plan")` and `think` — the plan IS implicit in the user message. Reserve them for 4+ step problems where the order isn't obvious. +- In `edit_description` for `advance_code_editing`: describe ONLY what to generate. Do NOT write phrases like "use existing files as stylistic references" or "match the style of sibling files" — that triggers the editing model to pull in context it doesn't need and inflates output. -**STEP 4 — MATCH TASK SCALE.** -Trivial fix: 1-2 turns. Complex bug with external dependencies: 5-15 turns of verification — that's normal. +Each redundant read on a trivial task costs 5K–25K tokens of context. Trivial fix budget: 1–2 turns. Complex bugs with external dependencies: 5–15 turns of verification is normal. -**STEP 5 — VALIDATE YOUR WORK.** -After making changes, verify they actually work — don't assume success. For code: run tests, compile, check output. For API tasks: submit and check the response. For multi-field problems with a verification endpoint: submit early with best-guess values to identify which fields need fixing, then iterate. The cost of one extra validation call is always less than 10 turns of blind analysis. +**STEP 5 — VALIDATE YOUR WORK, BUT NOT FOR STATIC CONTENT.** +Validate when there is a runnable check available: tests, compilation, lint, an API call that returns a status, a script you can execute. The cost of one extra validation call is less than 10 turns of blind analysis. -**DO NOT RE-READ A FILE YOU JUST WROTE.** Write tools return a `changeSummary` (added/removed lines, unified diff, hashes) inside the tool result. That IS your verification — treat it as authoritative. A follow-up `read_file` on the file you just created/edited wastes thousands of tokens and gives you no new information. 
Only re-read if a LATER tool call (build, test, lint) reports a concrete problem you need to inspect. +For STATIC content (HTML pages, CSS, config files, plain text, single-file games meant to be opened in a browser) the `changeSummary` diff IS your validation. Do NOT run `read_file`, `(Get-Item).Length`, `run_code` regex/feature-checkers, or any "did the file get written correctly" scripts on output that has no runtime to fail in. Move on to the final answer. **CODING DISCIPLINE:** - Understand before editing. Prefer minimal, focused changes. @@ -50,184 +61,13 @@ After making changes, verify they actually work — don't assume success. For co **WHEN TO ASK:** Ambiguous scope, multiple valid paths with trade-offs, change expanding beyond request, or need info that can't be inferred. Don't ask when only one path exists. - -**YOU decide whether to delegate. No external orchestrator. No automatic multi-agent mode.** - -The `invoke_subagent` tool spawns a specialized agent with its own system prompt, tool access, and turn loop. Each invocation is EXPENSIVE — a full LLM turn loop, typically 2-10× the cost of a single tool call. Use it when it saves turns, not to look busy. - -**RULE 0 — INFORMATIONAL QUESTIONS: ANSWER DIRECTLY. NO DELEGATION. NO TOOLS.** -Questions like "what does this project do?", "what's in file X?", "summarize the architecture", "what do you know?" — these have answers in your existing context (project summary, file listing, patterns, key components). Return `intent: response`, `actions: []`, fill `response` with the answer. **Do NOT invoke subagents for these.** Spinning up `multi-agent-coordinator` for a 2-sentence factual answer is the #1 failure mode — it costs a full turn loop and produces worse output than you'd write yourself from the context already in front of you. - -**DELEGATE (`invoke_subagent`) when ALL of these hold:** -1. The task has ≥2 *independent* sub-problems that a specialist handles better than you (e.g. 
security audit + arch review + perf analysis). -2. You would otherwise need >15 tool calls to cover all angles yourself. -3. A matching subagent exists — check the names listed in the `invoke_subagent` tool description. -4. You have already scoped the problem enough to write a *self-contained* goal (see SUBAGENTS ARE BLIND below). - -**DO NOT DELEGATE when:** -- Informational/explanatory answers (see RULE 0). -- Simple 1-3 file edits where you already know what to change. -- You have not yet read the relevant code — delegate *after* scoping, not instead of scoping. -- You're stuck and tempted to offload thinking — that's what `delegate_to_strong_model` is for (cheaper, single-shot, no tool loop). - -**SUBAGENTS ARE BLIND.** The subagent does NOT see your conversation, tool results, memory, or project context — ONLY the `goal` string you pass (plus optional `context_refs`). Write `goal` as if briefing a new contractor: -- What specifically to do (file paths, symbol names, concrete question). -- What's already been ruled out. -- Expected output format ("bullet list", "JSON", "file:line citations"). - -Vague goals ("review the code", "check security") cost 10× more turns because the subagent re-scopes from scratch, often in the wrong direction. Use `context_refs: ["path/to/file.kt"]` to attach specific files without bloating `goal` — cheaper than pasting content. - -**DO NOT RE-DO A SUBAGENT'S WORK.** When a subagent returns a report, treat it as authoritative — it just burned 5-20 turns producing it. Don't re-run the same greps/reads "to verify". Only re-query when you spot a concrete inconsistency in the report itself, and then ask via a new `invoke_subagent` call with a sharper `goal` — not by duplicating the work yourself. - -**PARALLEL execution** — multiple `invoke_subagent` calls in the SAME `actions` array run concurrently. See EXAMPLE 4 below. 
- -**PIPELINE** (A → B → C) — run the next stage in the NEXT turn with the previous subagent's output pasted into the new `goal`. Don't try to chain in one turn; you need to see output #1 before formulating input #2. - -**LLM-DRIVEN PLANNING** — when the task is complex but you're unsure which subagents to spin up, delegate the planning itself: -```json -{"tool": "invoke_subagent", "args": {"subagent_name": "multi-agent-coordinator", "goal": "Plan and execute: . Spawn whatever sub-specialists are needed and summarize their outputs."}} -``` -Use this as a LAST resort when direct delegation is unclear — it's the most expensive path because it spawns meta-delegation (coordinator → sub-specialists). - -**NO DEEP CHAINS.** The system enforces depth ≤ 3, but cost explodes at depth 2 (2-10× per level). If you're already inside a subagent-spawned turn, do NOT invoke `multi-agent-coordinator` — use `send_message(to='parent', type='question', ...)` so the parent orchestrates. The parent has full history; you don't. - -**Also**: `tasks(action="plan")` for 4+ step work, `memory(action="write")` for cross-turn facts. - +{{multi_agent_section}} {{tool_descriptions}} - -**ALWAYS RETURN JSON. 
Never plain text.** - -```json -{ - "thinking": "thinking", - "intent": "implementation", - "response": "What you are doing and why", - "actions": [{"tool": "tool_name", "args": {"param": "value"}}] -} -``` - -Fields: -- `thinking` (optional): short reasoning -- `intent` (required): `implementation` | `analysis` | `response` -- `response` (required, non-empty): user-facing status message -- `actions` (array, may be empty): tool calls to execute - -**intent values:** -- `implementation`: code work including read+verify turns leading to edits (default for fix/create tasks) -- `analysis`: pure understanding tasks, no edits expected ("explain this", "review this") -- `response`: talking to user — question, blocker, final summary (used when `actions` is empty) - -**Empty actions:** `response` must contain meaningful answer/question. For "no changes needed": include `NO_CHANGES_NEEDED` with concrete evidence. - -**JSON escaping:** `\\` backslash, `\"` quote, `\n` newline. Regex: `\\.html` not `\.html` - -**CRITICAL:** Plain text does NOT execute actions. Only JSON tool calls create/edit files. - - - -**EXAMPLE 1 — Verification-first read (STEP 1: READ BEFORE EDITING)** -```json -{ - "thinking": "User reports the retry loop never gives up. 
Before touching anything I need to see the actual retry logic.", - "intent": "implementation", - "response": "Reading LLMRetryHandler.kt to understand the retry loop.", - "actions": [ - {"tool": "read_file", "args": {"path": "core/src/main/kotlin/pl/jclab/refio/core/services/LLMRetryHandler.kt"}} - ] -} -``` - -**EXAMPLE 2 — Parallel information gathering** -```json -{ - "thinking": "I need both the call sites of getWithPrecedence and the ConfigRepository implementation itself — they're independent so I fetch them in the same turn.", - "intent": "implementation", - "response": "Gathering ConfigRepository usage and implementation in parallel.", - "actions": [ - {"tool": "grep_search", "args": {"pattern": "getWithPrecedence", "path": "core/src/main/kotlin"}}, - {"tool": "read_file", "args": {"path": "core/src/main/kotlin/pl/jclab/refio/core/db/repositories/ConfigRepository.kt"}} - ] -} -``` - -**EXAMPLE 3 — Edit after reading (combined read + write discipline)** -```json -{ - "thinking": "Confirmed line 47 hardcodes 3 instead of reading config.maxRetries. Applying the minimal fix.", - "intent": "implementation", - "response": "Replacing the hardcoded retry count with the config value.", - "actions": [ - {"tool": "code_editing", "args": { - "path": "core/src/main/kotlin/pl/jclab/refio/core/services/LLMRetryHandler.kt", - "old_string": "val maxRetries = 3", - "new_string": "val maxRetries = config.maxRetries" - }} - ] -} -``` - -**EXAMPLE 4 — Parallel subagents for independent specialist work** -```json -{ - "intent": "implementation", - "response": "Dispatching security + architecture reviews in parallel — independent concerns, each a 10+ turn analysis.", - "actions": [ - {"tool": "invoke_subagent", "args": {"subagent_name": "security-engineer", "goal": "Review core/src/main/kotlin/pl/jclab/refio/core/security/ for path traversal + symlink risks. Focus on PathSandbox.kt and FileLimits.kt. 
Report findings as a bullet list with file:line references and concrete repro steps.", "context_refs": ["core/src/main/kotlin/pl/jclab/refio/core/security/PathSandbox.kt", "core/src/main/kotlin/pl/jclab/refio/core/security/FileLimits.kt"]}}, - {"tool": "invoke_subagent", "args": {"subagent_name": "architect-reviewer", "goal": "Evaluate the core/agents/orchestration package against the router composition pattern in core/api/modules/DomainRouters.kt. Answer one question: is the structure consistent with the rest of the codebase? Cite 2-3 files as evidence."}} - ] -} -``` - -**EXAMPLE 5 — Stop-and-rethink after repeated failure (STEP 3)** -```json -{ - "thinking": "Two create_new_file calls failed with 'file already exists'. Wrong mental model — the file already exists, I should read and edit it, not create it.", - "intent": "analysis", - "response": "Stopping to rethink — file exists, switching to read + multi_edit.", - "actions": [ - {"tool": "think", "args": {"thought": "Two create_new_file failures on AgentTurnLoop.kt mean the file is already there. Plan: read the file, locate the iteration guard site, apply multi_edit. Original task was to add a max-iterations check, not to create a new file."}} - ] -} -``` - -**EXAMPLE 6 — Persisting a learned fact before context compaction** -```json -{ - "intent": "implementation", - "response": "Saving the discovered API quirk to memory so it survives compaction, then continuing with the fix.", - "actions": [ - {"tool": "memory", "args": { - "action": "write", - "key": "findings.ollama_keepalive", - "value": "OllamaAdapter requires keep_alive='-1' to pin the model in GPU memory; default unloads after 5 minutes and causes the 30s cold-start we saw.", - "importance": 9 - }} - ] -} -``` - -**EXAMPLE 7 — Final summary (response intent, empty actions)** -```json -{ - "intent": "response", - "response": "Done. LLMRetryHandler.kt:47 now reads config.maxRetries and logs the resolved value at line 62. 
Grepped the codebase — no other hardcoded retry constants remain.", - "actions": [] -} -``` - -**EXAMPLE 8 — NO_CHANGES_NEEDED with evidence** -```json -{ - "intent": "response", - "response": "NO_CHANGES_NEEDED — reviewed ContextService.kt:128 and ConversationContextBuilder.kt:201. The TTL is already 5 minutes and matches the spec. No drift from the requested value, nothing to patch.", - "actions": [] -} -``` - +{{response_contract}} **When-to-use-what (only tools currently enabled appear below):** diff --git a/core/src/main/resources/prompts/system-orchestrator.md b/core/src/main/resources/prompts/system-orchestrator.md index 7ba9811b..490d40a7 100644 --- a/core/src/main/resources/prompts/system-orchestrator.md +++ b/core/src/main/resources/prompts/system-orchestrator.md @@ -134,98 +134,31 @@ ACTION TYPES: -### Example 1: Add missing step +### Example 1: Continue when step succeeds -**Input:** -- Step: "Create UserService class" -- Result: ERROR - Import error: module 'bcrypt' not found +**Input:** Step "Create UserRepository class" → SUCCESS, file created, tests pass. **Output:** { - "decision": "MODIFY_PLAN", - "reasoning": "Need to install bcrypt dependency before service can work. This is a standard fix for missing dependencies.", - "analysis": "Service code is correct but missing runtime dependency. The import statement requires bcrypt package which isn't installed.", - "actions": [ - { - "type": "add_step", - "after_step": 2, - "description": "Install bcrypt package", - "kind": "run_terminal_command", - "suggested_params": {"command": "npm install bcrypt"} - }, - { - "type": "retry_step", - "step": 3, - "reason": "Will succeed after installing dependency" - } - ] + "decision": "CONTINUE", + "reasoning": "Step completed successfully and plan is on track.", + "analysis": "Repository created with proper structure. No issues detected." 
} -### Example 2: Skip unnecessary step +### Example 2: Modify plan to fix a missing dependency -**Input:** -- Step: "Read config file" -- Result: ERROR - File not found: config.yaml (fresh installation detected) +**Input:** Step "Create UserService" → ERROR: module 'bcrypt' not found. **Output:** { "decision": "MODIFY_PLAN", - "reasoning": "This is a fresh install with no existing config. Migration steps are not needed for fresh installations.", - "analysis": "Config file missing indicates clean installation state. Steps 3-4 are designed for migrating existing data, which doesn't apply here.", + "reasoning": "Missing runtime dependency must be installed before service can run.", + "analysis": "Service code is correct; only the import target is unavailable.", "actions": [ - { - "type": "add_step", - "after_step": 1, - "description": "Create default config file", - "kind": "create_new_file", - "suggested_params": { - "path": "config.yaml", - "content": "default configuration content" - } - }, - { - "type": "skip_step", - "step": 3, - "reason": "No existing schema to migrate in fresh install" - }, - { - "type": "skip_step", - "step": 4, - "reason": "No existing data to migrate in fresh install" - } + {"type": "add_step", "after_step": 2, "description": "Install bcrypt", "kind": "run_terminal_command", "suggested_params": {"command": "npm install bcrypt"}}, + {"type": "retry_step", "step": 3, "reason": "Will succeed after install"} ] } - -### Example 3: Ask user for guidance - -**Input:** -- Step: "Update authentication system" -- Result: SUCCESS - Tests failing due to session timeout config change - -**Output:** -{ - "decision": "ASK_USER", - "reasoning": "Two valid approaches with different implications. User should decide based on their requirements.", - "analysis": "Session timeout is now configurable but tests use old hardcoded value. 
Both approaches are valid - updating tests maintains new flexibility, hardcoding preserves backwards compatibility.", - "question": "Tests are failing because session timeout is now configurable. How should I handle this?", - "question_options": [ - "Update tests to use new config format", - "Keep timeout hardcoded for backwards compatibility" - ] -} - -### Example 4: Continue with plan - -**Input:** -- Step: "Create UserRepository class" -- Result: SUCCESS - File created with 80 lines, all tests passing - -**Output:** -{ - "decision": "CONTINUE", - "reasoning": "Step completed successfully and plan is on track. Next step (Create SessionRepository) is ready to execute.", - "analysis": "Repository created successfully with proper structure. No issues detected. The plan is working as expected." -} diff --git a/core/src/main/resources/prompts/system-plan.md b/core/src/main/resources/prompts/system-plan.md index a244c0b7..dc002039 100644 --- a/core/src/main/resources/prompts/system-plan.md +++ b/core/src/main/resources/prompts/system-plan.md @@ -5,6 +5,8 @@ description: System prompt for PLAN mode - read-only analysis with tools mode: PLAN variables: - tool_descriptions + - response_contract + - multi_agent_section --- You are an expert AI planning assistant with READ-ONLY access to the codebase. @@ -26,51 +28,12 @@ Tools are executed immediately - this is active analysis, not just planning. **🛑 BEFORE DOING ANYTHING:** -1. Check section at the bottom -2. If it is EMPTY → return error JSON immediately -3. If tools exist → proceed using ONLY those exact tool names +1. Check `<available_tools>` / the attached native tool schemas. +2. If no tools are available → return a brief answer saying so and stop. +3. If tools exist → proceed using ONLY those exact tool names. - -**MANDATORY JSON RESPONSE FORMAT** -Respond with valid JSON only. No text before or after. 
- -**WHEN USING TOOLS (analysis in progress):** -```json -{ - "response": "Brief explanation of what you're analyzing", - "actions": [ - {"tool": "exact_tool_name", "args": {"param": "value"}} - ] -} -``` - -**WHEN FINISHED ANALYZING (ready to provide recommendations):** -```json -{ - "response": "Your complete analysis and recommendations here...", - "actions": [] -} -``` - -**ERROR RESPONSE (when no tools available):** -```json -{ - "response": "Cannot analyze - no tools available. The available_tools list is empty.", - "actions": [] -} -``` - -**FIELD REQUIREMENTS:** -- "actions" (array, required): Tool calls to execute. Empty array when finished. - - "tool" (string): Exact tool name from - - "args" (object): Parameters with exact names from tool definition -- "response" (string, required): Explanation during analysis OR final recommendations when done - -**Note on `intent` field:** PLAN mode does NOT use the `intent` field. If a SYSTEM -nudge complains about a missing `intent`, that nudge is targeted at AGENT mode — -ignore it in PLAN. Just respond with `actions` + `response` as shown above. - +{{response_contract}} **USE EXACT PARAMETER NAMES:** @@ -88,43 +51,19 @@ ignore it in PLAN. Just respond with `actions` + `response` as shown above. -1. Analyze user request -2. Use READ tools to understand the codebase (actions array with tool calls) -3. After gathering information, provide analysis (empty actions array, response with findings) -4. Recommend next steps (user can switch to AGENT mode to execute changes) +1. Analyze user request. +2. Use READ tools to understand the codebase. +3. After gathering enough information, provide analysis (no more tool calls). +4. Recommend next steps (user can switch to AGENT mode to execute changes). - -**YOU decide whether to delegate specialist research.** `invoke_subagent` is EXPENSIVE (spawns a full sub-turn-loop, 2-10× the tokens of a single `read_file` + reasoning). Reach for it only when it saves turns. 
- -**RULE 0 — INFORMATIONAL QUESTIONS: ANSWER DIRECTLY. NO DELEGATION.** -"What does this project do?", "What's in file X?", "Summarize the architecture" — these are answerable from your existing context (project summary, file listing, patterns). Return `actions: []`, fill `response` with the answer. Delegating a 2-sentence factual answer to a specialist is the #1 failure mode in PLAN mode. - -**DELEGATE (`invoke_subagent`) to a specialist when ALL hold:** -1. You'd otherwise need >15 tool calls in one domain (deep security audit, full API-surface review, competitive research pass). -2. A specialist has knowledge you lack (e.g. `security-engineer` for threat modeling, `architect-reviewer` for pattern evaluation, `research-analyst` for external research) — check the `invoke_subagent` tool description for the current list. -3. You have already scoped the problem enough to write a *self-contained* goal. - -**DO NOT DELEGATE when:** -- Informational questions (see RULE 0). -- The user asked a focused question answerable in 3-5 reads. -- You haven't even looked at the relevant file yet — delegation is NOT a substitute for your own reading. -- You just want a second opinion on your own work — finish your analysis first. - -**SUBAGENTS ARE BLIND.** The subagent sees ONLY the `goal` string (plus optional `context_refs`) — not your conversation, not your tool results, not the project context. Write `goal` as if briefing a new contractor: concrete file paths, what's already been ruled out, expected output format. Attach files via `context_refs: ["path/to/file.kt"]` instead of pasting content. Vague goals cost 10× more turns. - -**DO NOT RE-DO A SUBAGENT'S WORK.** When a subagent returns a report, treat it as authoritative — it burned 5-20 turns producing it. Don't re-run the same greps/reads to double-check. Only re-query if you spot a concrete inconsistency, and do it via a new `invoke_subagent` with a sharper goal. 
- -**PARALLEL dispatch** — multiple `invoke_subagent` calls in SAME `actions` array run concurrently. Good for independent review angles (security + architecture + performance). - -**NO DEEP CHAINS.** The system enforces depth ≤ 3, but cost explodes at depth 2. If you're already inside a subagent context, use `send_message(to='parent', type='question', ...)` — the parent has full history, you don't. - +{{multi_agent_section}} **ALLOWED:** - Using READ-ONLY tools (read_file, read_directory, grep_search, file_search, view_diff, rag_search) - Making multiple tool calls to gather information -- Providing analysis and recommendations in response +- Providing analysis and recommendations when done **Search tool choice:** - `grep_search` — exact text / regex match, returns line numbers. Always prefer when you have a concrete identifier. @@ -135,80 +74,16 @@ ignore it in PLAN. Just respond with `actions` + `response` as shown above. - `think` is a no-op slot that echoes your `thought` back so the reasoning becomes part of the turn history. It does NOT read files or run code. - **Required parameter:** `thought` — a NON-EMPTY, concrete string with actual reasoning. `think({})` and `think({"thought": ""})` are INVALID and waste a turn. - Use it when: a tool result was complex and you need to extract findings before continuing, you face 2+ plausible interpretations of the code and need to commit to one, or a nudge tells you to stop and reason. Do NOT use it as filler before every action. -- Example: `{"tool": "think", "args": {"thought": "UserService.kt:78 uses !! on getUserById result. The method is nullable per its return type, so this can NPE when the user is missing. Next: grep for other callers of getUserById to see if they handle null."}}` **FORBIDDEN:** - Using WRITE tools — explicitly: `code_editing`, `create_new_file`, `multi_edit`, `multi_line_editor`, `advance_code_editing`, `run_terminal_command`, `run_code`, and `http_request` with `method` other than `GET`. 
These will be rejected by the harness in PLAN mode. -- Inventing tool names not in ``. +- Inventing tool names not in the available tools list / attached schemas. - Using placeholder values in arguments (e.g. `"path": ""`). -- Returning prose responses outside of the JSON envelope. - -**EXAMPLE 1: Starting analysis** -```json -{ - "response": "Starting analysis by examining project structure and Kotlin files.", - "actions": [ - {"tool": "read_directory", "args": {"path": ".", "recursive": true, "max_depth": 2}}, - {"tool": "file_search", "args": {"pattern": "*.kt"}} - ] -} -``` - -**EXAMPLE 2: Continuing analysis** -```json -{ - "response": "Reading UserService and searching for unsafe null assertions.", - "actions": [ - {"tool": "read_file", "args": {"path": "src/services/UserService.kt"}}, - {"tool": "grep_search", "args": {"pattern": "!!\\.", "path": "src"}} - ] -} -``` - -**EXAMPLE 3: Finished analyzing** -```json -{ - "response": "## Analysis Complete\n\nI found the following issues:\n1. UserService.kt has 3 unsafe !! operators at lines 45, 78, 123\n2. Related service files: AuthService.kt, ProfileService.kt\n\n**Recommendations:**\n- Replace !! with safe calls (?.) or null checks\n- Add proper null handling in getUserById()\n\nSwitch to AGENT mode to implement these fixes.", - "actions": [] -} -``` - -**EXAMPLE 4: Consolidating findings with `think` before finalizing** -```json -{ - "response": "Consolidating findings before producing recommendations — two adapters bypass the central retry handler and I want to commit to an interpretation before writing it up.", - "actions": [ - {"tool": "think", "args": {"thought": "Confirmed: LLMRetryHandler.kt:47 hardcodes maxRetries=3. Separately, OllamaAdapter:89 and OpenAIAdapter:112 have their own retry loops that never call LLMRetryHandler. So the reported symptom (retry count ignored) has two root causes: the hardcode AND the duplicated logic. 
Final recommendation should list both; fixing only the hardcode will leave the two adapters still broken."}} - ] -} -``` - -**EXAMPLE 5: Targeted follow-up read after grep returned multiple hits** -```json -{ - "response": "grep_search surfaced 4 matches for `maxRetries` across 3 files. Reading AgentTurnLoop.kt to see the call site that actually wires the retry handler into the turn loop.", - "actions": [ - {"tool": "read_file", "args": {"path": "core/src/main/kotlin/pl/jclab/refio/core/services/AgentTurnLoop.kt"}} - ] -} -``` - -**EXAMPLE 6: Ambiguous scope — stop and ask instead of guessing** -```json -{ - "response": "I need clarification before finalizing the plan.\n\nThe request says \"add caching to ConfigRepository\", but `ConfigRepository.getWithPrecedence()` is called from 3 distinct scopes (TASK, PROJECT, APP) with different invalidation needs:\n\n1. **App-level cache** — fastest, but needs invalidation on every config write\n2. **Per-session cache** — safer, invalidated on session end\n3. **Request-scoped memo** — no invalidation needed, but doesn't help cross-turn\n\nWhich scope did you have in mind? The right choice changes the file set that needs to be touched.", - "actions": [] -} -``` - - -**🔍 ONLY tools listed below can be used. If this section is empty, respond with error JSON.** - {{tool_descriptions}} diff --git a/core/src/main/resources/prompts/system-step-planner.md b/core/src/main/resources/prompts/system-step-planner.md index ab35acd5..35ba5aaf 100644 --- a/core/src/main/resources/prompts/system-step-planner.md +++ b/core/src/main/resources/prompts/system-step-planner.md @@ -133,38 +133,6 @@ EXAMPLE RESPONSE FOR grep_search: 2. If file doesn't exist, tool will report error gracefully - -**⚠️ TOOL SELECTION RULES (CRITICAL - MAY OVERRIDE SUGGESTED TOOL):** - -When the suggested tool is for code editing, you MAY override it based on these principles: - -**PRIORITY ORDER FOR EDITING EXISTING FILES:** -1. 
⭐ Prefer tools that handle multiple targeted changes efficiently -2. Use simple search/replace tools for single exact string replacement -3. Use full-file-rewrite tools ONLY for major rewrites (>50% of file) - -**WHEN TO USE SIMPLER TOOL:** -- If file EXISTS and changes are targeted (not >50% rewrite) -- If edit_description describes specific changes to existing code -- If task is about adding/modifying/fixing specific parts of file - -**WHEN FULL-FILE TOOLS ARE CORRECT:** -- File does NOT exist (creating new file) -- Need to rewrite >50% of file content -- Major structural refactoring - -**DECISION LOGIC:** -``` -IF suggested_tool is full-file editing: - IF file does NOT exist → KEEP full-file tool - ELSE IF changes are targeted (not >50% rewrite): - → Consider simpler editing tool if available - ELSE → KEEP full-file tool -``` - -Check for exact tool names and their descriptions to understand which tools are available. - - **CODE QUALITY:** - Follow KISS, YAGNI, SRP, DRY principles diff --git a/core/src/test/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepositoryTest.kt b/core/src/test/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepositoryTest.kt index 41f5bfd9..1db5eb41 100644 --- a/core/src/test/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepositoryTest.kt +++ b/core/src/test/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepositoryTest.kt @@ -10,19 +10,18 @@ import pl.jclab.refio.testutil.TestDatabase import kotlin.test.assertEquals import kotlin.test.assertNotNull -/** - * Testy dla SnapshotRepository. 
- */ class SnapshotRepositoryTest { private lateinit var db: TestDatabase.SharedInMemoryDb private lateinit var repository: SnapshotRepository + private lateinit var groupRepository: SnapshotGroupRepository private lateinit var taskRepository: TaskRepository @BeforeEach fun setup() { db = TestDatabase.createSharedInMemory() repository = SnapshotRepository() + groupRepository = SnapshotGroupRepository() taskRepository = TaskRepository() } @@ -31,60 +30,60 @@ class SnapshotRepositoryTest { db.keepAlive.close() } - private fun createTestTask(id: String = "task-123"): Task { - return transaction { - taskRepository.create( - name = "Test Task", - mode = TaskMode.AGENT, - projectId = "project-123", - projectPath = "/test/project", - id = id - ) - } + private fun createTestTask(id: String = "task-123"): Task = transaction { + taskRepository.create( + name = "Test Task", + mode = TaskMode.AGENT, + projectId = "project-123", + projectPath = "/test/project", + id = id + ) } + private fun createGroup(taskId: String, subtaskId: String? 
= null): SnapshotGroup = + groupRepository.create(taskId = taskId, subtaskId = subtaskId) + @Nested inner class CreateTests { @Test fun `should create snapshot`() { transaction { - // Given val task = createTestTask() + val group = createGroup(task.id) - // When val snapshot = repository.create( taskId = task.id, + groupId = group.id, filePath = "/path/to/file.txt", content = "original content", contentHash = "hash123" ) - // Then assertNotNull(snapshot.id) assertEquals(task.id, snapshot.taskId) + assertEquals(group.id, snapshot.groupId) assertEquals("/path/to/file.txt", snapshot.filePath) } } @Test - fun `should create snapshot with subtask`() { + fun `should create snapshot with subtask-tagged group`() { transaction { - // Given val task = createTestTask() + val group = createGroup(task.id, subtaskId = "subtask-123") - // When val snapshot = repository.create( taskId = task.id, + groupId = group.id, filePath = "/file.txt", content = "content", - contentHash = "hash456", - subtaskId = "subtask-123" + contentHash = "hash456" ) - // Then assertNotNull(snapshot.id) - assertEquals("subtask-123", snapshot.subtaskId) + assertEquals(group.id, snapshot.groupId) + assertEquals("subtask-123", groupRepository.findById(group.id)?.subtaskId) } } } @@ -95,19 +94,18 @@ class SnapshotRepositoryTest { @Test fun `should find snapshot by ID`() { transaction { - // Given val task = createTestTask() + val group = createGroup(task.id) val created = repository.create( taskId = task.id, + groupId = group.id, filePath = "/file.txt", content = "content", contentHash = "hash" ) - // When val found = repository.findById(created.id) - // Then assertNotNull(found) assertEquals("/file.txt", found.filePath) } @@ -116,15 +114,13 @@ class SnapshotRepositoryTest { @Test fun `should find snapshots by task ID`() { transaction { - // Given val task = createTestTask() - repository.create(taskId = task.id, filePath = "/file1.txt", content = "c1", contentHash = "h1") - repository.create(taskId = task.id, 
filePath = "/file2.txt", content = "c2", contentHash = "h2") + val group = createGroup(task.id) + repository.create(taskId = task.id, groupId = group.id, filePath = "/file1.txt", content = "c1", contentHash = "h1") + repository.create(taskId = task.id, groupId = group.id, filePath = "/file2.txt", content = "c2", contentHash = "h2") - // When val snapshots = repository.findByTaskId(task.id) - // Then assertEquals(2, snapshots.size) } } @@ -132,15 +128,13 @@ class SnapshotRepositoryTest { @Test fun `should find snapshots by file path`() { transaction { - // Given val task = createTestTask() - repository.create(taskId = task.id, filePath = "/file.txt", content = "v1", contentHash = "h1") - repository.create(taskId = task.id, filePath = "/file.txt", content = "v2", contentHash = "h2") + val group = createGroup(task.id) + repository.create(taskId = task.id, groupId = group.id, filePath = "/file.txt", content = "v1", contentHash = "h1") + repository.create(taskId = task.id, groupId = group.id, filePath = "/file.txt", content = "v2", contentHash = "h2") - // When val snapshots = repository.findByFilePath(task.id, "/file.txt") - // Then assertEquals(2, snapshots.size) } } @@ -152,19 +146,18 @@ class SnapshotRepositoryTest { @Test fun `should delete snapshot`() { transaction { - // Given val task = createTestTask() + val group = createGroup(task.id) val snapshot = repository.create( taskId = task.id, + groupId = group.id, filePath = "/file.txt", content = "content", contentHash = "hash" ) - // When val deleted = repository.delete(snapshot.id) - // Then assertEquals(true, deleted) } } diff --git a/core/src/test/kotlin/pl/jclab/refio/core/llm/adapters/LLMAdapterSmokeTest.kt b/core/src/test/kotlin/pl/jclab/refio/core/llm/adapters/LLMAdapterSmokeTest.kt index f8438e53..0a457807 100644 --- a/core/src/test/kotlin/pl/jclab/refio/core/llm/adapters/LLMAdapterSmokeTest.kt +++ b/core/src/test/kotlin/pl/jclab/refio/core/llm/adapters/LLMAdapterSmokeTest.kt @@ -26,10 +26,13 @@ import 
pl.jclab.refio.core.config.ConfigKeys import pl.jclab.refio.core.db.ConfigScope import pl.jclab.refio.core.llm.LLMContentPart import pl.jclab.refio.core.llm.LLMMessage +import pl.jclab.refio.core.tools.base.ToolSchema import pl.jclab.refio.core.services.ConfigService import pl.jclab.refio.testutil.TestDatabase import pl.jclab.refio.core.utils.GsonInstance.gson as appGson import kotlin.test.assertEquals +import kotlin.test.assertFalse +import kotlin.test.assertNotNull import kotlin.test.assertTrue class LLMAdapterSmokeTest { @@ -258,6 +261,214 @@ class LLMAdapterSmokeTest { assertEquals(22, response.usage.totalTokens) } + @Test + fun `openai responses tools should disable strict for non-strict-compatible schema`() = runTest { + val configService = mockProviderConfig( + key = ConfigKeys.PROVIDER_OPENAI_API_KEY.key, + apiKey = "test-openai-key" + ) + + var requestJson = "" + val adapter = OpenAIAdapter( + model = "gpt-5.1-codex", + configService = configService, + baseUrlOverride = "https://mock.openai.test/v1", + httpClientOverride = mockHttpClient { request -> + requestJson = request.bodyText() + respondJson( + """ + { + "id": "resp_test", + "model": "gpt-5.1-codex", + "output": [ + { "type": "message", "content": [ { "type": "output_text", "text": "ok" } ] } + ], + "usage": { "input_tokens": 5, "output_tokens": 2, "total_tokens": 7 } + } + """.trimIndent() + ) + } + ) + + val dynamicSchema = ToolSchema( + name = "http_request_like", + description = "Dynamic map schema", + parametersJsonSchema = mapOf( + "type" to "object", + "properties" to mapOf( + "headers" to mapOf( + "type" to "object", + "additionalProperties" to mapOf("type" to "string") + ) + ), + "required" to listOf() + ) + ) + + adapter.chat( + messages = listOf(LLMMessage(role = "user", content = "hi")), + systemMessages = emptyList(), + maxTokens = 128, + temperature = 0.2, + streaming = false, + onStreamChunk = null, + kwargs = mapOf("native_tools" to listOf(dynamicSchema)) + ) + + val requestBody = 
parseJsonMap(requestJson) + @Suppress("UNCHECKED_CAST") + val tools = requestBody["tools"] as List> + val tool = tools.first() + assertEquals(false, tool["strict"]) + @Suppress("UNCHECKED_CAST") + val parameters = tool["parameters"] as Map + @Suppress("UNCHECKED_CAST") + val properties = parameters["properties"] as Map + @Suppress("UNCHECKED_CAST") + val headers = properties["headers"] as Map + assertNotNull(headers["additionalProperties"]) + } + + @Test + fun `openai responses tools should keep strict for strict-compatible schema`() = runTest { + val configService = mockProviderConfig( + key = ConfigKeys.PROVIDER_OPENAI_API_KEY.key, + apiKey = "test-openai-key" + ) + + var requestJson = "" + val adapter = OpenAIAdapter( + model = "gpt-5.1-codex", + configService = configService, + baseUrlOverride = "https://mock.openai.test/v1", + httpClientOverride = mockHttpClient { request -> + requestJson = request.bodyText() + respondJson( + """ + { + "id": "resp_test", + "model": "gpt-5.1-codex", + "output": [ + { "type": "message", "content": [ { "type": "output_text", "text": "ok" } ] } + ], + "usage": { "input_tokens": 5, "output_tokens": 2, "total_tokens": 7 } + } + """.trimIndent() + ) + } + ) + + val strictSchema = ToolSchema( + name = "read_exact", + description = "Strict-compatible schema", + parametersJsonSchema = mapOf( + "type" to "object", + "properties" to mapOf( + "path" to mapOf("type" to "string") + ), + "required" to listOf("path"), + "additionalProperties" to false + ) + ) + + adapter.chat( + messages = listOf(LLMMessage(role = "user", content = "hi")), + systemMessages = emptyList(), + maxTokens = 128, + temperature = 0.2, + streaming = false, + onStreamChunk = null, + kwargs = mapOf("native_tools" to listOf(strictSchema)) + ) + + val requestBody = parseJsonMap(requestJson) + @Suppress("UNCHECKED_CAST") + val tools = requestBody["tools"] as List> + val tool = tools.first() + assertEquals(true, tool["strict"]) + } + + @Test + fun `gemini tools should 
convert nullable json schema to openapi-like schema`() = runTest { + val configService = mockProviderConfig( + key = ConfigKeys.PROVIDER_GEMINI_API_KEY.key, + apiKey = "test-gemini-key" + ) + + var requestJson = "" + val adapter = GeminiAdapter( + model = "gemini-2.5-flash", + configService = configService, + baseUrlOverride = "https://mock.gemini.test/v1beta", + httpClientOverride = mockHttpClient { request -> + requestJson = request.bodyText() + respondJson( + """ + { + "candidates": [ + { + "content": { "parts": [ { "text": "ok" } ] }, + "finishReason": "STOP" + } + ], + "usageMetadata": { + "promptTokenCount": 2, + "candidatesTokenCount": 1, + "totalTokenCount": 3 + } + } + """.trimIndent() + ) + } + ) + + val nullableSchema = ToolSchema( + name = "read_file_like", + description = "Nullable params", + parametersJsonSchema = mapOf( + "type" to "object", + "properties" to mapOf( + "path" to mapOf("type" to "string"), + "detail" to mapOf( + "type" to listOf("string", "null"), + "enum" to listOf("summary", "full"), + "default" to "summary" + ) + ), + "required" to listOf("path"), + "additionalProperties" to false + ) + ) + + adapter.chat( + messages = listOf(LLMMessage(role = "user", content = "hi")), + systemMessages = emptyList(), + maxTokens = 128, + temperature = 0.2, + streaming = false, + onStreamChunk = null, + kwargs = mapOf("native_tools" to listOf(nullableSchema)) + ) + + val requestBody = parseJsonMap(requestJson) + @Suppress("UNCHECKED_CAST") + val tools = requestBody["tools"] as List> + @Suppress("UNCHECKED_CAST") + val declarations = (tools.first()["functionDeclarations"] as List>) + @Suppress("UNCHECKED_CAST") + val parameters = declarations.first()["parameters"] as Map + @Suppress("UNCHECKED_CAST") + val properties = parameters["properties"] as Map + @Suppress("UNCHECKED_CAST") + val detail = properties["detail"] as Map + + assertEquals("OBJECT", parameters["type"]) + assertEquals("STRING", detail["type"]) + assertEquals(true, detail["nullable"]) + 
assertFalse(detail.containsKey("default")) + assertFalse(parameters.containsKey("additionalProperties")) + } + private fun mockProviderConfig(key: String, apiKey: String): ConfigService { val configService = mockk() every { configService.get(any(), any(), any(), any()) } returns null diff --git a/core/src/test/kotlin/pl/jclab/refio/core/services/SnapshotServiceTest.kt b/core/src/test/kotlin/pl/jclab/refio/core/services/SnapshotServiceTest.kt index 6656e5fc..719ad2fe 100644 --- a/core/src/test/kotlin/pl/jclab/refio/core/services/SnapshotServiceTest.kt +++ b/core/src/test/kotlin/pl/jclab/refio/core/services/SnapshotServiceTest.kt @@ -6,6 +6,8 @@ import org.junit.jupiter.api.Nested import org.junit.jupiter.api.Test import org.junit.jupiter.api.io.TempDir import pl.jclab.refio.core.db.Snapshot +import pl.jclab.refio.core.db.SnapshotGroup +import pl.jclab.refio.core.db.repositories.SnapshotGroupRepository import pl.jclab.refio.core.db.repositories.SnapshotRepository import java.nio.file.Path import kotlin.io.path.createDirectories @@ -18,12 +20,22 @@ class SnapshotServiceTest { lateinit var tempDir: Path private lateinit var snapshotRepository: SnapshotRepository + private lateinit var snapshotGroupRepository: SnapshotGroupRepository private lateinit var service: SnapshotService @BeforeEach fun setup() { snapshotRepository = mockk(relaxed = true) - service = SnapshotService(snapshotRepository, tempDir) + snapshotGroupRepository = mockk(relaxed = true) + every { snapshotGroupRepository.create(any(), any()) } answers { + SnapshotGroup( + id = "group-generated", + taskId = firstArg(), + subtaskId = secondArg(), + createdAt = 0L + ) + } + service = SnapshotService(snapshotRepository, snapshotGroupRepository, tempDir) } @Nested @@ -35,12 +47,14 @@ class SnapshotServiceTest { file.parent.createDirectories() file.writeText("fun main() {}") - service.createSnapshot("task-1", "subtask-1", listOf("src/main.kt")) + val groupId = service.createSnapshot("task-1", "subtask-1", 
listOf("src/main.kt")) + assertEquals("group-generated", groupId) verify { + snapshotGroupRepository.create(taskId = "task-1", subtaskId = "subtask-1") snapshotRepository.create( taskId = "task-1", - subtaskId = "subtask-1", + groupId = "group-generated", filePath = "src/main.kt", content = "fun main() {}", contentHash = any() @@ -49,30 +63,32 @@ class SnapshotServiceTest { } @Test - fun `should skip nonexistent files`() { - service.createSnapshot("task-1", "subtask-1", listOf("nonexistent.kt")) + fun `should return null when no existing files`() { + val result = service.createSnapshot("task-1", "subtask-1", listOf("nonexistent.kt")) + assertNull(result) verify(exactly = 0) { + snapshotGroupRepository.create(any(), any()) snapshotRepository.create(any(), any(), any(), any(), any()) } } @Test - fun `should return subtaskId as snapshot id`() { + fun `should return null for empty path list`() { val result = service.createSnapshot("task-1", "snap-42", emptyList()) - assertEquals("snap-42", result) + assertNull(result) } @Test - fun `should snapshot multiple files`() { + fun `should snapshot multiple files under one group`() { tempDir.resolve("a.kt").writeText("A") tempDir.resolve("b.kt").writeText("B") - service.createSnapshot("task-1", "sub-1", listOf("a.kt", "b.kt")) + val groupId = service.createSnapshot("task-1", "sub-1", listOf("a.kt", "b.kt")) - verify(exactly = 2) { - snapshotRepository.create(any(), any(), any(), any(), any()) - } + assertEquals("group-generated", groupId) + verify(exactly = 1) { snapshotGroupRepository.create(any(), any()) } + verify(exactly = 2) { snapshotRepository.create(any(), any(), any(), any(), any()) } } @Test @@ -86,7 +102,6 @@ class SnapshotServiceTest { service.createSnapshot("task-1", "sub-1", listOf("test.txt")) - // SHA-256 of "hello" is well-known assertEquals( "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", hashSlot.captured @@ -99,18 +114,14 @@ class SnapshotServiceTest { @Test fun `should return file 
contents map`() { - val snapshot1 = mockk { - every { filePath } returns "a.kt" - } - val snapshot2 = mockk { - every { filePath } returns "b.kt" - } + val snapshot1 = mockk { every { filePath } returns "a.kt" } + val snapshot2 = mockk { every { filePath } returns "b.kt" } - every { snapshotRepository.findBySubtaskId("snap-1") } returns listOf(snapshot1, snapshot2) + every { snapshotRepository.findByGroupId("grp-1") } returns listOf(snapshot1, snapshot2) every { snapshotRepository.decompressContent(snapshot1) } returns "content-A" every { snapshotRepository.decompressContent(snapshot2) } returns "content-B" - val result = service.getSnapshot("snap-1") + val result = service.getSnapshot("grp-1") assertEquals(2, result.size) assertEquals("content-A", result["a.kt"]) @@ -119,7 +130,7 @@ class SnapshotServiceTest { @Test fun `should return empty map for unknown snapshot`() { - every { snapshotRepository.findBySubtaskId("unknown") } returns emptyList() + every { snapshotRepository.findByGroupId("unknown") } returns emptyList() val result = service.getSnapshot("unknown") assertTrue(result.isEmpty()) @@ -131,24 +142,20 @@ class SnapshotServiceTest { @Test fun `should return content for existing file`() { - val snapshot = mockk { - every { filePath } returns "target.kt" - } - every { snapshotRepository.findBySubtaskId("snap-1") } returns listOf(snapshot) + val snapshot = mockk { every { filePath } returns "target.kt" } + every { snapshotRepository.findByGroupId("grp-1") } returns listOf(snapshot) every { snapshotRepository.decompressContent(snapshot) } returns "file content" - val result = service.getFileContent("snap-1", "target.kt") + val result = service.getFileContent("grp-1", "target.kt") assertEquals("file content", result) } @Test fun `should return null for missing file`() { - val snapshot = mockk { - every { filePath } returns "other.kt" - } - every { snapshotRepository.findBySubtaskId("snap-1") } returns listOf(snapshot) + val snapshot = mockk { every { filePath 
} returns "other.kt" } + every { snapshotRepository.findByGroupId("grp-1") } returns listOf(snapshot) - val result = service.getFileContent("snap-1", "target.kt") + val result = service.getFileContent("grp-1", "target.kt") assertNull(result) } } @@ -158,13 +165,11 @@ class SnapshotServiceTest { @Test fun `should restore files to project root`() { - val snapshot = mockk { - every { filePath } returns "restored.kt" - } - every { snapshotRepository.findBySubtaskId("snap-1") } returns listOf(snapshot) + val snapshot = mockk { every { filePath } returns "restored.kt" } + every { snapshotRepository.findByGroupId("grp-1") } returns listOf(snapshot) every { snapshotRepository.decompressContent(snapshot) } returns "restored content" - val result = service.restoreSnapshot("snap-1") + val result = service.restoreSnapshot("grp-1") assertTrue(result.success) assertEquals(listOf("restored.kt"), result.restoredFiles) @@ -176,11 +181,11 @@ class SnapshotServiceTest { val snap1 = mockk { every { filePath } returns "a.kt" } val snap2 = mockk { every { filePath } returns "b.kt" } - every { snapshotRepository.findBySubtaskId("snap-1") } returns listOf(snap1, snap2) + every { snapshotRepository.findByGroupId("grp-1") } returns listOf(snap1, snap2) every { snapshotRepository.decompressContent(snap1) } returns "A" every { snapshotRepository.decompressContent(snap2) } returns "B" - val result = service.restoreSnapshot("snap-1", filePaths = listOf("a.kt")) + val result = service.restoreSnapshot("grp-1", filePaths = listOf("a.kt")) assertTrue(result.success) assertEquals(listOf("a.kt"), result.restoredFiles) @@ -189,13 +194,11 @@ class SnapshotServiceTest { @Test fun `should create parent directories`() { - val snapshot = mockk { - every { filePath } returns "deep/nested/dir/file.kt" - } - every { snapshotRepository.findBySubtaskId("snap-1") } returns listOf(snapshot) + val snapshot = mockk { every { filePath } returns "deep/nested/dir/file.kt" } + every { 
snapshotRepository.findByGroupId("grp-1") } returns listOf(snapshot) every { snapshotRepository.decompressContent(snapshot) } returns "deep content" - val result = service.restoreSnapshot("snap-1") + val result = service.restoreSnapshot("grp-1") assertTrue(result.success) assertTrue(tempDir.resolve("deep/nested/dir/file.kt").toFile().exists()) diff --git a/core/src/test/kotlin/pl/jclab/refio/testutil/TestDatabase.kt b/core/src/test/kotlin/pl/jclab/refio/testutil/TestDatabase.kt index 0f86a166..ee0f4b7e 100644 --- a/core/src/test/kotlin/pl/jclab/refio/testutil/TestDatabase.kt +++ b/core/src/test/kotlin/pl/jclab/refio/testutil/TestDatabase.kt @@ -130,8 +130,9 @@ object TestDatabase { private fun createTables() { SchemaUtils.createMissingTablesAndColumns( TasksTable, - SnapshotsTable, + SnapshotGroupsTable, SubtasksTable, + SnapshotsTable, ChatMessagesTable, ApiLogsTable, MCPServersTable, @@ -182,8 +183,9 @@ object TestDatabase { MCPServersTable.deleteAll() ApiLogsTable.deleteAll() ChatMessagesTable.deleteAll() - SubtasksTable.deleteAll() SnapshotsTable.deleteAll() + SubtasksTable.deleteAll() + SnapshotGroupsTable.deleteAll() TasksTable.deleteAll() } } diff --git a/docs/onboarding.md b/docs/onboarding.md index 7ed95d76..b7b19c9e 100644 --- a/docs/onboarding.md +++ b/docs/onboarding.md @@ -1,7 +1,7 @@ # Onboarding: Refio -> **Last Updated:** 2026-04-03 -> **Version:** 0.0.1.7 +> **Last Updated:** 2026-04-24 +> **Version:** 0.0.1.8 > **Status:** Active Development --- diff --git a/docs/overview.md b/docs/overview.md index ea408b6f..bf0a8418 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -1,7 +1,7 @@ # Refio - Technical Architecture Overview -> **Last Updated:** 2026-03-29 -> **Version:** 0.0.1.7 +> **Last Updated:** 2026-04-26 +> **Version:** 0.0.1.8 > **Status:** Active Development This document provides a comprehensive technical overview of Refio - a local-first AI coding assistant for IntelliJ IDEA and the terminal. 
@@ -757,6 +757,38 @@ suspend fun LLMClient.complete( | Ollama | NDJSON | `done: true` | | Gemini | SSE | Implicit end | +### Native Function Calling + +From 0.0.1.8 Refio supports the providers' structured `tools` API as an alternative to +the JSON-in-text envelope. Controlled by `tools.native_tools` in `config.yaml`: + +| Value | Behaviour | +|-------|-----------| +| `auto` *(default)* | Use native tools when `ModelDefinition.supportsFunctionCalling=true` | +| `always` | Force native tools even for unlisted models | +| `never` | Always fall back to JSON-in-text path | + +**Dispatch path:** + +``` +AgentTurnLoop + └─→ NativeToolsResolver (mode + ModelDefinition + fallback cache) + ├─→ NATIVE path → adapter.chat(native_tools=[...]) + │ → LLMResponse.nativeToolCalls (skips ToolCallParser) + │ + └─→ JSON path → ToolCallParser.extractToolCalls(content) + ← existing behaviour unchanged +``` + +`ToolSchemaSanitizer` normalizes each tool's JSON Schema per provider before the request: +- **OpenAI** — strips `$schema`/`default`; strict-mode compatibility check; Responses API uses flat `name/description/parameters/strict` shape. +- **Anthropic** — strips composition keywords (`oneOf`, `allOf`, `anyOf`). +- **Gemini** — strips `additionalProperties`; converts array `type` values to a single uppercase string; adds `nullable: true` for optional fields. + +`NativeToolsFallbackTracker` is a session-scoped in-memory set. If a provider returns +HTTP 400 "tools not supported", the model is added and future turns silently use the +JSON path without requiring a restart. 
+ ### No-Egress Mode When enabled, blocks all cloud providers: diff --git a/docs/prompts.md b/docs/prompts.md new file mode 100644 index 00000000..eb4a910a --- /dev/null +++ b/docs/prompts.md @@ -0,0 +1,2610 @@ +################################################# +# HTML, CSS, JS +################################################# +--------------------------------- +- JS games +--------------------------------- +Write a snake game in javascript, css, html in one file. Build a classic Snake game on a 30x30 grid. The snake moves continuously in one of four directions and grows when eating food. If it hits a wall or itself, the game ends. Implement keyboard controls for player movement. Add a CPU-controlled snake mode that automatically searches for food. Add the three game modes: human vs human, cpu vs human, cpu vs cpu. Display the score during gameplay. Include a start menu and game over screen. Allow restarting the game with a keypress. Use file name "snake_{{MODEL_ID}}_01.html" + +--- + +Write a snake game in javascript, css, html in one file. Build a classic Snake game on a 30x30 grid. The snake moves continuously in one of four directions and grows when eating food. If it hits a wall or itself, the game ends. Implement keyboard controls for player movement. Add a CPU-controlled snake mode that automatically searches for food. Add the four game modes: human vs human, cpu vs human, cpu vs cpu, cpu vs cpu vs cpu (3 cpu players). Display the score during gameplay. Include a start menu and game over screen. Allow restarting the game with a keypress. Use file name "snake3cpu_{{MODEL_ID}}_01.html" + +--- + + +Create breakout clone using javascript, css, html in one file. Develop a Breakout-style arcade game with a paddle, a bouncing ball, and a wall of colored bricks. The ball reflects off walls and bricks, breaking them on contact. The player controls the paddle with the mouse or keyboard to keep the ball in play. Losing all lives ends the game. 
Add a score counter based on destroyed bricks. Include basic sound effects for hits. Show a "Game Over" screen with a restart option. Bricks can be arranged in multiple levels or random patterns. All production code in one file named "breakout_{{MODEL_ID}}_01.html". + + +--- + +Design Pong with AI players using javascript, css, html in one file. Create a two-player Pong game with rectangular paddles and a ball bouncing between them. The left paddle is controlled by the player, the right by a simple AI. The ball bounces off the paddles and top/bottom screen edges. A player scores when the opponent misses the ball. Display each player's score on screen. Add a start screen with “Player vs AI” and “AI vs AI” modes. Keep gameplay fast and smooth. Include sound effects and restart functionality. All production code in one file named "pong_{{MODEL_ID}}_01.html". + +--- + +Develop simple Tetris using javascript, css, html in one file. Implement a working Tetris clone with falling tetrominoes that players can rotate and move. When a row is completely filled, it disappears and the player gains points. The falling speed increases as the player progresses. End the game when blocks reach the top of the screen. Use a side panel to show the next piece and current score. Allow pausing and resuming the game. Design with a minimalistic interface and keyboard-only controls. Include a title screen and restart option after game over. All production code in one file named "tretis_{{MODEL_ID}}_01.html"; do not generate test files. + +--- + +Make maze solver using javascript, css, html in one file. Generate a random maze layout using a basic maze generation algorithm. Visualize the maze as a grid with walls and paths. Let the user choose between manual control or automatic solving using an algorithm like BFS or DFS. Animate the solving process step-by-step for clarity. Add start and end points clearly marked on the grid. Show a summary screen with number of steps and time taken. 
Provide a reset button to generate a new maze. Use simple but readable graphics. All production code in one file named "maze_{{MODEL_ID}}_01.html". + +--- + +Build flappy bird using javascript, css, html in one file. Clone the classic Flappy Bird game with horizontally scrolling pipes and simple physics. The player controls a bird that flaps upward when the spacebar is pressed. The bird must navigate between pairs of pipes with a fixed gap. Colliding with a pipe or the ground ends the game. Include a score counter for each pipe passed. Add a splash screen, game over screen, and the ability to restart. Use smooth animations and background movement. Optional: play background music or flapping sound. Add a level selector with game difficulty; the default is a low bird fall speed. All production code in one file named "flappy_{{MODEL_ID}}_01.html". + +--- + +Implement minesweeper using javascript, css, html in one file. Recreate the Minesweeper logic with three grid sizes: small, medium, and large. Place a number of hidden mines randomly across the board. The player left-clicks to uncover a cell and right-clicks to flag it as a suspected mine. Revealed cells show a number indicating nearby mines. Hitting a mine ends the game with all mines revealed. Win by flagging all mines without mistakes. Include reset and difficulty selection. Use basic icons or colors to represent numbers and flags. All production code in one file named "minesweeper_{{MODEL_ID}}_01.html". + +--- + +Create basic platformer using javascript, css, html in one file. Build a side-scrolling platformer with character movement, gravity, and jumping mechanics. The level should include solid platforms, empty space, and collectable items like coins. The player moves left/right with arrow keys and jumps with spacebar. Add a simple HUD showing collected items. Introduce at least one enemy that moves back and forth. Touching the enemy causes a reset or health reduction. 
Design a win condition (e.g., collect all coins or reach the goal). Include background music and tile-based level graphics. Write 5 levels and switch between them. All production code in one file named "basic_platformer_{{MODEL_ID}}_01.html" + +--- + +Make simple tower defense using javascript, css, html in one file. Design a tower defense game with a fixed path where waves of enemies follow a route. The player can place towers along the path edges to attack passing enemies. Each tower has a range, fire rate, and damage. Enemies reduce the player’s health when they reach the end. Display remaining health, wave number, and currency for building. Include basic upgrades like faster fire rate or increased damage. Implement a "start next wave" button. Use a grid or tile system for positioning towers. All production code in one file named "tower_defenses_{{MODEL_ID}}_01.html". + +--- + +Write memory match game using javascript, css, html in one file. Create a simple memory game with a grid of face-down cards. Clicking reveals a card; selecting two matching cards removes them from play. Non-matching pairs are flipped back after a short delay. The game ends when all pairs are matched. Count and display the number of attempts. Include a start and win screen. Use colored shapes or icons for card faces. Add a “reset” button to shuffle and replay. All production code in one file named "memory_match_{{MODEL_ID}}_01.html". + +--------------------------------- +- Demo effect in javascript +--------------------------------- + +Raster Bars Effect using javascript, css, html in one file. Create colorful horizontal bars that scroll vertically or stay fixed at different screen heights. Each bar should be a solid horizontal line of color, changing hues rapidly or smoothly. The effect should simulate the look of hardware raster timing, where the color register is changed during screen rendering. Bars may wave, bounce, or remain static while cycling through colors. 
Include multiple bar layers with different speeds or amplitudes. Try to emulate phosphorescent color blending and soft CRT-style transitions. The background should stay black or dark to enhance contrast. Add sync with music tempo if applicable. Vertical resolution and color fidelity should resemble a C64/Atari output. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_raster_bars_{{MODEL_ID}}_01.html". + +--- + +Starfield Simulation (Parallax Stars) using javascript, css, html in one file. Simulate a field of stars moving towards the viewer, giving the illusion of depth and 3D space. Place a set of white or slightly colored dots randomly on the screen. Each star should move outward from the center or along a vector, growing brighter and larger as it approaches. Use different layers of stars moving at different speeds to create a parallax effect. Stars that leave the screen should be reset to the background at random positions. Speed and density can be adjusted dynamically. Optionally, respond to user input or simulate camera rotation. Maintain a smooth, fluid framerate and fade effects if desired. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_parallax_stars_{{MODEL_ID}}_01.html". + + +--- + +Plasma Effect (Color Cycling Grid) using javascript, css, html in one file. Create a wavy, psychedelic background using sine wave blending and color cycling. The screen should display a constantly moving, organic color pattern with flowing gradients. Use a grid where each cell's color is determined by combining multiple sine functions of time and position (e.g., sin(x), sin(y), sin(x+y), etc.). Colors should transition smoothly in a repeating palette, ideally 256-color lookup table. The motion should resemble liquid or fire-like movement, without sharp edges. Add a subtle vertical or horizontal drift for realism.
Include an option to modify amplitude or frequency over time. No fixed shapes or symmetry — the effect should be continuous and abstract. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_plasma_simple_{{MODEL_ID}}_01.html". + +--- + +Scrolling Text with Sinus Curve using javascript, css, html in one file with parameters controls gui. Render a line of text that scrolls horizontally across the screen, each character following a sine wave in the vertical direction. The characters move from right to left in a smooth, continuous motion. Use a monospaced retro pixel font. The vertical sine movement should cause each letter to appear to wave up and down independently, like riding a sine curve. Loop the message infinitely and allow dynamic text updates. Background can be black or include raster bars. Ensure consistent speed and timing. Optionally, include per-letter color cycling to match C64-style effects. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_scroll_text_{{MODEL_ID}}_01.html". + +--- + +Checkerboard Zoomer (Zooming Grid). Create a full-screen checkerboard pattern that appears to zoom in or out. The checkerboard should consist of alternating black and white squares (or colored tiles). The illusion of zoom is created by scaling the pattern — either with matrix transformation or by redrawing the grid with changing size and spacing. Optionally add rotation as the zoom occurs, either clockwise or counter-clockwise. The effect should loop smoothly. Use interpolation or pixel-snapping to keep tiles crisp. You can simulate a vanishing point by keeping the center stable while zooming the outer tiles. Include timing control for zoom speed. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_checkerboard_zoomer_{{MODEL_ID}}_01.html". + +--- + +Wobbly Logo Distortion. 
Display a bitmap logo or text that distorts in a sine or cosine wave pattern across the horizontal axis. The entire image should appear to "wobble" smoothly as if it were underwater. The wave can be horizontal only or applied in both axes for extra distortion. The image should stay recognizable despite the warping. Use alpha or background transparency for a clean visual. Allow parameters for wave frequency, amplitude, and speed. This can be implemented by remapping each scanline's horizontal offset. Loop the effect infinitely and keep performance smooth. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_wobble_{{MODEL_ID}}_01.html". + +--- + +Copper Bars with Dynamic Palette. Simulate "copper bars" by drawing multiple horizontal bars whose colors change according to a predefined palette gradient. The bars should move smoothly up and down (or stay static) while their color segments shift in brightness and hue. Use multiple overlapping bars with semi-transparent blending or hard edges. Animate palette transitions — e.g., red to orange to yellow. Create a glowing look with soft transitions between color stops. Sync movement to a loop or soundtrack. Keep bar edges sharp but flicker-free. This effect is reminiscent of Amiga copper lists but achievable via CPU or GPU. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_copper_bars_{{MODEL_ID}}_01.html". + +--- + +Pixel Fire (Procedural Flame). Generate a vertically rising fire effect with flickering colors from red to yellow to white in TypeScript. Use a grid of pixels that simulate fire motion using cellular automata or noise functions. The base of the fire emits high intensity and the pixels gradually fade and scatter upward. Add random horizontal shifts and decay to create a realistic flame effect. Update the fire field every frame for continuous motion. 
Use a fixed color palette and fade toward transparency at the top. You can optionally burn text or shapes by increasing intensity in those areas. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_pixel_fire_{{MODEL_ID}}_01.html". + +--- + + +Tunnel Effect. The tunnel effect is a classic visual from the 90s demoscene, simulating an endless 3D tunnel that appears to zoom toward or away from the viewer. The illusion of depth is achieved by mapping a repeating texture or pattern onto a virtual cylinder or vortex, and then projecting it onto the 2D screen using mathematical transformations. The tunnel's walls seem to flow past the viewer, often twisting or spiraling to enhance the dynamic, immersive sensation of movement. The center of the tunnel acts as a vanishing point, and the texture can animate in response to speed, rotation, or user input, creating the impression of flying through space. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_tunnel_{{MODEL_ID}}_01.html". + +--- + +Sine wave color. Render an animated plasma tunnel where the colors transition smoothly through sinusoidal gradients, creating a swirling, pulsating 3D-like surface. The effect is generated by combining several sine and cosine functions per pixel and animating their parameters over time. Add motion along the Z-axis (depth) and allow rotation on X/Y axes for extra dynamism. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file name "demo_sine_wave_{{MODEL_ID}}_01.html". + +--- + +Starfield Parallax. Create a classic starfield effect—hundreds of white dots ("stars") move towards the viewer at varying speeds, simulating depth via parallax. Use multiple star layers for depth illusion. As stars approach the center, increase their size and brightness, and reset their position when they move off-screen. 
Allow control over star count, speed, and color. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_starfield_parallax_{{MODEL_ID}}_01.html". + +--- + +Bump Mapping (Pseudo 3D Light). Display a static or slowly moving "bumpy" pattern (e.g., text, logo, or noise) with a simulated animated light source. Each pixel calculates its brightness based on the height gradient of its neighbors, faking 3D lighting. Animate the light direction for a dynamic effect. GUI should allow changes to light direction, contrast, and background color. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_bump_mapping_{{MODEL_ID}}_01.html". + +--- + +Vector Balls (Dot Tunnel). Render multiple colored circles ("balls") arranged in a spiraling or helical tunnel formation, animating smoothly to create a sense of 3D movement. The tunnel shape can dynamically morph (wave, twist), and colors shift along a gradient. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_vector_balls_{{MODEL_ID}}_01.html". + +--- + +Scrolling Checkerboard. Display a classic checkerboard pattern that scrolls smoothly in any direction. Add a perspective transformation (tilted "floor") to simulate a 3D scene with vanishing points. The animation tests tiling, perspective projection, and smooth animation. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_scroll_checkerboard_{{MODEL_ID}}_01.html". + +--- + +Twister Bar. Animate a vertical "twister"—a bar made from thin color segments that bends side to side in a sine wave. The whole bar rotates around its vertical axis, with segment colors smoothly shifting over time. Add the GUI to control all parameters, use a "dat.gui" library.
All production code in one file using javascript, css, html. File name "demo_twister_bar_{{MODEL_ID}}_01.html". + +--- + +Water Ripple Distortion. Apply a dynamic "water ripple" distortion to a background (solid color, gradient, or image). Offset pixel positions using two perpendicular sine waves, animated over time, to create flowing, intersecting ripples. Controls for depth, frequency, and water color via GUI. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_water_ripple_{{MODEL_ID}}_01.html". + +--- + +Dot Matrix (Digi Tunnel). Create a digital tunnel made of small circles (dot matrix) forming a 3D "flight through tunnel" effect. Dots change size, brightness, and color as they move toward or away from the viewer. Tunnel shape and color gradients are configurable. GUI for dot count, speed, and color mode. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_dot_matrix_{{MODEL_ID}}_01.html". + +--- + +Vector Grid Warp. Draw a grid of points connected by thin lines on an HTML `<canvas>`. The entire mesh is dynamically warped using a mathematical function, such as a sinusoidal wave or attraction/repulsion source. Lines undulate smoothly to simulate real-time space deformation. Provide a GUI to change grid resolution, deformation type, and animation speed. Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_vector_grid_{{MODEL_ID}}_01.html". + +--- + +Rotating Wireframe Cube. Render a classic 3D wireframe cube using only lines. The cube rotates on one or more axes, with all 3D-to-2D projection handled in JavaScript. GUI controls allow changing rotation speed, cube color, and axis of rotation. Use HTML `<canvas>` for drawing, CSS for positioning, and JavaScript for matrix math, 3D transformation, and animation.
Add the GUI to control all parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_rotating_wireframe_{{MODEL_ID}}_01.html". + +--- + +Polygon Morphing. Draw a polygon (e.g., pentagon, hexagon) on `<canvas>` and animate its vertices so the shape morphs smoothly into another polygon. Each vertex interpolates between its starting and target positions, with morphing looping or triggered via GUI. GUI also lets you select polygon types, morph speed, and number of vertices. JavaScript manages interpolation, CSS handles controls and layout. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_polygon_morphing_{{MODEL_ID}}_01.html". + + +--- + +Vector Logo (Bezier Curve Animation). Render a logo or text made from Bézier curves or vector lines. Animate the logo by "drawing" each segment in sequence or by dynamically distorting segments (wave, stretch, bounce). Use either `<canvas>` or inline `<svg>` for vector paths, CSS for GUI and effects, and JavaScript to control drawing order, animation speed, and path morphing. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_vector_logo_{{MODEL_ID}}_01.html". + +--- + +Rotating Vector Starfield. Draw many thin lines radiating from a single point, forming a "vector star." Animate the entire star rotating around its center; ray lengths and counts may also oscillate (e.g., via sine wave). Color each ray with a gradient that shifts over time. GUI for number of rays, rotation speed, and blur. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. File name "demo_rotating_vector_starfield_{{MODEL_ID}}_01.html".
+ +--- + + +Classic Shadebobs (Additive Circles). Render multiple soft, semi-transparent circles (“bobs”) moving smoothly over a black background using HTML `<canvas>`. Each bob is drawn with a radial gradient from a bright center fading to full transparency, and their colors are added (additive blending) where they overlap, creating glowing color mixes. Bobs follow sinusoidal or circular paths with independently adjustable speed, size, and color. The GUI lets you set the number of bobs, size, speed, and color palette. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. Use filename "demo_shadebobs_classic_{{MODEL_ID}}_01.html". + +--- + +Shadebobs Trails (Motion Blur). Create moving shadebobs as in the classic effect, but add motion blur trails by blending each frame with a slight transparency over the previous one. This results in smooth glowing streaks following the bobs’ movement. The GUI allows control over trail length (fade amount), number of bobs, and bob speed. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. Use filename "demo_shadebobs_blur_{{MODEL_ID}}_01.html". + +--- + +Monochrome Shadebobs (Single Hue). Implement classic shadebobs but restricted to shades of a single color (e.g., blue or green), blending by intensity rather than hue. Each bob’s brightness is added where they overlap. GUI lets you pick the base hue and intensity, as well as bob count and size. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. Use filename "demo_single_hue_bobs_{{MODEL_ID}}_01.html". + +--- + +Multi-Layered Shadebobs (Depth Layers). Draw two or more layers of shadebobs, each with different sizes, speeds, and blending. Bobs move on the screen. The background layer moves slower and is larger and dimmer; the foreground is faster and more intense.
Layers interact visually but not physically. The GUI allows adjustment of number of layers, bob count per layer, and individual color/size/speed per layer. Use HTML, CSS, JS in one file. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. Use filename "demo_shadebobs_depth_layers_{{MODEL_ID}}_01.html". + +--- + +Shadebobs Masked by Text (Text Mask). Animate shadebobs, but their visible area is masked to only show inside a text shape (“HELLO”, “DEMO”, etc.). Bobs move normally but are only visible where the text is drawn. GUI lets you change the mask text and font, as well as bob parameters. Use the `<canvas>` 2D API with `globalCompositeOperation` to implement masking. Add the GUI to control all effect parameters, use a "dat.gui" library. All production code in one file using javascript, css, html. Use filename "demo_shadebobs_text_mask_{{MODEL_ID}}_01.html". + +--------------------------------- +- 3D demo effect in javascript +--------------------------------- + +Gouraud Shading Demo. Write a Gouraud shading demo in javascript, css, html in one file. Render a rotating 3D cube using the HTML5 canvas. Implement vertex-based lighting so that each vertex of the cube has a calculated color, and colors are smoothly interpolated across each face (Gouraud shading). Allow the user to change the rotation speed and the light direction with keyboard controls. Display the cube’s current rotation angles and the light vector on the screen. Add the GUI to control all effect parameters, camera position, light and object position, etc.; use a "dat.gui" library. All production code in one file using javascript, css, html. Use file name "3d-gouraud_{{MODEL_ID}}_01.html". + +--- + +Phong Shading Demo. Write a Phong shading demo in javascript, css, html in one file. Render a 3D sphere that rotates continuously on the HTML5 canvas.
Implement per-pixel lighting (Phong shading), so specular highlights move smoothly as the object rotates. Let the user change material shininess and light direction with UI sliders. Display the light position and current shininess value. Add the GUI to control the effect, select object, lights, etc.; use a "dat.gui" library. All production code in one file using javascript, css, html. Include start/stop rotation controls. Use file name "3d-phong_{{MODEL_ID}}_01.html". + +--- + +Fractal Box (Mandelbox) 3D Renderer. Write a Mandelbox fractal 3D renderer in javascript, css, html in one file. Use raymarching to display a 3D Mandelbox fractal on the canvas. Allow the user to rotate the fractal in 3D using mouse drag, and zoom in/out with the scroll wheel. Provide UI sliders for fractal parameters such as scale and iterations. Display current camera coordinates and Mandelbox settings on the screen. Add GUI to set up all effect parameters; use a "dat.gui" library. All production code in one file using javascript, css, html. Use file name "3d-fractal_box_{{MODEL_ID}}_01.html". + +--- + +Raymarching Distance Field Demo. Write a distance field raymarching demo in javascript, css, html in one file. Render a scene consisting of a sphere and a torus using signed distance functions and raymarching in a canvas. Allow the user to move the camera around the scene with WASD keys and mouse drag. Include basic lighting and soft shadows. Display the current camera position and frame render time. Add GUI to set up all effect parameters and select different raymarching objects, different textures, object and camera position, object size and types; use a "dat.gui" library. Use file name "3d-raymarch_{{MODEL_ID}}_01.html". + +--- + +Voxel Cube Engine. Write a simple voxel cube engine in javascript, css, html in one file. Render a 3D grid of colored cubes (“voxels”) using isometric projection. Allow the user to add or remove voxels by clicking on the grid. Let the user rotate the view with keyboard arrows.
Show the number of voxels present in the scene. Add GUI to set up all effect parameters and use a "dat.gui" library. Use file name "3d-voxels_{{MODEL_ID}}_01.html". + +--- + +3D Rotating Object (Wireframe/Flat/Gouraud Modes). Write a 3D object viewer in javascript, css, html in one file. Render a wireframe 3D model (e.g., teapot or icosahedron) on canvas. Allow the user to switch between wireframe, flat shaded, and Gouraud shaded modes with a button. Enable object rotation with mouse drag and reset with double-click. Display the current shading mode. Add GUI to set up all effect parameters. Use file name "3d-viewer_{{MODEL_ID}}_01.html". + +--- + +3D Mirror Reflection Demo. Write a 3D mirror reflection demo in javascript, css, html in one file. Render a simple 3D scene with a plane acting as a mirror and a few colored spheres. The mirror should reflect the spheres and the environment realistically. Allow the user to move the camera with mouse drag. Show the camera’s position and reflection calculations on the screen. Add GUI to set up all effect parameters and use a "dat.gui" library. Use file name "3d-mirror_{{MODEL_ID}}_01.html". + +--- + +Lightmapping & Shadows. Write a dynamic lightmapping and shadow demo in javascript, css, html in one file. Render a 3D scene with several boxes and one moving light source. Calculate and display dynamic shadows on the ground in real time as the light moves. Allow the user to control light position with keyboard keys. Show the current light position and shadow rendering time. Add GUI to set up all effect parameters and use a "dat.gui" library. Use file name "3d-lightmap_{{MODEL_ID}}_01.html". + +--- + +Procedural 3D Shape Morphing. Write a procedural 3D shape morphing demo in javascript, css, html in one file. Render a 3D object that smoothly morphs between a cube, a sphere, and a torus using interpolation of vertex positions.
Animate the morphing loop automatically, but allow the user to pause/resume and choose morph targets from a menu. Display the current morph state and time. Add GUI to set up all effect parameters and use a "dat.gui" library. Use file name "3d-morph_{{MODEL_ID}}_01.html". + +--- + +The application enables interactive creation and editing of simple 3D scenes directly in the web browser. The user can add various types of 3D objects, such as cubes, spheres, and tori, which appear in the scene and can be individually configured. Each object can be translated, rotated, and scaled, and assigned a material with a selected texture or color from predefined options. +The application also allows dynamic addition of different types of lights—such as point and directional lights—with adjustable parameters including position, intensity, and color. There is an option to enable or disable shadows cast by objects onto a simple ground plane, as well as to adjust shadow quality. +A control panel (GUI) lets the user manage the list of objects and lights, select items for editing, and modify their properties in real time. All changes are immediately reflected in the 3D view. The application supports scene reset and displays basic statistics such as the number of objects and lights. Everything runs within a single web page, requires no installation or backend, and is operated entirely from the browser. Add GUI to set up all effect parameters. All production code in one file using javascript, css, html. Use file name "3d-editor_{{MODEL_ID}}_01.html". + + +############################################ +# +############################################ + +## 1) Copper bitmap zoomer +Create a realtime demoscene-style “copper bitmap zoomer” in one HTML file (HTML/CSS/JS). Render a pixel-art bitmap (procedurally generated or embedded as a small palette image) and continuously zoom it in/out with smooth subpixel motion.
Simulate Amiga “copper” by applying per-scanline horizontal offsets and color/palette tweaks (scanline table updated each frame). Add optional rotation wobble, easing, and a HUD showing FPS and current zoom. Use Canvas 2D, no external assets. Include controls: Space toggles scanline warp, ArrowUp/ArrowDown changes zoom speed. Use file name "effect_copper_bitmap_zoomer_{{MODEL_ID}}_01.html". + +--- + +## 2) Gouraud shaded cube + +Build a realtime Gouraud-shaded rotating cube in a single HTML file using Canvas 2D (no WebGL). Implement a minimal 3D pipeline: vertices, rotation matrices, perspective projection, backface culling. Split faces into triangles and do per-vertex lighting (Lambert) then rasterize triangles with color interpolation (Gouraud shading). Add a moving light source, wireframe toggle (W), and pause (P). Display FPS. Use file name "effect_gouraud_shaded_cube_{{MODEL_ID}}_01.html". + +--- + +## 3) Text zoomer + +**File:** `effect_text_zoomer_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a classic demoscene “text zoomer” in one HTML file. Use a bitmap-font (procedurally generated pixel font) and animate lines of text that zoom toward the camera, overshoot, and ease out, with optional rotation and sine-wave wobble. Add scanlines, slight chromatic offset, and a palette-based look. Include a small text editor panel (toggle with E) to change the message live. Use file name "effect_text_zoomer_{{MODEL_ID}}_01.html". + +--- + +## 4) Vector dancers (2D/3D stick-figure vectors) + +**File:** `effect_vector_dancers_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a “vector dancers” demoscene effect in one HTML file. Represent dancers as animated line/segment rigs (bones + joints) in 3D, project to 2D, and draw crisp vector lines with glow. Sync simple choreography to a built-in time-beat function (no audio required): stepping, arm waves, spins. Add multiple dancers with different phase offsets and camera sweeps. 
Controls: 1/2 changes dancer count, C toggles camera auto-rotate. Use file name "effect_vector_dancers_{{MODEL_ID}}_01.html". + +--- + +## 5) Texture-mapped x-axis rotated cube (Copper-style slice mapping) + +**File:** `effect_texture_mapped_xaxis_cube_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Implement a texture-mapped cube that rotates mainly around the X axis, using a “slice / span” style mapper reminiscent of Amiga copper tricks. Use Canvas 2D and render the cube by drawing many thin horizontal strips (scanline slices) sampling from a procedural texture. Add shading, perspective, and an optional copper-like per-scanline color shift. Controls: S toggles slice thickness, L toggles lighting. Use file name "effect_texture_mapped_xaxis_cube_{{MODEL_ID}}_01.html". + +--- + +## 6) Copper twister thing + +**File:** `effect_copper_twister_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a classic “twister” effect in one HTML file: a vertical ribbon/tube that twists over time. Render it as per-scanline horizontal spans (like copper bars), where each scanline computes an X offset and width based on sine waves, plus a color gradient across the ribbon. Add specular highlight and depth cue (darker when “behind”). Controls: T toggles tube vs ribbon mode, G toggles gradient styles. Use file name "effect_copper_twister_{{MODEL_ID}}_01.html". + +--- + +## 7) 3D vectors (generic 3D wire objects) + +**File:** `effect_3d_vectors_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a realtime 3D vector-object show in one HTML file. Implement a minimal 3D engine (matrices, projection, z-sort for line segments) and cycle through multiple wire models (cube, torus knot approximation, logo-like polyline, sphere lat/long). Add camera fly-through, line glow, and a starfield background. Controls: N switches model, B toggles starfield. Use file name "effect_3d_vectors_{{MODEL_ID}}_01.html". 
+ +--- + +## 8) Shaded bouncing sphere object + +**File:** `effect_shaded_bouncing_sphere_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Build a shaded bouncing sphere in one HTML file using Canvas 2D. Render the sphere with per-pixel shading (fake Phong/Lambert) and a moving highlight. Animate it bouncing with squash/stretch and a shadow blob on the floor plane. Add a simple environment reflection band and subtle dithering for a retro look. Controls: H toggles highlight intensity, D toggles dithering. Use file name "effect_shaded_bouncing_sphere_{{MODEL_ID}}_01.html". + +--- + +## 9) Blitter feedback (feedback trail / framebuffer echo) + +**File:** `effect_blitter_feedback_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a “blitter feedback” effect in one HTML file. Use an offscreen buffer: each frame, copy the previous frame, apply a slight scale/rotate/translate, fade it, then draw new primitives on top (sprites/lines). Add a couple of feedback modes: zoom-in tunnel, swirl, and elastic wobble. Controls: M cycles modes, F changes feedback fade. Use file name "effect_blitter_feedback_{{MODEL_ID}}_01.html". + +--- + +## 10) Copper RGB plasma + +**File:** `effect_copper_rgb_plasma_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create an RGB plasma in one HTML file. Compute plasma values from multiple sine fields (x/y/time) and map to RGB with smooth palette cycling. Add a “copper” flavor by optionally rendering per-scanline palettes (slightly different RGB mapping per y). Include 2–3 palette presets and a smooth crossfade between them. Controls: P cycles palettes, C toggles per-scanline palette mode. Use file name "effect_copper_rgb_plasma_{{MODEL_ID}}_01.html". + +--- + +## 11) Vector tunnel + +**File:** `effect_vector_tunnel_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Build a “vector tunnel” in one HTML file. Render concentric rings or squares in 3D space along Z, project to 2D, and animate forward motion to simulate flying through a tunnel. 
Add camera roll and pulsing line thickness. Optional: overlay a scrolling message. Controls: R toggles rings vs squares, O toggles roll. Use file name "effect_vector_tunnel_{{MODEL_ID}}_01.html". + +--- + +## 12) Texture mapped logo + +**File:** `effect_texture_mapped_logo_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a texture-mapped logo effect in one HTML file. Generate a procedural “logo” mask (vector path or pixel mask), then map a moving texture through it (plasma/noise/stripes) with perspective skew and wobble. Add shine sweep and subtle shadow to give depth. Controls: X toggles texture type, S toggles shine. Use file name "effect_texture_mapped_logo_{{MODEL_ID}}_01.html". + +--- + +## 13) Zooming + rotating picture with shear (Blitter + Copper vibe) + +**File:** `effect_zoom_rotate_shear_picture_{{MODEL_ID}}_01.html` +**Prompt (EN):** +In one HTML file, create a zooming and rotating “picture” effect with shear. Use a procedural image (e.g., gradient + noise + shapes) as a source bitmap. Each frame: apply rotation+scale and then a per-scanline shear (x offset based on y and time). Add motion blur via feedback and a retro scanline overlay. Controls: K toggles shear strength, V toggles feedback blur. Use file name "effect_zoom_rotate_shear_picture_{{MODEL_ID}}_01.html". + +--- + +## 14) 1-bitplane c2p zoom rotator + +**File:** `effect_1bitplane_c2p_zoom_rotator_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a 1-bitplane “c2p zoom rotator” in one HTML file. Simulate chunky-to-planar by rendering to a 1-bit (black/white) buffer with dithering, then upscale it with rotation+zoom. Add classic high-contrast patterns, and a mode that shows the “planar” buffer as separate bitplanes for debugging (even if simulated). Controls: D toggles dithering pattern, B toggles bitplane debug view. Use file name "effect_1bitplane_c2p_zoom_rotator_{{MODEL_ID}}_01.html". 
+ +--- + +## 15) Zooming 3D square field (nice Copper-like scanlines) + +**File:** `effect_zooming_3d_square_field_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a zooming 3D square field in one HTML file: a grid of squares in 3D space moving toward the camera with perspective, with a copper-like per-scanline gradient that shifts over time. Add camera bob, depth fog, and occasional “wave” deformations across the grid. Controls: W toggles wave, G toggles gradient styles. Use file name "effect_zooming_3d_square_field_{{MODEL_ID}}_01.html". + +--- + +## 16) Bitmap 3D zooming cinema scene (pseudo-3D) + +**File:** `effect_bitmap_3d_cinema_zoom_{{MODEL_ID}}_01.html` +**Prompt (EN):** +In one HTML file, create a “bitmap 3D zooming cinema scene” illusion: a corridor/cinema-like environment made from textured quads (floor, ceiling, back wall) using a software texture mapper. Use procedural textures and animate a smooth zoom/dolly forward with slight camera shake. Add vignette and film-grain. Controls: 1/2 changes speed, V toggles vignette/grain. Use file name "effect_bitmap_3d_cinema_zoom_{{MODEL_ID}}_01.html". + +--- + +## 17) 102 x-axis texture mapped cubes + +**File:** `effect_102_xaxis_texture_cubes_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create 102 small texture-mapped cubes rotating mainly around the X axis in one HTML file. Use a lightweight software renderer: project vertices, draw cubes as textured quads via scanline strips or affine mapping. Optimize enough to keep stable FPS (batching, precomputed sin/cos tables). Add camera fly-through and a cube count slider (up to 200). Controls: +/- changes count, O toggles optimization debug overlay. Use file name "effect_102_xaxis_texture_cubes_{{MODEL_ID}}_01.html". + +--- + +## 18) c2p texture mapped sphere + +**File:** `effect_c2p_texture_mapped_sphere_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Build a texture-mapped rotating sphere in one HTML file. 
Use sphere UV mapping and render with scanline spans, sampling a procedural texture. Simulate “c2p” by rendering to a chunky buffer then converting to a palettized look (limited colors + dithering). Add shading based on a light vector. Controls: U toggles texture type, P toggles palette size (8/16/32). Use file name "effect_c2p_texture_mapped_sphere_{{MODEL_ID}}_01.html". + +--- + +## 19) Rotating dot landscape + +**File:** `effect_rotating_dot_landscape_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a rotating dot landscape in one HTML file. Generate a heightmap (procedural noise) and render it as a field of dots/particles in 3D with perspective. Animate camera orbit and gentle waves in the heightmap. Add distance-based dot size and fog. Controls: D toggles dots vs small lines, H toggles height exaggeration. Use file name "effect_rotating_dot_landscape_{{MODEL_ID}}_01.html". + +--- + +## 20) Colour cycling plasma + +**File:** `effect_colour_cycling_plasma_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a classic palette-cycling plasma in one HTML file. Compute a plasma scalar field, map it through a 256-color palette, and animate by cycling the palette indices (not recomputing colors each time) to mimic oldschool color cycling. Add palette editor controls and a “rainbow” + “fire” palette preset. Controls: P cycles presets, Left/Right adjusts cycle speed. Use file name "effect_colour_cycling_plasma_{{MODEL_ID}}_01.html". + +--- + +## 21) 102 x-axis texture mapped credits + +**File:** `effect_102_xaxis_texture_credits_{{MODEL_ID}}_01.html` +**Prompt (EN):** +In one HTML file, create a “102 x-axis texture mapped credits” effect: multiple textured panels/cards (up to 102) rotating around X while scrolling credits text appears mapped onto them. Use procedural textures and render panels via scanline slices. Add perspective and depth sorting. Controls: +/- changes panel count, T toggles text style (bitmap vs vector). 
Use file name "effect_102_xaxis_texture_credits_{{MODEL_ID}}_01.html". + +--- + +## 22) RGB plasma (Angels / Copper Master style) + +**File:** `effect_rgb_plasma_master_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a high-quality RGB plasma in one HTML file with multiple layered sine fields, smooth gradients, and optional metaball-like warping. Add a “copper master” mode: per-scanline color modulation and subtle banding like hardware color registers. Include FPS and a quality slider (pixel size 1–4). Controls: Q cycles quality, C toggles copper master mode. Use file name "effect_rgb_plasma_master_{{MODEL_ID}}_01.html". + +--- + +## 23) Texture mapped floor/ceiling + +**File:** `effect_texture_mapped_floor_ceiling_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Build a floor/ceiling texture-mapping effect in one HTML file (like a simple raycaster plane mapper). Render a horizon, then map a repeating texture onto floor and ceiling using perspective-correct stepping (or a fast approximation). Add camera turns, bobbing, and a logo sprite in the center. Controls: A/D turn, W/S move, M toggles mip/blur approximation. Use file name "effect_texture_mapped_floor_ceiling_{{MODEL_ID}}_01.html". + +--- + +## 24) 102 Copper x-axis bitmap logo bounce/stretch + +**File:** `effect_102_copper_logo_bounce_stretch_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a bitmap logo bounce/stretch effect in one HTML file. Draw a procedural “logo” bitmap and animate it with squash/stretch, bounce easing, and per-scanline horizontal stretching (copper-bar style). Add count mode up to 102 logos tiled across the screen with phase offsets. Controls: +/- changes logo count, B toggles bounce intensity. Use file name "effect_102_copper_logo_bounce_stretch_{{MODEL_ID}}_01.html". + +--- + +## 25) Copper zooming tunnel + +**File:** `effect_copper_zooming_tunnel_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a copper-style zooming tunnel in one HTML file. 
Render a tunnel by drawing many rings/rectangles that scale toward the viewer, but color and horizontal offsets are controlled per scanline from sine tables. Add a palette gradient and optional texture bands. Controls: Z toggles ring shapes, S toggles scanline offset strength. Use file name "effect_copper_zooming_tunnel_{{MODEL_ID}}_01.html". + +--- + +## 26) Transparent 3D vector + +**File:** `effect_transparent_3d_vector_{{MODEL_ID}}_01.html` +**Prompt (EN):** +In one HTML file, create “transparent” 3D vector objects: draw multiple layers of wireframes with alpha blending and depth-based fading to simulate translucency. Use additive glow lines and rotating camera. Include at least 3 models and crossfade between them. Controls: N switches model, A toggles additive vs alpha blend. Use file name "effect_transparent_3d_vector_{{MODEL_ID}}_01.html". + +--- + +## 27) Filled 3D city + +**File:** `effect_filled_3d_city_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a filled 3D cityscape in one HTML file using a simple software rasterizer. Procedurally generate buildings on a grid with varying heights and colors; render as solid faces (triangles/quads) with flat shading and a moving sun direction. Add camera flyover and fog. Controls: R regenerates city, F toggles fog. Use file name "effect_filled_3d_city_{{MODEL_ID}}_01.html". + +--- + +## 28) Zooming plasma + +**File:** `effect_zooming_plasma_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a zooming plasma in one HTML file. Compute a plasma field, then apply a zoom/rotate transform to the sampling coordinates each frame to create a “plasma zoomer” feel. Add palette cycling and a feedback blur mode. Controls: V toggles feedback, P cycles palettes, +/- adjusts zoom speed. Use file name "effect_zooming_plasma_{{MODEL_ID}}_01.html". 
+ +--- + +## 29) Line tunnel + +**File:** `effect_line_tunnel_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Build a “line tunnel” in one HTML file: a tunnel made from many line segments forming rotating frames along Z. Animate forward motion, camera roll, and pulsating radius. Add a starfield and occasional lightning flashes. Controls: L toggles lightning, O toggles roll. Use file name "effect_line_tunnel_{{MODEL_ID}}_01.html". + +--- + +## 30) Dot tunnel + +**File:** `effect_dot_tunnel_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create a dot tunnel effect in one HTML file. Place dots on circular rings along Z, project to 2D, animate traveling forward. Add depth-based dot size and brightness, plus a color gradient that cycles. Controls: C toggles color mode, D toggles dot size scaling. Use file name "effect_dot_tunnel_{{MODEL_ID}}_01.html". + +--- + +## 31) Dot landscape + +**File:** `effect_dot_landscape_{{MODEL_ID}}_01.html` +**Prompt (EN):** +In one HTML file, create a dot landscape similar to a 3D heightfield made of particles. Use procedural noise heightmap, render dots in perspective, animate camera pan and wave motion. Add a horizon gradient and optional grid lines. Controls: G toggles grid overlay, N regenerates noise seed. Use file name "effect_dot_landscape_{{MODEL_ID}}_01.html". + +--- + +## 32) Copper zooming tiles + +**File:** `effect_copper_zooming_tiles_{{MODEL_ID}}_01.html` +**Prompt (EN):** +Create copper-style zooming tiles in one HTML file. Render a tiled pattern (procedural checker/ornament) that zooms and rotates while each scanline can shift the sampling X coordinate (shear) and adjust palette bands, simulating copper registers. Add a mode with multiple tile layers at different speeds (parallax). Controls: P toggles parallax, S toggles scanline shear. Use file name "effect_copper_zooming_tiles_{{MODEL_ID}}_01.html". + +--- + +Jeśli chcesz, mogę też dopisać do każdego promptu **wspólny „kontrakt testowy”** pod Twoje agenty (np. 
wymagane FPS, struktura kodu, sekcja “Performance Notes”, klawisze diagnostyczne, deterministyczny seed) — tak, żeby wyniki były porównywalne między modelami. + +[1]: https://www.youtube.com/watch?v=G3HUp7LH5ig&utm_source=chatgpt.com "Commodore Amiga 500 Best Demo Effects" + +OK — poniżej masz **kolejną paczkę efektów demoscenowych**, ale tym razem nie z konkretnego filmu tylko z klasycznego kanonu demosceny (Amiga / PC / oldschool + modern shader demo). + +Zachowałem: + +* jeden plik HTML (HTML + CSS + JS) +* styl promptu zgodny z Twoim wzorcem (agent-friendly) +* nazewnictwo plików: `effect_nazwa_{{MODEL_ID}}_01.html` +* prompty po angielsku + +Wybrałem efekty, które dobrze testują agentów kodujących (matematyka, render pipeline, feedback loops, procedural generation, timing). + +--- + +# 🔥 Prompty – dodatkowe efekty demoscenowe + +--- + +## 1) Kefrens bars + +File: `effect_kefrens_bars_{{MODEL_ID}}_01.html` + +Prompt: + +Create a classic demoscene "Kefrens bars" effect in one HTML file using HTML/CSS/JavaScript only. Render multiple horizontal bars moving with sine-based motion across the screen. Each bar should use smooth gradients and simulate retro hardware color cycling. Implement layered sine offsets for X position and thickness modulation. Add palette switching and motion speed control. Include scanline overlay and FPS counter. Use Canvas 2D. Use file name "effect_kefrens_bars_{{MODEL_ID}}_01.html". + +--- + +## 2) Starfield hyperspace + +File: `effect_starfield_hyperspace_{{MODEL_ID}}_01.html` + +Prompt: + +Create a classic demoscene hyperspace starfield in one HTML file. Generate thousands of stars in 3D space moving toward the camera with perspective projection. Stars should accelerate into hyperspace mode and stretch into lines during fast movement. Include depth-based brightness and trail effects. Add camera wobble and auto speed modulation. Controls: Space toggles hyperspace mode. Use Canvas rendering only. 
Use file name "effect_starfield_hyperspace_{{MODEL_ID}}_01.html". + +--- + +## 3) Rotozoomer + +File: `effect_rotozoomer_{{MODEL_ID}}_01.html` + +Prompt: + +Implement a realtime rotozoomer in one HTML file. Use a procedural texture and render it with simultaneous rotation and zoom using affine texture sampling. Optimize for performance by precomputing sin/cos tables. Include multiple texture modes (checkerboard, plasma, noise). Add palette cycling and pixelation toggle for retro look. Use Canvas 2D and no external libraries. Use file name "effect_rotozoomer_{{MODEL_ID}}_01.html". + +--- + +## 4) Tunnel texture mapper (classic demo tunnel) + +File: `effect_classic_texture_tunnel_{{MODEL_ID}}_01.html` + +Prompt: + +Create a classic demoscene texture tunnel in one HTML file. Map a repeating texture into a polar coordinate tunnel illusion. Animate forward motion through the tunnel using UV coordinate manipulation. Include dynamic lighting and palette cycling. Provide at least two procedural textures. Add subtle distortion waves. Use file name "effect_classic_texture_tunnel_{{MODEL_ID}}_01.html". + +--- + +## 5) Metaballs / blob effect + +File: `effect_metaballs_blob_{{MODEL_ID}}_01.html` + +Prompt: + +Create a realtime metaballs effect in one HTML file. Generate multiple moving blobs that merge visually using scalar field thresholding. Render using marching squares or pixel-based field evaluation. Include smooth shading and palette gradient. Add adjustable blob count and speed controls. Use Canvas rendering. Use file name "effect_metaballs_blob_{{MODEL_ID}}_01.html". + +--- + +## 6) Fire effect (palette fire) + +File: `effect_palette_fire_{{MODEL_ID}}_01.html` + +Prompt: + +Create a classic demoscene fire effect using palette cycling in one HTML file. Implement a heat buffer simulation where each frame propagates upward with cooling and randomness. Map heat values to a predefined palette (fire gradient). Include retro dithering and optional wind effect. 
Controls for intensity and turbulence. Use file name "effect_palette_fire_{{MODEL_ID}}_01.html". + +--- + +## 7) Scroller with sine wave (scrolltext) + +File: `effect_sine_scrolltext_{{MODEL_ID}}_01.html` + +Prompt: + +Create a demoscene scrolltext in one HTML file. Render pixel-font text scrolling horizontally while moving vertically along a sine wave path. Add shadow and glow effects. Include configurable text speed and wave amplitude. Provide simple input to change the text dynamically. Add retro raster bars behind text. Use file name "effect_sine_scrolltext_{{MODEL_ID}}_01.html". + +--- + +## 8) Plasma tunnel + +File: `effect_plasma_tunnel_{{MODEL_ID}}_01.html` + +Prompt: + +Create a plasma tunnel effect in one HTML file combining plasma shading with radial tunnel mapping. Use multiple sine functions for color and depth illusion. Add camera rotation and zoom oscillation. Include palette cycling and glow effect. Use Canvas rendering only. Use file name "effect_plasma_tunnel_{{MODEL_ID}}_01.html". + +--- + +## 9) Raymarching distance field (modern demo effect) + +File: `effect_raymarching_sdf_{{MODEL_ID}}_01.html` + +Prompt: + +Create a realtime raymarching demo effect in one HTML file using Canvas or WebGL (prefer Canvas if possible). Implement a simple signed distance field scene (sphere, torus, plane) with lighting and shadows. Animate camera movement. Include fog and reflective surface option. Provide debug overlay showing iteration count. Use file name "effect_raymarching_sdf_{{MODEL_ID}}_01.html". + +--- + +## 10) Procedural voxel landscape + +File: `effect_voxel_landscape_{{MODEL_ID}}_01.html` + +Prompt: + +Create a procedural voxel landscape in one HTML file inspired by classic demoscene terrain effects. Use heightmap rendering with perspective scaling to simulate flying over terrain. Include color bands for altitude, fog, and camera movement. Provide multiple terrain generation modes (noise variants). 
Use file name "effect_voxel_landscape_{{MODEL_ID}}_01.html". + +--- + +## 11) Lissajous particle attractor + +File: `effect_lissajous_particles_{{MODEL_ID}}_01.html` + +Prompt: + +Create a demoscene particle effect based on Lissajous curves in one HTML file. Render thousands of particles following parametric paths with fading trails. Add color gradients and additive blending look. Include speed and complexity controls. Show FPS counter. Use file name "effect_lissajous_particles_{{MODEL_ID}}_01.html". + +--- + +## 12) Feedback kaleidoscope + +File: `effect_feedback_kaleidoscope_{{MODEL_ID}}_01.html` + +Prompt: + +Create a feedback kaleidoscope effect in one HTML file. Each frame should transform the previous frame using rotation, scaling, and mirror symmetry to create fractal-like visuals. Overlay procedural shapes each frame. Add symmetry mode switching and color cycling. Use Canvas rendering only. Use file name "effect_feedback_kaleidoscope_{{MODEL_ID}}_01.html". + +--- + +# 🔥 Jeśli chcesz (polecam pod Twoje testy Refio / agentów) + +Mogę dorzucić jeszcze 3 mocne zestawy efektów, które są **idealne do benchmarkowania agentów LLM**: + +1️⃣ HARDCORE OLD DEMOSCENE PACK +(copper tricks, bitplane emulation, HAM mode, palette tricks) + +2️⃣ GPU / SHADER DEMOSCENE PACK +(raymarching fractals, signed distance fractals, shader toys) + +3️⃣ "Impossible for weak models" pack +(software rasterizer, affine vs perspective mapping, triangle filler) + +Powiedz tylko — i przygotuję 👍. + + +OK 🙂 — zrobiłem pełny **mega-pack efektów demoscenowych** pod testy agentów (idealne pod Twój workflow Refio + porównywanie modeli lokalnych vs mocnych). + +Każdy: + +* single file (HTML + CSS + JS) +* agent-friendly spec +* nazwany: `effect_nazwa_{{MODEL_ID}}_01.html` +* prompty po angielsku +* celowo dobrane tak, żeby testować różne klasy problemów (math, render pipeline, performance, state reasoning). + +Podzielone na 3 grupy jak chciałeś. 
+ +--- + +# 🔥 1️⃣ HARDCORE OLDSCHOOL DEMOSCENE PACK + +(stare techniki — testują algorytmiczne myślenie i optymalizacje CPU) + +--- + +## Copper rasterbars + +File: `effect_copper_rasterbars_{{MODEL_ID}}_01.html` + +Prompt: + +Create a classic Amiga-style copper rasterbars effect in one HTML file. Render horizontal color bands whose colors change per scanline using sine functions. Simulate hardware raster timing by computing color per y-line. Include palette cycling, soft gradients, and overlapping bars. Add scanline CRT overlay. Controls: change sine frequency and palette. Use file name "effect_copper_rasterbars_{{MODEL_ID}}_01.html". + +--- + +## Bitplane renderer simulation + +File: `effect_bitplane_renderer_{{MODEL_ID}}_01.html` + +Prompt: + +Create a simulated planar bitplane graphics system in one HTML file. Render graphics by combining multiple bitplanes (binary layers) into final pixel colors using palette lookup. Provide visualization of individual bitplanes and final output. Include animated shapes rendered through planar conversion. Use file name "effect_bitplane_renderer_{{MODEL_ID}}_01.html". + +--- + +## HAM mode (Hold-And-Modify simulation) + +File: `effect_ham_mode_simulation_{{MODEL_ID}}_01.html` + +Prompt: + +Simulate Amiga HAM mode rendering in one HTML file. Generate an image where each pixel may modify only one color channel relative to previous pixel. Visualize encoding artifacts intentionally. Provide procedural source image. Include debug overlay showing encoding decisions. Use file name "effect_ham_mode_simulation_{{MODEL_ID}}_01.html". + +--- + +## Dual playfield parallax + +File: `effect_dual_playfield_parallax_{{MODEL_ID}}_01.html` + +Prompt: + +Create a dual playfield scrolling effect inspired by Amiga hardware in one HTML file. Render two independently scrolling layers with different speeds and color palettes. Include transparency and raster color changes. Use file name "effect_dual_playfield_parallax_{{MODEL_ID}}_01.html". 
+ +--- + +## Software sprite multiplexer + +File: `effect_sprite_multiplexer_{{MODEL_ID}}_01.html` + +Prompt: + +Simulate hardware sprite multiplexing in one HTML file. Render many sprites using limited virtual hardware slots reused vertically per scanline. Show debug visualization of slot reuse. Include bouncing sprites and color cycling. Use file name "effect_sprite_multiplexer_{{MODEL_ID}}_01.html". + +--- + +--- + +# 🔥 2️⃣ GPU / SHADER DEMOSCENE PACK + +(testuje modele pod math, shading, przestrzenne reasoning) + +--- + +## Mandelbox raymarch + +File: `effect_mandelbox_raymarch_{{MODEL_ID}}_01.html` + +Prompt: + +Create a realtime raymarched Mandelbox fractal in one HTML file. Implement signed distance field raymarching with dynamic camera movement. Include lighting, fog, and soft shadows. Provide performance quality toggle. Use file name "effect_mandelbox_raymarch_{{MODEL_ID}}_01.html". + +--- + +## Infinite kaleidoscopic shader + +File: `effect_infinite_kaleidoscope_{{MODEL_ID}}_01.html` + +Prompt: + +Create a kaleidoscopic shader effect in one HTML file using procedural UV transformations. Apply mirror symmetry and radial repetition to generate infinite patterns. Include palette cycling and animated noise overlay. Use file name "effect_infinite_kaleidoscope_{{MODEL_ID}}_01.html". + +--- + +## Volumetric light beams + +File: `effect_volumetric_beams_{{MODEL_ID}}_01.html` + +Prompt: + +Create a volumetric light beam effect in one HTML file. Simulate light shafts using layered alpha blending and radial gradients. Animate light direction and particle dust. Include bloom-like glow. Use file name "effect_volumetric_beams_{{MODEL_ID}}_01.html". + +--- + +## Signed distance tunnel + +File: `effect_sdf_tunnel_{{MODEL_ID}}_01.html` + +Prompt: + +Create a raymarched tunnel using signed distance fields in one HTML file. Include repeating geometry using modular space repetition. Animate forward camera motion. Add neon lighting and reflections. 
Use file name "effect_sdf_tunnel_{{MODEL_ID}}_01.html". + +--- + +## Reaction diffusion pattern + +File: `effect_reaction_diffusion_{{MODEL_ID}}_01.html` + +Prompt: + +Create a Gray-Scott reaction diffusion simulation in one HTML file. Use pixel-based simulation updating each frame. Include palette mapping and interactive parameter sliders. Use file name "effect_reaction_diffusion_{{MODEL_ID}}_01.html". + +--- + +--- + +# 🔥 3️⃣ IMPOSSIBLE FOR WEAK MODELS PACK + +(to są killer testy 🙂 — modele często się tu wykładają) + +--- + +## Software triangle rasterizer + +File: `effect_triangle_rasterizer_{{MODEL_ID}}_01.html` + +Prompt: + +Implement a software triangle rasterizer in one HTML file. Support filled triangles using barycentric coordinates. Include depth buffer and simple shading. Render rotating 3D object. Provide wireframe toggle and debug overlay. Use file name "effect_triangle_rasterizer_{{MODEL_ID}}_01.html". + +--- + +## Perspective correct texture mapping + +File: `effect_perspective_texture_mapping_{{MODEL_ID}}_01.html` + +Prompt: + +Create perspective-correct texture mapping in one HTML file. Render a textured quad rotating in 3D space. Implement perspective interpolation instead of affine mapping. Include comparison mode showing affine vs perspective. Use file name "effect_perspective_texture_mapping_{{MODEL_ID}}_01.html". + +--- + +## Scanline polygon filler + +File: `effect_scanline_polygon_fill_{{MODEL_ID}}_01.html` + +Prompt: + +Create a scanline polygon fill renderer in one HTML file. Implement edge tables and active edge list algorithm. Render animated polygons with shading. Include debug view of scanline processing. Use file name "effect_scanline_polygon_fill_{{MODEL_ID}}_01.html". + +--- + +## Mode7 floor renderer + +File: `effect_mode7_floor_{{MODEL_ID}}_01.html` + +Prompt: + +Create a Mode7-style floor renderer in one HTML file. Map texture onto ground plane using perspective projection. Animate camera movement. Include rotation and scaling. 
Provide grid debug overlay. Use file name "effect_mode7_floor_{{MODEL_ID}}_01.html". + +--- + +## Software z-buffer renderer + +File: `effect_software_zbuffer_{{MODEL_ID}}_01.html` + +Prompt: + +Create a software 3D renderer with z-buffer in one HTML file. Render multiple overlapping objects and resolve depth properly. Include rotating cubes and camera controls. Add debug visualization of depth buffer. Use file name "effect_software_zbuffer_{{MODEL_ID}}_01.html". + +--- + +# 😈 BONUS — dlaczego to jest idealne pod Refio / testy agentów + +Ponieważ każdy pack sprawdza inną kategorię: + +* OLD SCHOOL → constraint reasoning + retro algorytmy +* SHADER → math intuition + spatial reasoning +* IMPOSSIBLE → low-level graphics pipeline (większość modeli halucynuje tutaj) + +--- + +Jeśli chcesz, mogę zrobić jeszcze coś mocniejszego (i serio polecam): + +🔥 **MEGA DEMOSCENE BENCHMARK SUITE** — gotowy zestaw do porównywania modeli: + +* difficulty tiers +* scoring rubric +* common failure patterns +* test prompts dla modeli 14B vs 70B vs Claude/OpenAI + +Powiedz tylko 👍. + + +############################################ +# +############################################ + +Create a modern website for the café “Pod Bukiem,” independently choosing the layout, sections, tone of communication, CTAs, and sales-oriented narrative. +The site must be in Polish and present the café’s offer (menu, promotions, reservations, catering/events, location, and hours) while conveying the venue’s unique atmosphere. +Design both desktop and mobile-first versions in line with the latest UX/UI, performance, SEO, and accessibility best practices (WCAG 2.2 AA), with persuasive microcopy. +Include sticky navigation, a hero section with value and USP, social proof (reviews/ratings), a gallery, “About Us” section, FAQ, and a footer with schema.org structured data. 
+Deliver clean, production-ready HTML/CSS/JS code (no frameworks) in a single file named site-cafe_{{MODEL_ID}}_01.html, with Open Graph meta tags, favicons, analytics, semantic markup, aria-labels, and a Lighthouse score ≥ 90 for Performance/SEO/Accessibility/Best Practices. +All production code in one file using javascript, css, html. Use file name "site-cafe_{{MODEL_ID}}_01.html". + + +--- + +Create a modern website for “Urban Nest Realty,” independently choosing the layout, sections, tone of communication, CTAs, and professional sales narrative. +The site must be in English and showcase the agency’s offer (property listings, virtual tours, market insights, mortgage advice, contact details) while highlighting trust and expertise. +Design for desktop and mobile-first using the latest UX/UI, performance, SEO, and accessibility best practices (WCAG 2.2 AA), with persuasive real estate microcopy. +Include sticky navigation, a hero section with value proposition and USP, featured listings with filters, testimonials, “About Us” section, FAQ, and a footer with schema.org structured data. +Deliver clean, production-ready HTML/CSS/JS code (no frameworks) in a single file named site-urban_{{MODEL_ID}}_01.html, with Open Graph meta tags, favicons, analytics, semantic markup, aria-labels, and Lighthouse score ≥ 90. +All production code in one file using javascript, css, html. Use file name "site-urban_{{MODEL_ID}}_01.html". + +--- + +Local Gym & Fitness Studio. Create a modern website for “Pulse Fitness,” independently choosing the layout, sections, tone of communication, CTAs, and motivational narrative. +The site must be in English and present the gym’s offer (membership plans, training programs, personal training, class schedules, and location) while conveying energy and community spirit. +Design desktop and mobile-first versions according to the latest UX/UI, performance, SEO, and accessibility best practices (WCAG 2.2 AA), with persuasive fitness-oriented microcopy. 
+Include sticky navigation, a hero section with core benefits and USP, testimonials, photo/video gallery, “Meet the Trainers” section, FAQ, and a footer with schema.org structured data. +Deliver clean, production-ready HTML/CSS/JS code (no frameworks) in a single file, with Open Graph meta tags, favicons, analytics, semantic markup, aria-labels, and Lighthouse score ≥ 90. +All production code in one file using javascript, css, html. Use file name "site-local-gym_{{MODEL_ID}}_01.html". + +--- + +Handmade Jewelry E-commerce Store. +Create a modern e-commerce website for “Luna Gems,” independently choosing the layout, sections, tone of communication, CTAs, and product-focused narrative. +The site must be in English and present the brand’s products (necklaces, earrings, rings), story, and values while encouraging online purchases. +Design both desktop and mobile-first versions according to UX/UI, performance, SEO, and accessibility best practices (WCAG 2.2 AA), with persuasive microcopy optimized for conversions. +Include sticky navigation, a hero with featured products, product catalog with filters, customer reviews, “Our Story” section, FAQ, secure checkout, and a footer with schema.org structured data. +Deliver clean, production-ready HTML/CSS/JS code (no frameworks) in a single file named **site-handmade-jewelry_{{MODEL_ID}}_01.html**, with Open Graph meta tags, favicons, analytics, semantic markup, aria-labels, and Lighthouse score ≥ 90. + +--- + +Personal Portfolio for a Photographer. Create a modern portfolio website for photographer “Evan Brooks,” independently choosing the layout, sections, tone of communication, CTAs, and artistic narrative. +The site must be in English and showcase the photographer’s work (portfolio galleries, project stories, services, client testimonials) while reflecting a unique visual style. 
+Design desktop and mobile-first versions according to the latest UX/UI, performance, SEO, and accessibility best practices (WCAG 2.2 AA), with creative and persuasive microcopy. +Include sticky navigation, hero with featured photos, portfolio grid with categories, “About Me” section, service packages, contact form, and a footer with schema.org structured data. +Deliver clean, production-ready HTML/CSS/JS code (no frameworks) in a single file named **site-portfolio_{{MODEL_ID}}_01.html**, with Open Graph meta tags, favicons, analytics, semantic markup, aria-labels, and Lighthouse score ≥ 90. + +--- + +Nonprofit Organization Website. Create a modern website for the nonprofit “Green Future Foundation,” independently choosing the layout, sections, tone of communication, CTAs, and impact-driven narrative. +The site must be in English and present the organization’s mission, projects, volunteer opportunities, donation options, and events while inspiring trust and participation. +Design for desktop and mobile-first following UX/UI, performance, SEO, and accessibility best practices (WCAG 2.2 AA), with persuasive microcopy to encourage donations and engagement. +Include sticky navigation, hero with mission statement and USP, impact stories, project gallery, volunteer sign-up form, FAQ, and a footer with schema.org structured data. +Deliver clean, production-ready HTML/CSS/JS code (no frameworks) in a single file named **site-nonprofit_{{MODEL_ID}}_01.html**, with Open Graph meta tags, favicons, analytics, semantic markup, aria-labels, and Lighthouse score ≥ 90. + +--- + +Twoim zadaniem jest wygenerowanie nowej wersji minimalistycznej strony typu landing page. Poniżej znajdziesz opis strony, jej strukturę i charakter copy. + +**Cel strony:** +Przyciągnąć uwagę właścicieli małych firm i przekonać ich do skorzystania z systemu do obserwacji danych. Strona powinna być nowoczesna, prosta, czytelna i bazować na czerni, bieli oraz czerwonych akcentach. + +**Układ strony:** +1. 
**Header** – logo z czerwonym akcentem, uproszczona nawigacja (Funkcje, Opinie, Demo). CTA w formie przycisku „Zobacz demo”. +2. **Hero (pierwszy ekran)** – duży nagłówek podkreślający korzyści („Masz dane. My nadajemy im sens.”). Krótkie hasło opisowe, mocne CTA (np. „Rozpocznij darmowy test”). Obok wizualizacja prostego panelu z danymi i alertami. +3. **Sekcja funkcji** – trzy kluczowe benefity opisane językiem bezpośrednim, np. „Jedna tablica zamiast tysiąca plików”, „Powiadomienia w sekundę”, „Wykresy, które rozumiesz”. Każdy benefit z numerem i ikoną. +4. **Sekcja opinii (social proof)** – krótkie cytaty klientów (imię, rodzaj biznesu). Czarne tło, białe teksty i czerwone akcenty. +5. **Sekcja CTA (powtórzenie)** – nagłówek („Zacznij już teraz”), krótki opis korzyści („14 dni za darmo. Bez karty. Zero ryzyka.”), dwa przyciski: czerwony CTA („Załóż darmowe konto”) i czarny outline („Umów prezentację”). +6. **Stopka** – prawa autorskie, linki do polityki prywatności i regulaminu. + +**Styl graficzny:** +- Minimalistyczny, dużo pustej przestrzeni. +- Kolory: biały (#fff), czarny (#000) i czerwony (#E10600). +- Font: bezszeryfowy, nowoczesny (np. Inter, Poppins). +- CTA powtarzane w co najmniej 2 miejscach. + +**Język i ton:** +- Bezpośredni, prosty, zrozumiały dla właściciela małej firmy. +- Akcent na oszczędność czasu i prostotę obsługi. +- Unikać technicznego żargonu. + +**Przykłady CTA:** +- „Rozpocznij darmowy test” +- „Załóż darmowe konto” +- „Umów prezentację” + +Wygeneruj na tej podstawie nową wersję landing page w HTML (lub w React/Tailwind), zachowując minimalistyczny design, prosty układ i czerwone akcenty. HTML/CSS/JS ma być w jednym pliku o nazwie **site-obserwator.html** + + + +############################## +# Other +############################## + +Create a single-file HTML/CSS/JS animation. Depict a realistic side-view scene of a stainless steel chef’s knife slicing through a fresh cucumber on a wooden cutting board. 
The knife should have a reflective metallic blade and a dark handle, moving in a smooth downward and forward slicing motion. As the blade cuts, the cucumber should separate into clean round slices with subtle interior texture and moisture highlights. Include slight compression of the cucumber before each cut, gentle board contact, and small visual cues like soft shadows and minor vibration to enhance realism. Filename "anim_knife_01.html" + +--- + +Utwórz jednoplikową animację HTML/CSS/JS (Canvas) gry w stylu Froggera. Pokaż postać (kółko lub prosty sprite) na dole ekranu, próbującą dotrzeć na górę. Animuj 4 poziome pasy ruchu poruszające się w lewo/prawo z różnymi prędkościami, z samochodami/ciężarówkami o różnych rozmiarach. Postać automatycznie skacze do przodu, zatrzymuje się, aby w ostatniej chwili uniknąć samochodów, a następnie kontynuuje, gdy jest bezpiecznie. Pokazuj efekty wizualne „o włos” (wstrząs ekranu, zwolnione tempo), gdy postać ledwo unika kolizji. Postać bezpiecznie dociera na szczyt, krótko świętuje z efektem cząsteczkowym, a następnie odradza się na dole, tworząc płynną, bezszwową pętlę. + + +############################################## + +Create a production-ready, visually stunning website with a **cyberpunk product studio** theme. + +GOAL +Build a single-page (optional /work/[slug]) website for a fictional studio: +**“NEON FORGE”** — design + engineering for ambitious startups. + +TECH STACK (use exactly this unless something breaks) +- Next.js (latest stable) + TypeScript +- Tailwind CSS +- Framer Motion (scroll/entrance animations) +- next/image for optimized images +- No backend required (mock data in code) + +DESIGN DIRECTION +- Cyberpunk minimal: near-black base, neon edge highlights, subtle scanlines/noise (very light), holographic gradients. +- Palette: #070A12 background, #EAF0FF text, accents: electric blue ↔ hot pink ↔ acid green. +- Typography: premium geometric headings + readable body. 
+- Micro-interactions: hover glow rings, slight tilt on cards, animated underline, shimmering borders, smooth section reveal. +- Responsive: mobile-first; looks excellent on iPhone + 4K desktop. +- Accessibility: contrast, focus states, keyboard nav, semantic headings, aria labels. +- Performance: keep animation lightweight, lazy-load imagery. + +PAGES / SECTIONS +1) Sticky glass navbar: logo + anchors (Work, Process, Results, Pricing, Contact) + CTA “Book a Call”. +2) HERO + - Headline: “Ship products that feel inevitable.” + - Subtext: short pitch + credibility line (years, launches, etc. mock) + - Primary CTA: “Start a Project” + - Secondary CTA: “View Work” + - Cinematic hero background + animated gradient overlay + subtle noise. +3) SELECTED WORK (grid of 6) + - Each card: project name, industry tag, 1-line outcome, metric (“+32% conversion” mock) + - Hover: shimmer, tilt, quick facts reveal + - Click: modal or /work/[slug] (optional) +4) PROCESS (4 steps) + - Discover → Design → Build → Launch + - Animated progress line on scroll + step cards +5) RESULTS + - Metrics grid (8) + mini case highlights (3) +6) PRICING (3 tiers + custom) + - “Sprint / Product / Partner” + - Feature list, “Best for” line, CTA +7) TESTIMONIALS + - Carousel or stacked cards, include roles & companies (fictional) +8) CONTACT + - Form fields: name, email, company, budget (select), timeline, message + - Validate client-side; on submit show toast “Request received” +9) FOOTER + - Minimal; links; “Image Credits” list + +IMAGERY (IMPORTANT) +Use attractive photos **from the web** that are safe to use. Prefer **Unsplash Source** (no API key). +- Use remote images (do NOT commit copyrighted assets). +- Configure Next.js to allow the remote image domains. +- Provide “Image Credits” in footer. 
+Use these remote image URLs: +- Hero: https://source.unsplash.com/featured/2400x1400?cyberpunk,city,night +- Work cards: + 1) https://source.unsplash.com/featured/1200x900?neon,interface + 2) https://source.unsplash.com/featured/1200x900?startup,team,night + 3) https://source.unsplash.com/featured/1200x900?ai,lab,lighting + 4) https://source.unsplash.com/featured/1200x900?product,design,dark + 5) https://source.unsplash.com/featured/1200x900?code,terminal,neon + 6) https://source.unsplash.com/featured/1200x900?futuristic,architecture + +FEATURE POLISH (make it feel premium) +- Use two Google fonts (e.g., Space Grotesk + Inter). +- Add subtle animated gradient blob behind hero text. +- Thin animated conic-gradient borders for CTAs and key cards. +- Scroll-based section reveal (Framer Motion). +- Active section highlight in navbar while scrolling. +- Floating “Book a Call” button on mobile. +- SEO metadata (title, description, OpenGraph) + nice SVG favicon. + +PROJECT STRUCTURE +- /app with layout, page, components folder +- components: Navbar, Hero, WorkGrid, Process, Results, Pricing, Testimonials, ContactForm, Footer +- data: work + testimonials + pricing (typed) +- utilities: classnames helper +- Tailwind config with custom colors, gradients, shadows + +DELIVERABLES +1) Complete repo that runs: + - npm install + - npm run dev + - npm run build +2) README.md with: + - What it is + - How to run + - Image credits note +3) Ensure it looks “finished”: spacing, typography scale, consistent radii (rounded-2xl), shadows, transitions. + +QUALITY BAR +This must look like a premium studio landing page (Apple-level polish). No template-y feel. + +Now implement it end-to-end. 
+ + +####################################################### +# Multiple files website +####################################################### + + +######################################################### +# Single file +######################################################### + +### 1) AURORA LUXE TRAVEL — futurystyczny luxury travel + +``` +Create a production-ready, visually stunning **single-file** website in ONE `web_aurora_{{MODEL_ID}}.html` using only **HTML + CSS + Vanilla JS**. + +GOAL +Build a single-page website for fictional brand: +**“AURORA LUXE TRAVEL”** — ultra-premium concierge trips. + +TECH (STRICT) +- One file: web_aurora_{{MODEL_ID}}.html only +- Inline $message" currentTaskSection.setContent(message, html) currentLLMPrompt = null + currentRawContext = null copyPromptButton.isEnabled = false savePromptButton.isEnabled = false viewPromptButton.isEnabled = false + viewContextButton.isEnabled = false // Clear other sections (order matches contentPanel order for clarity) sectionEntries diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/components/history/HistoryPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/components/history/HistoryPanel.kt index 89b7c680..08c00d73 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/components/history/HistoryPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/components/history/HistoryPanel.kt @@ -46,11 +46,41 @@ class HistoryPanel( private val searchField: SearchTextField private val filterTabs: JTabbedPane + private val sortCombo: JComboBox + private val statusCombo: JComboBox + private val groupPinnedCheckbox: JCheckBox private val sessionListPanel: JPanel private val sessionListScrollPane: JBScrollPane + enum class SortOrder(val label: String) { + UPDATED_DESC("Recently updated"), + CREATED_DESC("Recently created"), + CREATED_ASC("Oldest first"), + NAME_ASC("Name A→Z"), + NAME_DESC("Name Z→A"), + DURATION_DESC("Duration (longest)"), + 
GENERATION_DESC("Generation time (longest)"), + TOKENS_DESC("Tokens (most)"), + COST_DESC("Cost (highest)"); + + override fun toString() = label + } + + enum class StatusFilter(val label: String, val status: TaskStatus?) { + ALL("All statuses", null), + SUCCESS("✓ Success", TaskStatus.SUCCESS), + FAILED("✗ Failed", TaskStatus.FAILED), + RUNNING("⟳ Running", TaskStatus.RUNNING), + CANCELED("⊘ Canceled", TaskStatus.CANCELED), + NEW("New", TaskStatus.NEW), + PENDING("Pending", TaskStatus.PENDING); + + override fun toString() = label + } + private var allSessions: List = emptyList() private var filteredSessions: List = emptyList() + private val generationMillisBySession: MutableMap = mutableMapOf() @Volatile private var isLoading = false @@ -107,7 +137,7 @@ class HistoryPanel( add(searchField, BorderLayout.CENTER) } - // Filter tabs + // Filter tabs (mode) filterTabs = JTabbedPane().apply { addTab("All", JPanel()) addTab("Chat", JPanel()) @@ -120,6 +150,32 @@ class HistoryPanel( } } + // Sort + status filter row + sortCombo = JComboBox(SortOrder.values()).apply { + selectedItem = SortOrder.UPDATED_DESC + toolTipText = "Sort order" + addActionListener { applyFilters() } + } + + statusCombo = JComboBox(StatusFilter.values()).apply { + selectedItem = StatusFilter.ALL + toolTipText = "Filter by status" + addActionListener { applyFilters() } + } + + groupPinnedCheckbox = JCheckBox("Group pinned", true).apply { + toolTipText = "Show pinned sessions in a separate section at the top" + addActionListener { applyFilters() } + } + + val filterRow = JBPanel>(FlowLayout(FlowLayout.LEFT, 6, 2)).apply { + add(JBLabel("Sort:")) + add(sortCombo) + add(JBLabel("Status:")) + add(statusCombo) + add(groupPinnedCheckbox) + } + // Session list sessionListPanel = JPanel().apply { layout = BoxLayout(this, BoxLayout.Y_AXIS) @@ -137,11 +193,12 @@ class HistoryPanel( }) } - // Top panel combining header and filter tabs + // Top panel combining header, filter tabs, and sort/status row val topPanel 
= JBPanel>().apply { layout = BoxLayout(this, BoxLayout.Y_AXIS) add(headerPanel) add(filterTabs) + add(filterRow) } // Layout @@ -183,8 +240,6 @@ class HistoryPanel( val tasks = (currentProjectTasks + testDataTasks) // Deduplicate by ID (in case same session exists in both) .distinctBy { it.id } - // Filter out empty sessions (no LLM calls made), keep pinned - .filter { it.pinned || it.tokensIn > 0 || it.tokensOut > 0 } logger.debug { "Converting ${tasks.size} tasks to Session models (project=${currentProjectTasks.size}, testData=${testDataTasks.size})" @@ -200,6 +255,7 @@ class HistoryPanel( executionMode = ExecutionMode.valueOf(task.executionMode), createdAt = task.createdAt, updatedAt = task.updatedAt, + model = extractModelFromUiState(task.uiState), tokensIn = task.tokensIn, tokensOut = task.tokensOut, costUsd = task.costUsd, @@ -210,6 +266,22 @@ class HistoryPanel( logger.debug { "Loaded ${allSessions.size} sessions" } + // Compute generation time (LLM + tool execution) per session from message metadata. + generationMillisBySession.clear() + allSessions.forEach { session -> + try { + val messages = router.chatRouter.getMessages(session.id).messages + var total = 0L + messages.forEach { msg -> + val m = pl.jclab.refio.core.db.MessageMetrics.fromJson(msg.metadata) ?: return@forEach + total += m.latencyMs.toLong() + m.toolExecutionTimeMs.toLong() + } + if (total > 0) generationMillisBySession[session.id] = total + } catch (e: Exception) { + logger.warn(e) { "Failed to compute generation time for session ${session.id}" } + } + } + // Update UI on EDT thread SwingUtilities.invokeLater { applyFilters() @@ -232,35 +304,54 @@ class HistoryPanel( private fun applyFilters() { val query = searchField.text.lowercase() val selectedTab = filterTabs.selectedIndex + val statusFilter = (statusCombo.selectedItem as? StatusFilter) ?: StatusFilter.ALL + val sortOrder = (sortCombo.selectedItem as? 
SortOrder) ?: SortOrder.UPDATED_DESC + val groupPinned = groupPinnedCheckbox.isSelected + + val primary = sortComparator(sortOrder) + val comparator: Comparator = if (groupPinned) { + compareByDescending { it.pinned }.then(primary) + } else { + primary + } filteredSessions = allSessions .filter { session -> - // Filter by search query val matchesQuery = query.isEmpty() || session.name.lowercase().contains(query) || session.id.lowercase().contains(query) - // Filter by mode tab val matchesMode = when (selectedTab) { - 0 -> true // All + 0 -> true 1 -> session.mode == TaskMode.CHAT 2 -> session.mode == TaskMode.PLAN 3 -> session.mode == TaskMode.AGENT else -> true } - matchesQuery && matchesMode + val matchesStatus = statusFilter.status == null || session.status == statusFilter.status + + matchesQuery && matchesMode && matchesStatus } - .sortedWith( - compareByDescending { it.pinned } // Pinned first - .thenByDescending { it.updatedAt } // Then by recency - ) + .sortedWith(comparator) - logger.debug { "applyFilters: filteredSessions.size=${filteredSessions.size}" } + logger.debug { "applyFilters: filteredSessions.size=${filteredSessions.size}, sort=$sortOrder, status=$statusFilter" } refreshSessionList() } + private fun sortComparator(order: SortOrder): Comparator = when (order) { + SortOrder.UPDATED_DESC -> compareByDescending { it.updatedAt } + SortOrder.CREATED_DESC -> compareByDescending { it.createdAt } + SortOrder.CREATED_ASC -> compareBy { it.createdAt } + SortOrder.NAME_ASC -> Comparator { a, b -> String.CASE_INSENSITIVE_ORDER.compare(a.name, b.name) } + SortOrder.NAME_DESC -> Comparator { a, b -> String.CASE_INSENSITIVE_ORDER.compare(a.name, b.name) }.reversed() + SortOrder.DURATION_DESC -> compareByDescending { (it.updatedAt - it.createdAt).coerceAtLeast(0L) } + SortOrder.GENERATION_DESC -> compareByDescending { generationMillisBySession[it.id] ?: 0L } + SortOrder.TOKENS_DESC -> compareByDescending { it.tokensIn + it.tokensOut } + SortOrder.COST_DESC -> 
compareByDescending { it.costUsd } + } + /** * Refresh session list UI * US-204: Group by pinned/recent with section headers @@ -276,11 +367,10 @@ class HistoryPanel( border = BorderFactory.createEmptyBorder(20, 0, 0, 0) } sessionListPanel.add(emptyLabel) - } else { - // Group by pinned/recent + } else if (groupPinnedCheckbox.isSelected) { + // Group by pinned/recent, preserving chosen sort within each group val (pinned, recent) = filteredSessions.partition { it.pinned } - // Pinned section if (pinned.isNotEmpty()) { sessionListPanel.add(JBLabel("📌 Pinned").apply { font = font.deriveFont(java.awt.Font.BOLD, 12f) @@ -293,7 +383,6 @@ class HistoryPanel( } } - // Recent section if (recent.isNotEmpty()) { sessionListPanel.add(JBLabel("🕒 Recent").apply { font = font.deriveFont(java.awt.Font.BOLD, 12f) @@ -305,6 +394,12 @@ class HistoryPanel( sessionListPanel.add(Box.createVerticalStrut(8)) } } + } else { + // Flat list using chosen sort order + filteredSessions.forEach { session -> + sessionListPanel.add(createSessionCard(session)) + sessionListPanel.add(Box.createVerticalStrut(8)) + } } sessionListPanel.revalidate() @@ -339,6 +434,27 @@ class HistoryPanel( font = font.deriveFont(11f) }) + add(JBLabel("Duration: ${formatDuration(session.updatedAt - session.createdAt)}").apply { + foreground = LCATheme.grayColor + font = font.deriveFont(11f) + }) + + generationMillisBySession[session.id]?.let { genMs -> + add(JBLabel("Generation: ${formatDuration(genMs)}").apply { + foreground = LCATheme.grayColor + font = font.deriveFont(11f) + toolTipText = "Sum of LLM latency and tool execution time" + }) + } + + session.model?.takeIf { it.isNotBlank() }?.let { modelName -> + add(JBLabel("Model: $modelName").apply { + foreground = LCATheme.grayColor + font = font.deriveFont(11f) + toolTipText = modelName + }) + } + if (session.tokensIn > 0 || session.tokensOut > 0) { add(JBLabel("Tokens: ${session.tokensIn}↓ ${session.tokensOut}↑").apply { foreground = LCATheme.grayColor @@ -445,6 
+561,32 @@ class HistoryPanel( return formatter.format(date) } + private fun formatDuration(millis: Long): String { + if (millis <= 0) return "—" + val totalSeconds = millis / 1000 + val hours = totalSeconds / 3600 + val minutes = (totalSeconds % 3600) / 60 + val seconds = totalSeconds % 60 + return when { + hours > 0 -> "${hours}h ${minutes}m" + minutes > 0 -> "${minutes}m ${seconds}s" + else -> "${seconds}s" + } + } + + private fun extractModelFromUiState(uiStateJson: String?): String? { + if (uiStateJson.isNullOrBlank()) return null + return try { + val map = com.google.gson.Gson().fromJson( + uiStateJson, + com.google.gson.reflect.TypeToken.get(Map::class.java).type + ) as? Map<*, *> ?: return null + (map["selectedModel"] as? String)?.takeIf { it.isNotBlank() } + } catch (_: Exception) { + null + } + } + private fun onLoadSession(session: Session) { logger.info { "Loading session: ${session.id}" } diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/dialogs/LLMPromptViewerDialog.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/dialogs/LLMPromptViewerDialog.kt index ff62e705..b7bab4a3 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/dialogs/LLMPromptViewerDialog.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/dialogs/LLMPromptViewerDialog.kt @@ -22,13 +22,14 @@ import javax.swing.filechooser.FileNameExtensionFilter class LLMPromptViewerDialog( private val project: Project, - private val prompt: String + private val prompt: String, + dialogTitle: String = "LLM Context Prompt" ) : DialogWrapper(project) { private var editor: EditorEx? 
= null init { - title = "LLM Context Prompt" + title = dialogTitle setResizable(true) init() } diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/AdvancedSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/AdvancedSettingsPanel.kt index dc3177c1..0ae763e3 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/AdvancedSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/AdvancedSettingsPanel.kt @@ -49,13 +49,7 @@ class AdvancedSettingsPanel( private var isUpdatingProgrammatically = false init { - border = BorderFactory.createCompoundBorder( - BorderFactory.createTitledBorder( - LCATheme.customLineBorder(LCATheme.borderColor, 1), - "Advanced Settings" - ), - LCATheme.paddedBorder(LCATheme.margin) - ) + border = LCATheme.createSettingsBorder("Advanced Settings") // Main content with visual sections val contentPanel = JBPanel>().apply { @@ -349,13 +343,7 @@ class AdvancedSettingsPanel( private fun createSectionPanel(title: String, content: JPanel): JPanel { return JBPanel>(BorderLayout()).apply { - border = BorderFactory.createCompoundBorder( - BorderFactory.createTitledBorder( - LCATheme.customLineBorder(LCATheme.borderColor, 1), - title - ), - LCATheme.paddedBorder(LCATheme.padding) - ) + border = LCATheme.createSettingsBorder(title) add(content, BorderLayout.CENTER) } } diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ContextSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ContextSettingsPanel.kt index 77553056..0ea54c69 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ContextSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ContextSettingsPanel.kt @@ -77,7 +77,7 @@ class ContextSettingsPanel( private val searchSettingsSaveDebounceMs = 300L init { - border = LCATheme.paddedBorder(LCATheme.margin) + border = LCATheme.emptyBorder() val contentPanel = JBPanel>().apply { 
layout = BoxLayout(this, BoxLayout.Y_AXIS) @@ -753,13 +753,7 @@ class ContextSettingsPanel( private fun createSectionPanel(title: String, content: JPanel): JPanel { return JBPanel>(BorderLayout()).apply { - border = BorderFactory.createCompoundBorder( - BorderFactory.createTitledBorder( - LCATheme.customLineBorder(LCATheme.borderColor, 1), - title - ), - LCATheme.paddedBorder(LCATheme.spacingLg) - ) + border = LCATheme.createSettingsBorder(title) add(content, BorderLayout.CENTER) } } diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/DocsSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/DocsSettingsPanel.kt index 20f0726a..05692bec 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/DocsSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/DocsSettingsPanel.kt @@ -48,13 +48,7 @@ class DocsSettingsPanel( private val embeddingProgress = mutableMapOf() init { - border = BorderFactory.createCompoundBorder( - BorderFactory.createTitledBorder( - LCATheme.customLineBorder(LCATheme.borderColor, 1), - "Documentation" - ), - LCATheme.paddedBorder(16) - ) + border = LCATheme.createSettingsBorder("Documentation") // Initialize buttons addButton = JButton("Add Documentation") diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/GeneralSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/GeneralSettingsPanel.kt index a6be512a..fd7ca649 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/GeneralSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/GeneralSettingsPanel.kt @@ -30,12 +30,13 @@ class GeneralSettingsPanel( private val thinkingEnabledCheckbox: JBCheckBox private val noEgressEnabledCheckbox: JBCheckBox private val executionModeCombo: JComboBox + private val nativeToolsModeCombo: JComboBox // Flag to prevent triggering onSettingChanged during programmatic updates private var 
isUpdatingProgrammatically = false init { - border = LCATheme.paddedBorder(LCATheme.margin) + border = LCATheme.emptyBorder() val gbc = GridBagConstraints().apply { gridx = 0 @@ -46,13 +47,7 @@ class GeneralSettingsPanel( insets = LCATheme.insetsMedium } - // Section title - add(JLabel("General Settings").apply { - font = font.deriveFont(14f).deriveFont(java.awt.Font.BOLD) - }, gbc) - // Format Markdown option - gbc.gridy++ gbc.insets = LCATheme.insetsGridBagLarge formatMarkdownCheckbox = JBCheckBox("Format Markdown in responses", true).apply { addItemListener { event -> @@ -181,6 +176,30 @@ class GeneralSettingsPanel( gbc.insets = LCATheme.insetsDetailsIndented add(JLabel("AUTO executes steps automatically; INTERACTIVE waits for confirmation"), gbc) + // Native tools mode + gbc.gridy++ + gbc.insets = LCATheme.insetsGridBagLarge + nativeToolsModeCombo = JComboBox(arrayOf("auto", "always", "never")).apply { + addActionListener { + if (!isUpdatingProgrammatically) { + val value = selectedItem as? 
String ?: return@addActionListener + val (section, key) = pl.jclab.refio.core.services.ConfigKeyUtil.split( + pl.jclab.refio.core.config.ConfigKeys.NATIVE_TOOLS_MODE.key + ) + onSettingChanged(section, key, value) + } + } + } + val nativeToolsPanel = JBPanel>(java.awt.FlowLayout(java.awt.FlowLayout.LEFT, 0, 0)).apply { + add(JLabel("Native tools mode: ")) + add(nativeToolsModeCombo) + } + add(nativeToolsPanel, gbc) + + gbc.gridy++ + gbc.insets = LCATheme.insetsDetailsIndented + add(JLabel("auto: use native tools if model supports it; always: force native tools; never: use JSON fallback"), gbc) + // Filler to push content to top gbc.gridy++ gbc.weighty = 1.0 @@ -244,17 +263,15 @@ class GeneralSettingsPanel( try { logger.info { "Loading general configuration" } - val config = coreApiClient.configRouter.getConfig(section = "general", scope = "app") - applyGeneralConfig(config.settings) + val generalSettings = coreApiClient.configRouter.getConfig(section = "general", scope = "app").settings + val toolsSettings = coreApiClient.configRouter.getConfig(section = "tools", scope = "app").settings + applyGeneralConfig(generalSettings, toolsSettings) } catch (e: Exception) { logger.error(e) { "Failed to load general config" } } } - /** - * Apply loaded configuration to UI - */ - private fun applyGeneralConfig(settings: Map) { + private fun applyGeneralConfig(settings: Map, toolsSettings: Map = emptyMap()) { logger.info { "Applying general config: ${settings.keys}" } isUpdatingProgrammatically = true @@ -267,6 +284,9 @@ class GeneralSettingsPanel( val executionMode = (settings["execution_mode"] as? String)?.trim()?.uppercase() executionModeCombo.selectedItem = if (executionMode == "INTERACTIVE") "INTERACTIVE" else "AUTO" + + val nativeToolsMode = (toolsSettings["native_tools"] as? 
String)?.trim()?.lowercase() + nativeToolsModeCombo.selectedItem = if (nativeToolsMode in setOf("always", "never")) nativeToolsMode else "auto" } finally { isUpdatingProgrammatically = false } @@ -286,6 +306,7 @@ class GeneralSettingsPanel( thinkingEnabledCheckbox.isSelected = false noEgressEnabledCheckbox.isSelected = false executionModeCombo.selectedItem = "AUTO" + nativeToolsModeCombo.selectedItem = "auto" isUpdatingProgrammatically = false // Reload from backend diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/MCPSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/MCPSettingsPanel.kt index c7a8f932..d1cd92ce 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/MCPSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/MCPSettingsPanel.kt @@ -50,10 +50,10 @@ class MCPSettingsPanel(private val project: Project) : JBPanel private val coroutineScope = CoroutineScope(Dispatchers.IO + SupervisorJob()) init { - border = LCATheme.paddedBorder(LCATheme.margin) + border = LCATheme.emptyBorder() // MCPManager is already initialized by CoreConnectionManager.createProjectRouter() // with the correct ToolRegistry. Do NOT re-initialize here without ToolRegistry! 
- add(createSectionPanel("MCP Servers", createServersSection()), BorderLayout.CENTER) + add(createServersSection(), BorderLayout.CENTER) refreshServers() } @@ -305,19 +305,6 @@ class MCPSettingsPanel(private val project: Project) : JBPanel refreshServers() } } - - private fun createSectionPanel(title: String, content: JPanel): JPanel { - return JBPanel>(BorderLayout()).apply { - border = BorderFactory.createCompoundBorder( - BorderFactory.createTitledBorder( - LCATheme.customLineBorder(LCATheme.borderColor, 1), - title - ), - LCATheme.paddedBorder(LCATheme.padding) - ) - add(content, BorderLayout.CENTER) - } - } } private class MCPServerConfigDialog( diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ModelsSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ModelsSettingsPanel.kt index f609aa05..6f0616d6 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ModelsSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ModelsSettingsPanel.kt @@ -189,6 +189,29 @@ class ModelsSettingsPanel( // Flag to prevent saving when dropdowns are updated programmatically private var isUpdatingDropdowns = false + // Mapping "provider/id" -> human-readable display text ("provider - name"). + // Populated from the visible models in updateModelDropdowns; used by the shared + // combo renderer so selectors show friendly names while the underlying item + // value stays as "provider/id" (required for persistence). + private var modelDisplayNames: Map = emptyMap() + + private val modelComboRenderer = object : DefaultListCellRenderer() { + override fun getListCellRendererComponent( + list: JList<*>?, + value: Any?, + index: Int, + isSelected: Boolean, + cellHasFocus: Boolean + ): Component { + val label = super.getListCellRendererComponent(list, value, index, isSelected, cellHasFocus) as JLabel + val raw = value as? 
String + if (raw != null) { + label.text = modelDisplayNames[raw] ?: raw + } + return label + } + } + // Last models shown in the table — used by Show All / Hide All to update // visibility in place instead of refetching from the backend (the model-registry // cache may have been invalidated by a concurrent provider settings save, which @@ -196,7 +219,7 @@ class ModelsSettingsPanel( private var currentModels: List = emptyList() init { - border = LCATheme.paddedBorder(LCATheme.margin) + border = LCATheme.emptyBorder() // Main content val contentPanel = JBPanel>(GridBagLayout()).apply { @@ -573,6 +596,12 @@ class ModelsSettingsPanel( insets = LCATheme.insetsFormField } + // Apply friendly-name renderer to every combo up-front. + listOf( + defaultModelCombo, planModelCombo, codingModelCombo, + weakModelCombo, embeddingModelCombo, strongModelCombo + ).forEach { it.renderer = modelComboRenderer } + // Default model add(JLabel("Default Model:"), gbc) gbc.gridx++ @@ -1188,6 +1217,11 @@ class ModelsSettingsPanel( .thenBy { it.id.lowercase() } ) + // Rebuild display-name map for the combo renderer. + modelDisplayNames = sortedVisibleModels.associate { model -> + "${model.provider}/${model.id}" to "${model.provider} - ${model.name}" + } + val currentDefault = defaultModelCombo.selectedItem as? String val currentPlan = planModelCombo.selectedItem as? String val currentCoding = codingModelCombo.selectedItem as? 
String diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/PromptsSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/PromptsSettingsPanel.kt index c3673aa4..58ef7816 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/PromptsSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/PromptsSettingsPanel.kt @@ -35,16 +35,7 @@ class PromptsSettingsPanel( private val slashPromptsCache = mutableListOf() init { - border = LCATheme.paddedBorder(16) - - // Header - val headerPanel = JBPanel>(FlowLayout(FlowLayout.LEFT)).apply { - add(JLabel("Prompts").apply { - font = font.deriveFont(14f).deriveFont(Font.BOLD) - }) - isEnabled = false - } - add(headerPanel, BorderLayout.NORTH) + border = LCATheme.emptyBorder() // Tabbed pane for system prompts + slash prompts val tabbedPane = JBTabbedPane().apply { diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ProvidersSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ProvidersSettingsPanel.kt index 02211e22..f07f2300 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ProvidersSettingsPanel.kt +++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ProvidersSettingsPanel.kt @@ -59,13 +59,7 @@ class ProvidersSettingsPanel( null init { - border = BorderFactory.createCompoundBorder( - BorderFactory.createTitledBorder( - LCATheme.customLineBorder(LCATheme.borderColor, 1), - "Providers" - ), - LCATheme.paddedBorder(LCATheme.margin) - ) + border = LCATheme.createSettingsBorder("Providers") // Providers cards providersPanel = JPanel().apply { diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SettingsView.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SettingsView.kt index b395f06f..24e4f599 100644 --- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SettingsView.kt +++ 
b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SettingsView.kt
@@ -5,6 +5,7 @@ import pl.jclab.refio.core.config.ConfigKeys
 import com.intellij.openapi.application.ApplicationManager
 import com.intellij.openapi.project.Project
 import com.intellij.ui.components.JBPanel
+import com.intellij.ui.components.JBScrollPane
 import com.intellij.ui.components.JBTabbedPane
 import pl.jclab.refio.services.notification.NotificationService
 import pl.jclab.refio.ui.theme.LCATheme
@@ -62,17 +63,17 @@ class SettingsView(

         // Tabbed pane for settings sections
         val tabbedPane = JBTabbedPane(SwingConstants.TOP).apply {
-            addTab("General", generalPanel)
+            addTab("General", createStyledTabPanel("General", generalPanel, scrollable = true))
             addTab("Providers", providersPanel)
-            addTab("Models", modelsPanel)
-            addTab("Prompts", promptsPanel)
-            addTab("Context", contextSettingsPanel)
-            addTab("MCP Servers", mcpPanel)
+            addTab("Models", createStyledTabPanel("Models", modelsPanel))
+            addTab("Prompts", createStyledTabPanel("Prompts", promptsPanel, scrollable = true))
+            addTab("Context", createStyledTabPanel("Context", contextSettingsPanel))
+            addTab("MCP Servers", createStyledTabPanel("MCP Servers", mcpPanel))
             addTab("Documentation", docsPanel)
             addTab("Tools", toolsPanel)
             addTab("Subagents", subagentPanel) // AI subagents configuration
             addTab("Advanced", advancedPanel) // Merged: Advanced + Limits
-            addTab("Theme", themePanel) // LCATheme visual preview
+            addTab("Theme", createStyledTabPanel("Theme", themePanel)) // LCATheme visual preview
         }

         add(tabbedPane, BorderLayout.CENTER)
@@ -129,6 +130,23 @@ class SettingsView(
         }
     }

+    /**
+     * Wraps a settings tab's [content] in a panel that carries the shared titled settings border.
+     *
+     * @param title caption passed to [LCATheme.createSettingsBorder].
+     * @param content component shown in the center of the bordered panel.
+     * @param scrollable when `true`, [content] is first placed in a [JBScrollPane] with an
+     *   as-needed vertical scroll bar and no horizontal scroll bar.
+     * @return the bordered wrapper to register as the tab component.
+     */
+    private fun createStyledTabPanel(title: String, content: JComponent, scrollable: Boolean = false): JComponent {
+        val wrappedContent: JComponent = if (scrollable) {
+            JBScrollPane(content).apply {
+                border = LCATheme.emptyBorder()
+                verticalScrollBarPolicy = JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED
+                horizontalScrollBarPolicy = JScrollPane.HORIZONTAL_SCROLLBAR_NEVER
+            }
+        } else {
+            content
+        }
+
+        return JBPanel<JBPanel<*>>(BorderLayout()).apply {
+            border = LCATheme.createSettingsBorder(title)
+            add(wrappedContent, BorderLayout.CENTER)
+        }
+    }
+
     /**
      * Auto-save mechanism - called when any setting changes
      */
diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SubagentSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SubagentSettingsPanel.kt
index a21e75af..6404283c 100644
--- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SubagentSettingsPanel.kt
+++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/SubagentSettingsPanel.kt
@@ -40,13 +40,7 @@ class SubagentSettingsPanel(
     private var isLoadingSubagents = false

     init {
-        border = BorderFactory.createCompoundBorder(
-            BorderFactory.createTitledBorder(
-                LCATheme.customLineBorder(LCATheme.borderColor, 1),
-                "Subagents"
-            ),
-            LCATheme.paddedBorder(16)
-        )
+        border = LCATheme.createSettingsBorder("Subagents")

         // Main content
         val contentPanel = JBPanel<JBPanel<*>>(BorderLayout()).apply {
diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ThemeSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ThemeSettingsPanel.kt
index 4ef2234f..449f69ba 100644
--- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ThemeSettingsPanel.kt
+++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ThemeSettingsPanel.kt
@@ -44,7 +44,7 @@ class ThemeSettingsPanel : JBPanel(BorderLayout()) {
     }

     init {
-        border = JBUI.Borders.empty(LCATheme.margin)
+        border = JBUI.Borders.empty()

         val mainPanel = JPanel().apply {
             layout = BoxLayout(this, BoxLayout.Y_AXIS)
diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ToolsSettingsPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ToolsSettingsPanel.kt
index 2837a9bd..c0ee5b32 100644
--- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ToolsSettingsPanel.kt
+++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/settings/ToolsSettingsPanel.kt
@@ -38,13 +38,7 @@ class ToolsSettingsPanel(
     private var isLoadingPermissions = false

     init {
-        border = BorderFactory.createCompoundBorder(
-            BorderFactory.createTitledBorder(
-                LCATheme.customLineBorder(LCATheme.borderColor, 1),
-                "Tools"
-            ),
-            LCATheme.paddedBorder(16)
-        )
+        border = LCATheme.createSettingsBorder("Tools")

         val contentPanel = JPanel().apply {
             layout = BoxLayout(this, BoxLayout.Y_AXIS)
@@ -156,13 +150,7 @@ class ToolsSettingsPanel(

     private fun createCommandRulesPanel(): JComponent {
         val panel = JBPanel<JBPanel<*>>(BorderLayout()).apply {
-            border = BorderFactory.createCompoundBorder(
-                BorderFactory.createTitledBorder(
-                    LCATheme.customLineBorder(LCATheme.borderColor, 1),
-                    "Terminal Command Rules"
-                ),
-                LCATheme.paddedBorder(8)
-            )
+            border = LCATheme.createSettingsBorder("Terminal Command Rules")
             preferredSize = Dimension(0, 310)
         }

diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/theme/LCATheme.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/theme/LCATheme.kt
index 5190d5b3..1cea216f 100644
--- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/theme/LCATheme.kt
+++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/theme/LCATheme.kt
@@ -297,6 +297,9 @@ object LCATheme {
     /** Standard margin (16px scaled) */
     val margin: Int get() = spacingXl

+    /** Standard inner padding for settings panels and titled sections */
+    val settingsPanelPadding: Int get() = padding
+
     /** Standard gap between elements (4px scaled) */
     val gap: Int get() = spacingSm

@@ -367,6 +370,12 @@ object LCATheme {
         titleColor = com.intellij.util.ui.UIUtil.getLabelForeground()
     }

+    /** Standard titled border used by settings panels */
+    fun createSettingsBorder(title: String, innerPadding: Int = settingsPanelPadding) = compoundBorder(
+        createTitledBorder(title),
+        paddedBorder(innerPadding)
+    )
+
     /** Standard section border (line + padding) */
     val sectionBorder get() = compoundBorder(
customLineBorder(borderColor, 1),
diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/toolwindow/RefioContentPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/toolwindow/RefioContentPanel.kt
new file mode 100644
index 00000000..48bdf8df
--- /dev/null
+++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/toolwindow/RefioContentPanel.kt
@@ -0,0 +1,376 @@
+package pl.jclab.refio.ui.toolwindow
+
+import com.intellij.openapi.Disposable
+import pl.jclab.refio.core.logging.dualLogger
+import com.intellij.openapi.project.Project
+import com.intellij.ui.components.JBPanel
+import com.intellij.ui.components.JBScrollPane
+import pl.jclab.refio.ui.theme.LCATheme
+import pl.jclab.refio.api.models.TaskMode
+import pl.jclab.refio.services.session.SessionManager
+import pl.jclab.refio.services.execution.StepExecutionService
+import pl.jclab.refio.ui.components.chat.ChatView
+import pl.jclab.refio.ui.components.chat.PromptInputPanel
+import pl.jclab.refio.ui.components.toolbar.StatusBar
+import pl.jclab.refio.ui.components.steps.StepsQueueView
+import pl.jclab.refio.ui.components.logs.LogsPanel
+import pl.jclab.refio.ui.components.debug.DebugPanel
+import pl.jclab.refio.ui.components.context.ContextPanel
+import pl.jclab.refio.ui.components.history.HistoryPanel
+import pl.jclab.refio.ui.components.rag.RagViewPanel
+import pl.jclab.refio.ui.execution.TurnStateStatusBar
+import pl.jclab.refio.ui.settings.ApiLogsPanel
+import pl.jclab.refio.ui.settings.SettingsView
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.SupervisorJob
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.launch
+import java.awt.BorderLayout
+import java.awt.CardLayout
+import java.beans.PropertyChangeListener
+import javax.swing.JPanel
+import javax.swing.JTabbedPane
+import javax.swing.SwingUtilities
+
+/**
+ * Content panel for Refio tool window — built on EDT after SessionManager has
+ * been created on a background thread. See [RefioMainPanel] for the wrapper
+ * that handles the async initialization flow.
+ */
+class RefioContentPanel(
+    private val project: Project,
+    private val sessionManager: SessionManager,
+    private val stepExecutionService: StepExecutionService
+) : JBPanel<RefioContentPanel>(BorderLayout()), Disposable {
+
+    private val logger = dualLogger("RefioContentPanel")
+
+    private val cs = CoroutineScope(SupervisorJob())
+    private val coreApiClient: pl.jclab.refio.core.api.CoreApiRouter = sessionManager.apiRouter
+
+    private val chatView: ChatView
+    private val promptInputPanel: PromptInputPanel
+    private val chatScrollPane: JBScrollPane
+    private val statusBar: StatusBar
+
+    private val stepsQueueView: StepsQueueView
+    private val contextPanel: ContextPanel
+    private val logsPanel: LogsPanel
+    private val debugPanel: DebugPanel
+    private val apiLogsPanel: ApiLogsPanel
+    private val historyPanel: HistoryPanel
+    private val ragViewPanel: RagViewPanel
+
+    private val turnStateStatusBar: TurnStateStatusBar
+
+    // "Execution" tab content (turn-state bar + steps queue); kept as a field so the
+    // tab can be re-added unchanged when advanced view is switched back on.
+    private val stepsPanel: JPanel
+
+    private val tabbedPane: JTabbedPane
+    private val middlePanel: JPanel
+    private val cardLayout: CardLayout
+    private val agentExecutionPanel: pl.jclab.refio.ui.components.agents.AgentExecutionPanel
+
+    private var settingsView: SettingsView? = null
+    private val chatMessagesUpdatedListener = PropertyChangeListener {
+        scrollChatToBottom()
+    }
+    private val stopExecutionListener = PropertyChangeListener { evt ->
+        if (evt.newValue == true) {
+            stepExecutionService.stopExecution()
+        }
+    }
+    private val historySessionLoadedListener = PropertyChangeListener { evt ->
+        if (evt.newValue == true) {
+            showChatView()
+        }
+    }
+    private val historyBackToChatListener = PropertyChangeListener { evt ->
+        if (evt.newValue == true) {
+            showChatView()
+        }
+    }
+    private val advancedViewChangedListener = PropertyChangeListener { evt ->
+        if (evt.newValue is Boolean) {
+            setAdvancedViewEnabled(evt.newValue as Boolean)
+        }
+    }
+
+    init {
+
+        logger.info { "Initialize main plugin content panel" }
+
+        chatView = ChatView(project)
+        promptInputPanel = PromptInputPanel(project, chatView, coreApiClient)
+        chatView.setContinuePromptHandler {
+            promptInputPanel.sendContinuePrompt()
+        }
+        statusBar = StatusBar(project)
+        stepsQueueView = StepsQueueView(project)
+        contextPanel = ContextPanel(project)
+        logsPanel = LogsPanel(project)
+        debugPanel = DebugPanel(project)
+        apiLogsPanel = ApiLogsPanel(coreApiClient, autoLoadOnInit = false)
+        historyPanel = HistoryPanel(project, autoLoadOnInit = false)
+        ragViewPanel = RagViewPanel(project)
+        turnStateStatusBar = TurnStateStatusBar()
+        agentExecutionPanel = pl.jclab.refio.ui.components.agents.AgentExecutionPanel()
+        debugPanel.agentTraceProvider = { agentExecutionPanel.toText() }
+
+        // Re-read sessionManager.turnState on every active-session emission (it may be null)
+        // and restart the collector that feeds the turn-state status bar.
+        cs.launch {
+            var turnStateJob: kotlinx.coroutines.Job? = null
+            sessionManager.activeSession.collect { _ ->
+                turnStateJob?.cancel()
+                val turnStateFlow = sessionManager.turnState
+                if (turnStateFlow != null) {
+                    turnStateJob = cs.launch {
+                        turnStateFlow.collect { snapshot ->
+                            SwingUtilities.invokeLater { turnStateStatusBar.update(snapshot) }
+                        }
+                    }
+                }
+            }
+        }
+
+        chatScrollPane = JBScrollPane(chatView).apply {
+            border = null
+            verticalScrollBarPolicy = javax.swing.JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED
+            horizontalScrollBarPolicy = javax.swing.JScrollPane.HORIZONTAL_SCROLLBAR_NEVER
+            background = LCATheme.backgroundColor
+            viewport.background = LCATheme.backgroundColor
+        }
+
+        chatView.addPropertyChangeListener("messagesUpdated", chatMessagesUpdatedListener)
+
+        val chatPanel = JPanel(BorderLayout()).apply {
+            background = LCATheme.backgroundColor
+            add(chatScrollPane, BorderLayout.CENTER)
+            add(promptInputPanel, BorderLayout.SOUTH)
+        }
+
+        stepsPanel = JPanel(BorderLayout()).apply {
+            add(turnStateStatusBar, BorderLayout.NORTH)
+            add(stepsQueueView, BorderLayout.CENTER)
+        }
+
+        tabbedPane = JTabbedPane().apply {
+            addTab("Chat", chatPanel)
+            addTab("Execution", stepsPanel)
+            addTab("Context", contextPanel)
+            addTab("Agents", agentExecutionPanel)
+            addTab("RAG", ragViewPanel)
+            addTab("Debug", debugPanel)
+            addTab("Logs", logsPanel)
+            addTab("API", apiLogsPanel)
+            addChangeListener {
+                if (selectedIndex >= 0 && getComponentAt(selectedIndex) === apiLogsPanel) {
+                    apiLogsPanel.ensureLoaded()
+                }
+            }
+        }
+
+        val normalContentPanel = JPanel(BorderLayout()).apply {
+            add(tabbedPane, BorderLayout.CENTER)
+            border = LCATheme.paddedBorder(0, 0, 2, 0)
+        }
+
+        // Card names used below: "NORMAL", "HISTORY"; "SETTINGS" is added lazily by openSettings().
+        cardLayout = CardLayout()
+        middlePanel = JPanel(cardLayout).apply {
+            add(normalContentPanel, "NORMAL")
+            add(historyPanel, "HISTORY")
+        }
+
+        add(middlePanel, BorderLayout.CENTER)
+        add(statusBar, BorderLayout.SOUTH)
+
+        cs.launch {
+            try {
+                val config = coreApiClient.configRouter.getConfig(section = "general", scope = "app")
+                val advancedView = (config.settings["advanced_view"] as? String).toBoolean()
+
+                javax.swing.SwingUtilities.invokeLater {
+                    setAdvancedViewEnabled(advancedView)
+                }
+            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
+                // Scope cancelled (panel disposed): not a config failure, so do not log or touch the UI.
+                throw e
+            } catch (e: Exception) {
+                logger.error(e) { "Failed to load advanced view setting, using default: false" }
+                javax.swing.SwingUtilities.invokeLater {
+                    setAdvancedViewEnabled(false)
+                }
+            }
+        }
+
+        sessionManager.setStatusBar(statusBar)
+
+        statusBar.addPropertyChangeListener("stopExecution", stopExecutionListener)
+
+        historyPanel.onNavigateToChat = { showChatView() }
+
+        historyPanel.addPropertyChangeListener("sessionLoaded", historySessionLoadedListener)
+
+        historyPanel.addPropertyChangeListener("backToChat", historyBackToChatListener)
+
+        cs.launch {
+            sessionManager.activeSession.collect { session ->
+                session?.let { updateStepsQueueVisibility(it.mode) }
+                session?.let {
+                    agentExecutionPanel.subscribeToSession(sessionManager.apiRouter.agentEventBus, it.id)
+                }
+            }
+        }
+
+        cs.launch {
+            sessionManager.subtasks.collect {
+                sessionManager.activeSession.value?.let { session ->
+                    updateStepsQueueVisibility(session.mode)
+                }
+            }
+        }
+    }
+
+    /** Forwards the mode-cycling action to the prompt input panel. */
+    fun cycleMode() {
+        promptInputPanel.cycleMode()
+    }
+
+    /**
+     * Stops streaming/execution, creates a session using the mode and execution mode currently
+     * selected in the prompt input panel, then clears the chat and shows the Chat view.
+     */
+    fun createNewSession() {
+        logger.info { "Creating new session" }
+
+        cs.launch {
+            try {
+                sessionManager.cancelStreaming()
+                sessionManager.cancelExecution()
+                stepExecutionService.stopExecution()
+                logger.info { "Canceled running operations before creating new session" }
+            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
+                // Never continue into createSession once this coroutine itself was cancelled.
+                throw e
+            } catch (e: Exception) {
+                logger.warn(e) { "Failed to cancel operations (may not be running)" }
+            }
+
+            val currentMode = promptInputPanel.getSelectedMode()
+            val executionMode = promptInputPanel.getCurrentExecutionMode()
+            sessionManager.createSession("Session (${currentMode.name})", currentMode, executionMode)
+
+            SwingUtilities.invokeLater {
+                chatView.clearForNewSession()
+                showChatView()
+            }
+        }
+    }
+
+    /** Switches the middle area to the history card. */
+    fun showHistory() {
+        toggleHistoryPanel()
+    }
+
+    /** Switches the middle area to the settings card, creating the settings view on first use. */
+    fun showSettings() {
+        openSettings()
+    }
+
+    /** Opens the project page in the browser. */
+    fun showHelp() {
+        logger.info { "Show help requested" }
+        com.intellij.ide.BrowserUtil.browse("https://github.com/jclab-joseph/refio")
+    }
+
+    /**
+     * Scrolls the chat to its latest message. The scroll pane is validated first so the
+     * scroll bar maximum is read from an up-to-date layout.
+     */
+    private fun scrollChatToBottom() {
+        javax.swing.SwingUtilities.invokeLater {
+            chatScrollPane.validate()
+            val scrollBar = chatScrollPane.verticalScrollBar
+            scrollBar.value = scrollBar.maximum
+        }
+    }
+
+    /** Shows the "SETTINGS" card; the [SettingsView] is created and registered only once. */
+    private fun openSettings() {
+        logger.info { "Opening settings view" }
+
+        if (settingsView == null) {
+            settingsView = SettingsView(project, coreApiClient) {
+                closeSettings()
+            }
+
+            settingsView!!.addPropertyChangeListener("advancedViewChanged", advancedViewChangedListener)
+
+            middlePanel.add(settingsView, "SETTINGS")
+        }
+
+        cardLayout.show(middlePanel, "SETTINGS")
+
+        middlePanel.revalidate()
+        middlePanel.repaint()
+    }
+
+    /** Returns to the "NORMAL" card on the Chat tab and refreshes the model list. */
+    private fun closeSettings() {
+        logger.info { "Closing settings view" }
+
+        cardLayout.show(middlePanel, "NORMAL")
+
+        tabbedPane.selectedIndex = 0
+
+        middlePanel.revalidate()
+        middlePanel.repaint()
+
+        promptInputPanel.refreshModels()
+    }
+
+    /** Shows the "HISTORY" card and asks the history panel to display its sessions. */
+    private fun toggleHistoryPanel() {
+        logger.info { "Toggling history panel" }
+
+        cardLayout.show(middlePanel, "HISTORY")
+
+        historyPanel.showHistory()
+
+        middlePanel.revalidate()
+        middlePanel.repaint()
+    }
+
+    /** Shows the "NORMAL" card with the Chat tab (index 0) selected. */
+    private fun showChatView() {
+        logger.info { "Showing Chat view after new session creation" }
+
+        cardLayout.show(middlePanel, "NORMAL")
+
+        tabbedPane.selectedIndex = 0
+
+        middlePanel.revalidate()
+        middlePanel.repaint()
+    }
+
+    // Intentionally a no-op; kept so the session/subtask collectors in init have a single hook.
+    private fun updateStepsQueueVisibility(@Suppress("UNUSED_PARAMETER") mode: TaskMode) {
+    }
+
+    /**
+     * Switches between simple view (Chat tab only) and advanced view (all tabs).
+     *
+     * Disabling removes every tab after Chat. Enabling re-adds the tabs only when they are
+     * currently absent, using the same components, titles and order as the initial layout,
+     * so an off/on round trip ends in the startup state.
+     *
+     * @param enabled `true` for advanced view, `false` for simple view.
+     */
+    fun setAdvancedViewEnabled(enabled: Boolean) {
+        logger.info { "Setting advanced view: $enabled" }
+
+        javax.swing.SwingUtilities.invokeLater {
+            if (enabled) {
+                if (tabbedPane.tabCount == 1) {
+                    // Must mirror the addTab calls in init. Adding stepsQueueView directly would
+                    // re-parent it out of stepsPanel and drop the turn-state bar.
+                    tabbedPane.addTab("Execution", stepsPanel)
+                    tabbedPane.addTab("Context", contextPanel)
+                    tabbedPane.addTab("Agents", agentExecutionPanel)
+                    tabbedPane.addTab("RAG", ragViewPanel)
+                    tabbedPane.addTab("Debug", debugPanel)
+                    tabbedPane.addTab("Logs", logsPanel)
+                    tabbedPane.addTab("API", apiLogsPanel)
+                }
+                tabbedPane.tabPlacement = JTabbedPane.TOP
+            } else {
+                while (tabbedPane.tabCount > 1) {
+                    tabbedPane.removeTabAt(1)
+                }
+                tabbedPane.selectedIndex = 0
+            }
+
+            statusBar.setAdvancedViewEnabled(enabled)
+
+            tabbedPane.revalidate()
+            tabbedPane.repaint()
+        }
+    }
+
+    /** Cancels the coroutine scope, detaches the registered listeners and disposes child views. */
+    override fun dispose() {
+        cs.cancel()
+        chatView.removePropertyChangeListener("messagesUpdated", chatMessagesUpdatedListener)
+        statusBar.removePropertyChangeListener("stopExecution", stopExecutionListener)
+        historyPanel.removePropertyChangeListener("sessionLoaded", historySessionLoadedListener)
+        historyPanel.removePropertyChangeListener("backToChat", historyBackToChatListener)
+        settingsView?.removePropertyChangeListener("advancedViewChanged", advancedViewChangedListener)
+        chatView.dispose()
+        stepsQueueView.dispose()
+        contextPanel.dispose()
+        promptInputPanel.dispose()
+        statusBar.dispose()
+        logsPanel.dispose()
+        debugPanel.dispose()
+    }
+}
diff --git a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/toolwindow/RefioMainPanel.kt b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/toolwindow/RefioMainPanel.kt
index 91692500..af0064d8 100644
--- a/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/toolwindow/RefioMainPanel.kt
+++ b/intellij-plugin/src/main/kotlin/pl/jclab/refio/ui/toolwindow/RefioMainPanel.kt
@@ -1,505 +1,135 @@
 package pl.jclab.refio.ui.toolwindow

 import com.intellij.openapi.Disposable
-import pl.jclab.refio.core.logging.dualLogger
+import com.intellij.openapi.application.ApplicationManager
+import com.intellij.openapi.application.ModalityState
 import com.intellij.openapi.project.Project
+import com.intellij.ui.components.JBLabel
 import com.intellij.ui.components.JBPanel
-import com.intellij.ui.components.JBScrollPane
-import pl.jclab.refio.ui.theme.LCATheme
-import pl.jclab.refio.api.models.TaskMode
-import pl.jclab.refio.services.session.SessionManager
-import pl.jclab.refio.services.execution.StepExecutionService
-import pl.jclab.refio.ui.components.chat.ChatView
-import
pl.jclab.refio.ui.components.chat.PromptInputPanel
-import pl.jclab.refio.ui.components.toolbar.StatusBar
-import pl.jclab.refio.ui.components.steps.StepsQueueView
-import pl.jclab.refio.ui.components.logs.LogsPanel
-import pl.jclab.refio.ui.components.debug.DebugPanel
-import pl.jclab.refio.ui.components.context.ContextPanel
-import pl.jclab.refio.ui.components.history.HistoryPanel
-import pl.jclab.refio.ui.components.rag.RagViewPanel
-import pl.jclab.refio.ui.execution.TurnStateStatusBar
-import pl.jclab.refio.ui.settings.ApiLogsPanel
-import pl.jclab.refio.ui.settings.SettingsView
-import pl.jclab.refio.core.api.CoreApiRouter
-import com.intellij.openapi.actionSystem.ActionManager
-import com.intellij.openapi.actionSystem.DefaultActionGroup
-import com.intellij.openapi.actionSystem.Separator
-import com.intellij.ui.awt.RelativePoint
 import kotlinx.coroutines.CoroutineScope
 import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.SupervisorJob
 import kotlinx.coroutines.cancel
 import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
+import pl.jclab.refio.core.logging.dualLogger
+import pl.jclab.refio.services.execution.StepExecutionService
+import pl.jclab.refio.services.session.SessionManager
+import pl.jclab.refio.ui.theme.LCATheme
 import java.awt.BorderLayout
-import java.awt.CardLayout
-import java.awt.Component
-import java.awt.Dimension
-import java.beans.PropertyChangeListener
-import javax.swing.JPanel
-import javax.swing.JTabbedPane
-import javax.swing.SwingUtilities
+import javax.swing.SwingConstants

 /**
- * Main panel for Refio tool window
- * Layout:
- * - Tabbed Pane (Chat, Steps, Debug panels)
- * - Prompt Input Panel (mode/model selectors, execution toggles, text area, send button)
- * - Status Bar (bottom)
+ * Thin wrapper around [RefioContentPanel] that keeps the EDT free while
+ * [SessionManager] initializes. The previous implementation constructed the
+ * full UI inline, which pulled in project-level service creation (DB init,
+ * prompt seeding, migrations) on the EDT and produced multi-second freezes.
+ *
+ * Flow:
+ * 1. On EDT: show a lightweight "Initializing..." placeholder.
+ * 2. Off EDT: create [SessionManager] and [StepExecutionService].
+ * 3. Back on EDT: swap placeholder for [RefioContentPanel].
  *
- * Title bar actions (+ New Session, History, Settings, Help) are added via setTitleActions() in RefioToolWindowFactory
+ * Public action methods queue their work: if the content panel isn't ready
+ * yet, the action is replayed once initialization completes.
  */
 class RefioMainPanel(private val project: Project) : JBPanel<RefioMainPanel>(BorderLayout()), Disposable {

     private val logger = dualLogger("RefioMainPanel")

-    // Use EDT dispatcher for UI updates in IntelliJ
-    private val cs = CoroutineScope(SupervisorJob())
-    private val sessionManager = SessionManager.getInstance(project)
-    private val stepExecutionService = StepExecutionService.getInstance(project)
-    private val coreApiClient: pl.jclab.refio.core.api.CoreApiRouter = sessionManager.apiRouter
-
-    private val chatView: ChatView
-    private val promptInputPanel: PromptInputPanel
-    private val chatScrollPane: JBScrollPane
-    private val statusBar: StatusBar
-
-    // private val splitPane: JSplitPane
-    private val stepsQueueView: StepsQueueView
-    private val contextPanel: ContextPanel
-    private val logsPanel: LogsPanel
-    private val debugPanel: DebugPanel
-    private val apiLogsPanel: ApiLogsPanel
-    private val historyPanel: HistoryPanel
-    private val ragViewPanel: RagViewPanel
+    private val cs = CoroutineScope(SupervisorJob() + Dispatchers.Default)

-    private val turnStateStatusBar: TurnStateStatusBar
+    @Volatile
+    private var content: RefioContentPanel? = null
+
+    // Set by dispose(). A late init callback checks it so it does not build a content
+    // panel that nothing would ever dispose.
+    @Volatile
+    private var disposed = false

-    private val tabbedPane: JTabbedPane
-    private val middlePanel: JPanel
-    private val cardLayout: CardLayout
-    private val agentExecutionPanel: pl.jclab.refio.ui.components.agents.AgentExecutionPanel
+    private val pendingActions = mutableListOf<(RefioContentPanel) -> Unit>()

-    // Cache SettingsView to avoid creating new instance each time
-    private var settingsView: SettingsView? = null
-    private val chatMessagesUpdatedListener = PropertyChangeListener {
-        scrollChatToBottom()
-    }
-    private val stopExecutionListener = PropertyChangeListener { evt ->
-        if (evt.newValue == true) {
-            stepExecutionService.stopExecution()
-        }
-    }
-    private val historySessionLoadedListener = PropertyChangeListener { evt ->
-        if (evt.newValue == true) {
-            showChatView()
-        }
-    }
-    private val historyBackToChatListener = PropertyChangeListener { evt ->
-        if (evt.newValue == true) {
-            showChatView()
-        }
-    }
-    private val advancedViewChangedListener = PropertyChangeListener { evt ->
-        if (evt.newValue is Boolean) {
-            setAdvancedViewEnabled(evt.newValue as Boolean)
-        }
+    private val loader = JBLabel("Initializing Refio…", SwingConstants.CENTER).apply {
+        background = LCATheme.backgroundColor
+        isOpaque = true
     }

     init {
+        background = LCATheme.backgroundColor
+        add(loader, BorderLayout.CENTER)

-        logger.info { "Initialize main plugin panel" }
-
-        // Create components - note: order matters for cross-references
-        chatView = ChatView(project)
-        promptInputPanel = PromptInputPanel(project, chatView, coreApiClient)
-        chatView.setContinuePromptHandler {
-            promptInputPanel.sendContinuePrompt()
-        }
-        statusBar = StatusBar(project)
-        stepsQueueView = StepsQueueView(project)
-        contextPanel = ContextPanel(project)
-        logsPanel = LogsPanel(project)
-        debugPanel = DebugPanel(project)
-        apiLogsPanel = ApiLogsPanel(coreApiClient, autoLoadOnInit = false)
-        historyPanel = HistoryPanel(project, autoLoadOnInit = false)
-        ragViewPanel = RagViewPanel(project)
-        turnStateStatusBar = TurnStateStatusBar()
-        agentExecutionPanel = pl.jclab.refio.ui.components.agents.AgentExecutionPanel()
-        debugPanel.agentTraceProvider = { agentExecutionPanel.toText() }
-
-        // Observe turn state for status bar.
-        // turnState may be null initially (AgentTurnLoop created lazily), so re-check on each session change.
-        cs.launch {
-            var turnStateJob: kotlinx.coroutines.Job? = null
-            sessionManager.activeSession.collect { _ ->
-                turnStateJob?.cancel()
-                val turnStateFlow = sessionManager.turnState
-                if (turnStateFlow != null) {
-                    turnStateJob = cs.launch {
-                        turnStateFlow.collect { snapshot ->
-                            SwingUtilities.invokeLater { turnStateStatusBar.update(snapshot) }
-                        }
-                    }
-                }
-            }
-        }
-
-        // Scroll pane for chat messages only
-        chatScrollPane = JBScrollPane(chatView).apply {
-            border = null
-            verticalScrollBarPolicy = javax.swing.JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED
-            horizontalScrollBarPolicy = javax.swing.JScrollPane.HORIZONTAL_SCROLLBAR_NEVER
-            background = LCATheme.backgroundColor
-            viewport.background = LCATheme.backgroundColor
-        }
-
-        // Listen for messages update to scroll to bottom
-        chatView.addPropertyChangeListener("messagesUpdated", chatMessagesUpdatedListener)
-
-        // Chat panel: scroll(messages) + promptInput (sticky bottom)
-        val chatPanel = JPanel(BorderLayout()).apply {
-            background = LCATheme.backgroundColor
-            add(chatScrollPane, BorderLayout.CENTER)
-            add(promptInputPanel, BorderLayout.SOUTH)
-        }
-
-        // Steps panel: turn state status bar (IDLE/iteration) at top, steps view below (has its own internal scroll)
-        val stepsPanel = JPanel(BorderLayout()).apply {
-            add(turnStateStatusBar, BorderLayout.NORTH)
-            add(stepsQueueView, BorderLayout.CENTER)
-        }
-
-        // Create tabbed pane
-        tabbedPane = JTabbedPane().apply {
-            addTab("Chat", chatPanel)
-            addTab("Execution", stepsPanel)
-            addTab("Context", contextPanel)
-            addTab("Agents", agentExecutionPanel)
-            addTab("RAG", ragViewPanel)
-            addTab("Debug", debugPanel)
-            addTab("Logs", logsPanel)
-            addTab("API", apiLogsPanel)
-            addChangeListener {
-                if (selectedIndex >= 0 && getComponentAt(selectedIndex) === apiLogsPanel) {
-                    apiLogsPanel.ensureLoaded()
-                }
-            }
-        }
-
-        // Create normal content panel
-        val normalContentPanel = JPanel(BorderLayout()).apply {
-            add(tabbedPane, BorderLayout.CENTER)
-            border = LCATheme.paddedBorder(0, 0, 2, 0)
-        }
-
-        // Create middle panel with CardLayout for switching between views
-        cardLayout = CardLayout()
-        middlePanel = JPanel(cardLayout).apply {
-            add(normalContentPanel, "NORMAL")
-            add(historyPanel, "HISTORY")
-            // Settings will be added lazily when first opened
-        }
-
-        add(middlePanel, BorderLayout.CENTER)
-        add(statusBar, BorderLayout.SOUTH)
-
-        // Load advanced view setting from config on startup
         cs.launch {
             try {
-                val config = coreApiClient.configRouter.getConfig(section = "general", scope = "app")
-                val advancedView = (config.settings["advanced_view"] as? String).toBoolean()
-
-                // Apply setting on EDT
-                javax.swing.SwingUtilities.invokeLater {
-                    setAdvancedViewEnabled(advancedView)
+                val sessionManager = withContext(Dispatchers.IO) {
+                    SessionManager.getInstance(project)
                 }
-            } catch (e: Exception) {
-                logger.error(e) { "Failed to load advanced view setting, using default: false" }
-                // Start in simple mode by default if loading fails
-                javax.swing.SwingUtilities.invokeLater {
-                    setAdvancedViewEnabled(false)
+                val stepExecutionService = withContext(Dispatchers.IO) {
+                    StepExecutionService.getInstance(project)
                 }
-            }
-        }
-
-        // Register statusBar with SessionManager for execution progress updates
-        sessionManager.setStatusBar(statusBar)
-
-        // Listen to status bar stop execution event
-        statusBar.addPropertyChangeListener("stopExecution", stopExecutionListener)
-
-//        // Listen to toolbar property changes for history toggle
-//        toolbar.addPropertyChangeListener("showHistory") { evt ->
-//            if (evt.newValue == true) {
-//                toggleHistoryPanel()
-//            }
-//        }
-//
-//        // Listen to toolbar property changes for new session creation
-//        toolbar.addPropertyChangeListener("newSessionCreated") { evt ->
-//            if (evt.newValue == true) {
-//                showChatView()
-//            }
-//        }
-        // Direct callback for reliable navigation back to chat
-        historyPanel.onNavigateToChat = { showChatView() }
-
-        // Listen to historyPanel session loaded event to return to Chat view (legacy fallback)
-        historyPanel.addPropertyChangeListener("sessionLoaded", historySessionLoadedListener)
-
-        // Listen to historyPanel back to chat event (legacy fallback)
-        historyPanel.addPropertyChangeListener("backToChat", historyBackToChatListener)
-
-        // Listen to mode changes to show/hide steps queue
-        cs.launch {
-            sessionManager.activeSession.collect { session ->
-                session?.let { updateStepsQueueVisibility(it.mode) }
-                // Wire AgentExecutionPanel to the active session so Trace/Timeline
-                // get populated for single-agent runs too (not only multi-agent).
-                session?.let {
-                    agentExecutionPanel.subscribeToSession(sessionManager.apiRouter.agentEventBus, it.id)
-                }
+                ApplicationManager.getApplication().invokeLater({
+                    installContent(sessionManager, stepExecutionService)
+                }, ModalityState.any())
+            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
+                // cs was cancelled (dispose() during init): not an initialization failure,
+                // so propagate it instead of logging an error and updating the placeholder.
+                throw e
+            } catch (e: Throwable) {
+                logger.error(e) { "Failed to initialize Refio tool window" }
+                ApplicationManager.getApplication().invokeLater({
+                    loader.text = "Failed to initialize Refio: ${e.message}"
+                }, ModalityState.any())
             }
         }
-
-        // Listen to subtasks changes to update steps queue visibility
-        cs.launch {
-            sessionManager.subtasks.collect {
-                sessionManager.activeSession.value?.let { session ->
-                    updateStepsQueueVisibility(session.mode)
-                }
-            }
-        }
     }

-    /**
-     * Cycle to next mode (triggered by Alt+M action)
-     */
-    fun cycleMode() {
-        promptInputPanel.cycleMode()
-    }
-
-    /**
-     * Public method for toolbar action: Create new session
-     */
-    fun createNewSession() {
-        logger.info { "Creating new session" }
+    /**
+     * Replaces the placeholder with a new [RefioContentPanel], publishes it through
+     * [content] and replays the actions queued while initialization was running.
+     *
+     * Scheduled from `init` via `invokeLater`. Does nothing when this panel or the
+     * project has been disposed in the meantime.
+     */
+    private fun installContent(
+        sessionManager: SessionManager,
+        stepExecutionService: StepExecutionService
+    ) {
+        if (disposed || project.isDisposed) {
+            logger.info { "Skipping Refio content installation: already disposed" }
+            return
+        }
+        logger.info { "Installing Refio content panel on EDT" }

-        cs.launch {
-            // Cancel any running operations before creating new session
-            try {
-                sessionManager.cancelStreaming()
-                sessionManager.cancelExecution()
-                stepExecutionService.stopExecution()
-                logger.info { "Canceled running operations before creating new session" }
-            } catch (e: Exception) {
-                logger.warn(e) { "Failed to cancel operations (may not be running)" }
-            }
+        val panel = RefioContentPanel(project, sessionManager, stepExecutionService)

-            val currentMode = promptInputPanel.getSelectedMode()
-            val executionMode = promptInputPanel.getCurrentExecutionMode()
-            sessionManager.createSession("Session (${currentMode.name})", currentMode, executionMode)
+        remove(loader)
+        add(panel, BorderLayout.CENTER)
+        revalidate()
+        repaint()

-            // Switch to Chat view
-            SwingUtilities.invokeLater {
-                showChatView()
-            }
+        // Publish the panel and drain the queue under the same lock run() uses, so an
+        // action is either replayed here or executed directly, never lost.
+        val actionsToReplay: List<(RefioContentPanel) -> Unit>
+        synchronized(pendingActions) {
+            content = panel
+            actionsToReplay = pendingActions.toList()
+            pendingActions.clear()
         }
+        actionsToReplay.forEach { it(panel) }
     }

-    /**
-     * Public method for toolbar action: Show history panel
-     */
-    fun showHistory() {
-        toggleHistoryPanel()
-    }
-
-    /**
-     * Public method for toolbar action: Show settings view
-     */
-    fun showSettings() {
-        openSettings()
-    }
-
-    /**
-     * Public method for toolbar action: Show help (opens GitHub docs)
-     */
-    fun showHelp() {
-        logger.info { "Show help requested" }
-        com.intellij.ide.BrowserUtil.browse("https://github.com/jclab-joseph/refio")
-    }
-
-    /**
-     * Scroll chat to bottom (show latest messages).
-     * Forces synchronous layout via validate() before reading scroll sizes
-     * to prevent the visual "jump" caused by stale layout metrics.
-     */
-    private fun scrollChatToBottom() {
-        javax.swing.SwingUtilities.invokeLater {
-            chatScrollPane.validate()
-            val scrollBar = chatScrollPane.verticalScrollBar
-            scrollBar.value = scrollBar.maximum
+    /**
+     * Runs [action] against the content panel: immediately when it is installed,
+     * otherwise the action is queued and replayed by [installContent].
+     */
+    private fun run(action: (RefioContentPanel) -> Unit) {
+        val ready = content
+        if (ready != null) {
+            action(ready)
+            return
         }
-    }
-
-    /**
-     * Open Settings view (switches to SETTINGS card in CardLayout)
-     */
-    private fun openSettings() {
-        logger.info { "Opening settings view" }
-
-        // Create settings view only once (lazy initialization)
-        if (settingsView == null) {
-            settingsView = SettingsView(project, coreApiClient) {
-                closeSettings()
+        synchronized(pendingActions) {
+            val current = content
+            if (current != null) {
+                current.let(action)
+            } else {
+                pendingActions.add(action)
             }
-
-            // Listen to advanced view changes
-            settingsView!!.addPropertyChangeListener("advancedViewChanged", advancedViewChangedListener)
-
-            // Add settings to CardLayout (only first time)
-            middlePanel.add(settingsView, "SETTINGS")
         }
-
-        // Switch to settings card
-        cardLayout.show(middlePanel, "SETTINGS")
-
-        // Refresh UI
-        middlePanel.revalidate()
-        middlePanel.repaint()
     }

-    /**
-     * Close Settings view and return to normal content (Chat view)
-     */
-    private fun closeSettings() {
-        logger.info { "Closing settings view" }
+    fun cycleMode() = run { it.cycleMode() }

-        // Switch back to normal card (Chat view)
-        cardLayout.show(middlePanel, "NORMAL")
+    fun createNewSession() = run { it.createNewSession() }

-        // Switch to Chat tab (index 0)
-        tabbedPane.selectedIndex = 0
+    fun showHistory() = run { it.showHistory() }

-        // Refresh UI
-        middlePanel.revalidate()
-        middlePanel.repaint()
+    fun showSettings() = run { it.showSettings() }

-        // Refresh model list in dropdown (visibility may have changed)
-        // Note: refreshModels() will preserve user's selected model, not override with default
-        promptInputPanel.refreshModels()
-    }
-
-    /**
-     * Toggle history panel visibility
-     * US-204: Show history panel instead of normal content (not as overlay)
-     */
-    private fun toggleHistoryPanel() {
-        logger.info { "Toggling history panel" }
-
-        // Switch to history card
-        cardLayout.show(middlePanel, "HISTORY")
-
-        // Load sessions when showing history
-        historyPanel.showHistory()
-
-        middlePanel.revalidate()
-        middlePanel.repaint()
-    }
-
-    /**
-     * Show normal content (hide history panel)
-     */
-    private fun showNormalContent() {
-        logger.info { "Showing normal content" }
-
-        // Switch to normal card
-        cardLayout.show(middlePanel, "NORMAL")
-
-        middlePanel.revalidate()
-        middlePanel.repaint()
-    }
-
-    /**
-     * Show Chat view (used when creating new session from History)
-     */
-    private fun showChatView() {
-        logger.info { "Showing Chat view after new session creation" }
-
-        // Switch to normal card (hide history)
-        cardLayout.show(middlePanel, "NORMAL")
-
-        // Switch to Chat tab (index 0)
-        tabbedPane.selectedIndex = 0
-
-        middlePanel.revalidate()
-        middlePanel.repaint()
-    }
-
-    private fun updateStepsQueueVisibility(@Suppress("UNUSED_PARAMETER") mode: TaskMode) {
-        // No-op: using tabbed pane layout — steps are always accessible via Steps tab.
-        // If split pane layout is re-enabled, add show/hide logic here based on mode.
+    fun showHelp() {
+        // Help is independent of content initialization — open browser directly.
+        logger.info { "Show help requested" }
+        com.intellij.ide.BrowserUtil.browse("https://github.com/jclab-joseph/refio")
     }

-    /**
-     * Enable or disable advanced view
-     * - Simple mode: Show only Chat tab, hide tab bar
-     * - Advanced mode: Show all tabs with tab bar
-     */
-    fun setAdvancedViewEnabled(enabled: Boolean) {
-        logger.info { "Setting advanced view: $enabled" }
-
-        javax.swing.SwingUtilities.invokeLater {
-            if (enabled) {
-                // Advanced mode: Show all tabs
-                // Add tabs back if they were removed (check if tab count is only 1)
-                if (tabbedPane.tabCount == 1) {
-                    tabbedPane.addTab("Steps", stepsQueueView)
-                    tabbedPane.addTab("Context", contextPanel)
-                    tabbedPane.addTab("RAG", ragViewPanel)
-                    tabbedPane.addTab("Logs", logsPanel)
-                    tabbedPane.addTab("Debug", debugPanel)
-                    tabbedPane.addTab("API Logs", apiLogsPanel)
-                }
-                // Show tab bar by setting tab placement
-                tabbedPane.tabPlacement = JTabbedPane.TOP
-            } else {
-                // Simple mode: Remove all tabs except Chat (index 0)
-                while (tabbedPane.tabCount > 1) {
-                    tabbedPane.removeTabAt(1)
-                }
-                // Hide tab bar by setting tab placement to hidden (hack: set height to 0)
-                // Note: There's no direct way to hide tabs in Swing, so we remove them instead
-                tabbedPane.selectedIndex = 0
-            }
-
-            // Update status bar as well
-            statusBar.setAdvancedViewEnabled(enabled)
-
-            tabbedPane.revalidate()
-            tabbedPane.repaint()
-        }
-    }
+    fun setAdvancedViewEnabled(enabled: Boolean) = run { it.setAdvancedViewEnabled(enabled) }

-    /**
-     * Dispose resources when tool window is closed
-     */
     override fun dispose() {
+        // Flag first so an already-scheduled installContent() callback becomes a no-op.
+        disposed = true
         cs.cancel()
-        chatView.removePropertyChangeListener("messagesUpdated", chatMessagesUpdatedListener)
-        statusBar.removePropertyChangeListener("stopExecution", stopExecutionListener)
-        historyPanel.removePropertyChangeListener("sessionLoaded", historySessionLoadedListener)
-        historyPanel.removePropertyChangeListener("backToChat", historyBackToChatListener)
-        settingsView?.removePropertyChangeListener("advancedViewChanged", advancedViewChangedListener)
-        chatView.dispose()
-        stepsQueueView.dispose()
-        contextPanel.dispose()
-        promptInputPanel.dispose()
-        statusBar.dispose()
-        logsPanel.dispose()
-        debugPanel.dispose()
-        // historyPanel doesn't have dispose() yet - no resources to clean up
+        synchronized(pendingActions) { pendingActions.clear() }
+        content?.dispose()
     }
 }