diff --git a/.aiignore b/.aiignore
index e44d2066..9b950707 100644
--- a/.aiignore
+++ b/.aiignore
@@ -25,3 +25,4 @@ GEMINI.md
.gitignore
.aiignore
_example
+.intellijPlatform
diff --git a/.gitignore b/.gitignore
index b4c8f6f3..e3c4f957 100644
--- a/.gitignore
+++ b/.gitignore
@@ -86,3 +86,4 @@ yarn-error.log*
/docs/tasks/
/.gradle-user-home-fix/
/.intellijPlatform/
+/run-ai.bat
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bcb80344..091e66bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
---
+## [0.0.1.8] - 2026-04-26
+
+### Added
+
+- **Native function calling** — agents can now use providers' structured `tools` API instead of the JSON-in-text envelope. Controlled by the `tools.native_tools` setting (`auto | always | never`; `nativeToolsMode` under `general` in `config.yaml`); the default `auto` enables it only for models with `supportsFunctionCalling=true`.
+ - OpenAI (Chat Completions + Responses API) — strict-mode schema sanitization, `tool_choice: auto`.
+ - Anthropic — `tools` array + `content[].tool_use` response parsing; schema sanitized (composition keywords stripped).
+ - Gemini — `functionDeclarations` array + `functionCall` response parsing; Gemini-specific type normalization.
+- `ToolSchemaSanitizer` — provider-aware JSON Schema normalization (OpenAI strict compatibility check, Anthropic forbidden-keyword stripping, Gemini single-type coercion and nullable fields).
+- `NativeToolsFallbackTracker` — session-scoped in-memory registry; if a provider returns 400 "tools not supported", the model is marked and all subsequent requests in the same process skip native tools automatically.
+- `NativeToolsResolver` (`NativeToolsMode` enum: `AUTO / ALWAYS / NEVER`) — central decision logic with precedence: fallback cache → NEVER → ALWAYS → `ModelDefinition.supportsFunctionCalling`.
+- XML tag fallback in `ToolCallParser` — recovers tool calls from XML-style pseudo-tags emitted by weak local models (qwen<70B, some Gemma builds) that bypass the native API entirely. Logs a warning whenever the fallback is used.
+- New models: **GPT-5.5**, **GPT-5.4 Nano**, **Qwen 3.6 35B/27B** (Ollama), **Qwen 3-next 80B**, **gpt-oss-safeguard 20B/120B**, OpenRouter `qwen/*` patterns.
+- `claude-sonnet-4-6` added to `SupportedModels`.
+- Per-iteration token and cost metrics persisted directly to the task row so the History panel and live stats bar show running totals without aggregating from message metadata.
+
+### Changed
+
+- `AgentTurnLoop` auto-falls back to the JSON-in-text path when a provider throws `ToolsNotSupportedException` — prompt is rebuilt without `` and the JSON envelope contract; no user action needed.
+- When native tools are active the `response_format` / `json_object` override is suppressed (incompatible with function-calling mode on most providers).
+- `TurnLLMCaller` accepts `nativeToolSchemas` and forwards them as `native_tools` in `kwargs`; retry handler also propagates `thinking`, `reasoningEffort`, and `noEgressEnabled`.
+- Subagent profiles filter the native `tools` array to match their allowed-tools list — prevents the model from calling tools the harness would reject.
+- `ToolCallParser.extractToolCalls` pre-guard (`startsWith("{")`) removed — some models (e.g. glm-4.7-flash) emit prose before the JSON envelope, which the brace-matching strategy handles correctly. All-strategies-failed log downgraded from `WARN` to `DEBUG`.
+
+### Fixed
+
+- Anthropic tool results were mapped to `role: "assistant"` — now correctly mapped to `role: "user"`, fixing HTTP 400 rejections from claude-opus-4-6 and newer models that treat assistant-role tool results as prefill.
+- `temperature` parameter no longer sent to models that deprecated it (`claude-opus-4-7`, `claude-opus-4-6`, `gpt-5.5`, `gpt-5.4-nano`) — removes spurious provider warnings.
+- Anthropic streaming: `content_block_start` for `tool_use` blocks now captures the call `id` alongside the name.
+
+---
+
## [0.0.1.7] - 2025-04-17
### Added
diff --git a/README.md b/README.md
index 30b8575c..02095f3f 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
[](LICENSE)
[](https://www.jetbrains.com/idea/)
-[](CHANGELOG.md)
+[](CHANGELOG.md)
[](docs/ROADMAP.md)
**Open-source AI coding plugin for IntelliJ IDEA, built in Kotlin.**
@@ -216,12 +216,12 @@ See [docs/config.md](docs/config.md) for full configuration reference.
## Project status
-| | |
-|---|---|
-| **Version** | 0.0.1.5 |
-| **Stage** | Early-stage — active development |
-| **License** | MIT |
-| **Community** | Small, growing — PRs and issues welcome |
+| | |
+|---|------------------------------------------|
+| **Version** | 0.0.1.8 |
+| **Stage** | Early-stage — active development |
+| **License** | MIT |
+| **Community** | Small, growing — PRs and issues welcome |
| **Change cadence** | Fast. Breaking changes possible pre-1.0. |
---
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt
index fd9b8482..349338d7 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/api/ApiModels.kt
@@ -444,7 +444,9 @@ data class ProjectContextResponse(
// Active LLM request preview (exact runtime shape for next call)
val activeLlmRequestPrompt: String? = null,
// Auxiliary prompt preview (tool/system/user templates not in active request)
- val auxiliaryPromptsPreview: String? = null
+ val auxiliaryPromptsPreview: String? = null,
+ // Raw context prompt as produced by ContextService.buildLLMContextPrompt (no system prompt, no messages)
+ val rawContextPrompt: String? = null
)
/**
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt
index 542cdb28..72e69df2 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/api/CoreApiRouter.kt
@@ -113,6 +113,7 @@ class CoreApiRouter(
configService = configService,
ragRepository = persistence.ragRepository,
snapshotRepository = persistence.snapshotRepository,
+ snapshotGroupRepository = persistence.snapshotGroupRepository,
analysisReportRepository = persistence.projectAnalysisReportRepository,
taskRepository = taskRepository,
chatMessageRepository = persistence.chatMessageRepository,
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt
index 6dc27023..502bddc7 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AgentTurnLoopFactory.kt
@@ -125,7 +125,8 @@ internal class AgentTurnLoopFactory(
llmRetryHandler = null,
workingMemoryIntegration = workingMemoryIntegration,
agentEventBus = agentEventBus,
- hookService = hookService
+ hookService = hookService,
+ toolPermissionsService = toolPermissionsService
)
}
}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt
index b97ec57c..11fa998a 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/AnalysisStack.kt
@@ -5,6 +5,7 @@ import pl.jclab.refio.core.config.ConfigKeys
import pl.jclab.refio.core.db.repositories.ChatMessageRepository
import pl.jclab.refio.core.db.repositories.ProjectAnalysisReportRepository
import pl.jclab.refio.core.db.repositories.RagRepository
+import pl.jclab.refio.core.db.repositories.SnapshotGroupRepository
import pl.jclab.refio.core.db.repositories.SnapshotRepository
import pl.jclab.refio.core.db.repositories.SubtaskRepository
import pl.jclab.refio.core.db.repositories.TaskRepository
@@ -26,6 +27,7 @@ internal class AnalysisStack(
private val configService: ConfigService,
private val ragRepository: RagRepository,
snapshotRepository: SnapshotRepository,
+ snapshotGroupRepository: SnapshotGroupRepository,
analysisReportRepository: ProjectAnalysisReportRepository,
taskRepository: TaskRepository,
chatMessageRepository: ChatMessageRepository,
@@ -104,7 +106,7 @@ internal class AnalysisStack(
} else null
val snapshotService: SnapshotService? = if (projectRoot != null) {
- SnapshotService(snapshotRepository, projectRoot)
+ SnapshotService(snapshotRepository, snapshotGroupRepository, projectRoot)
} else null
/**
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt
index 723acff5..9a717fc6 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/DomainRouters.kt
@@ -193,6 +193,7 @@ internal class DomainRouters(
projectAnalyzer = projectAnalyzer,
richProjectAnalysisEngine = richProjectAnalysisEngine,
promptSectionProviders = promptSectionProviders,
+ configService = configService,
)
}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt
index 1897d078..6acb4fda 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/api/modules/PersistenceModule.kt
@@ -10,6 +10,7 @@ import pl.jclab.refio.core.db.repositories.DocumentationRepository
import pl.jclab.refio.core.db.repositories.ProjectAnalysisReportRepository
import pl.jclab.refio.core.db.repositories.PromptsRepository
import pl.jclab.refio.core.db.repositories.RagRepository
+import pl.jclab.refio.core.db.repositories.SnapshotGroupRepository
import pl.jclab.refio.core.db.repositories.SnapshotRepository
import pl.jclab.refio.core.db.repositories.SubtaskRepository
import pl.jclab.refio.core.db.repositories.TaskRepository
@@ -29,6 +30,7 @@ class PersistenceModule {
val ragRepository = RagRepository()
val documentationRepository = DocumentationRepository()
val snapshotRepository = SnapshotRepository()
+ val snapshotGroupRepository = SnapshotGroupRepository()
val projectAnalysisReportRepository = ProjectAnalysisReportRepository()
val agentSessionRepository = AgentSessionRepository()
val agentInstanceRepository = AgentInstanceRepository()
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt b/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt
index 49d1e8f6..66f5882f 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/api/routers/ProjectContextRouter.kt
@@ -10,10 +10,18 @@ import pl.jclab.refio.core.llm.LLMMessage
import pl.jclab.refio.core.llm.LLMMessageMapper
import pl.jclab.refio.core.llm.TokenEstimator
import pl.jclab.refio.core.models.context.*
+import pl.jclab.refio.core.llm.ModelDefinitions
+import pl.jclab.refio.core.llm.NativeToolsFallbackTracker
+import pl.jclab.refio.core.llm.parseNativeToolsMode
+import pl.jclab.refio.core.llm.shouldUseNativeTools
+import pl.jclab.refio.core.services.ConfigService
import pl.jclab.refio.core.services.ContextService
import pl.jclab.refio.core.services.ProjectAnalyzerService
import pl.jclab.refio.core.services.PromptsService
import pl.jclab.refio.core.services.analysis.project.RichProjectAnalysisEngine
+import pl.jclab.refio.core.services.turn.nativeToolsDescriptionOverride
+import pl.jclab.refio.core.config.ConfigKeys
+import pl.jclab.refio.core.api.ModelOperation
import pl.jclab.refio.core.logging.dualLogger
import java.nio.file.Path
@@ -37,7 +45,8 @@ class ProjectContextRouter(
* (e.g. , ) that the runtime prompt has.
* Pass emptyList() to preserve the legacy stripped preview.
*/
- private val promptSectionProviders: List = emptyList()
+ private val promptSectionProviders: List = emptyList(),
+ private val configService: ConfigService? = null,
) : Router {
override suspend fun initialize() {
@@ -48,6 +57,42 @@ class ProjectContextRouter(
logger.info { "[ProjectContextRouter] Shutting down" }
}
+ private fun resolveResponseContractFragment(nativeToolsActive: Boolean): String {
+ val name = if (nativeToolsActive) "response-contract-native" else "response-contract-json"
+ return promptsService.getFragment(name)
+ }
+
+ /**
+ * Mirror of [TurnPromptBuilder.resolveMultiAgentSection] for preview rendering. Returns the
+ * full delegation-guidance block when the caller can actually invoke subagents — empty
+ * string otherwise. ProjectContextRouter previews the "main" run (no profile overrides),
+ * so only the mode-level tool list is checked for `invoke_subagent`.
+ */
+ private fun resolveMultiAgentSectionFragment(mode: TaskMode, taskId: String): String {
+ val tools = toolDescriptionBuilder.getToolsForMode(mode, taskId)
+ val hasInvokeSubagent = tools.any { it.name.equals("invoke_subagent", ignoreCase = true) }
+ if (!hasInvokeSubagent) return ""
+ val fragmentName = when (mode) {
+ TaskMode.PLAN -> "multi-agent-plan"
+ TaskMode.AGENT -> "multi-agent-agent"
+ TaskMode.CHAT -> return ""
+ }
+ return promptsService.getFragment(fragmentName)
+ }
+
+ private fun isNativeToolsActiveForTask(mode: TaskMode, taskId: String): Boolean {
+ if (configService == null) return false
+ val operation = when (mode) {
+ TaskMode.PLAN -> ModelOperation.PLAN
+ TaskMode.AGENT -> ModelOperation.CODING
+ TaskMode.CHAT -> return false
+ }
+ val (modelId, providerName) = configService.getModel(operation, taskId)
+ val nativeToolsMode = parseNativeToolsMode(configService.getTyped(ConfigKeys.NATIVE_TOOLS_MODE, taskId))
+ val modelDef = ModelDefinitions.getDefinition(providerName, modelId)
+ return shouldUseNativeTools(nativeToolsMode, modelDef, modelId, NativeToolsFallbackTracker.getFallbackSet())
+ }
+
/**
* Get project context (for UI visualization).
*/
@@ -199,7 +244,7 @@ class ProjectContextRouter(
): RuntimePromptPreview {
return when (task.mode) {
TaskMode.CHAT -> buildChatRuntimePromptPreview(chatHistory, pendingUserInput, contextPrompt, contextSectionTokens)
- TaskMode.PLAN -> buildPlanRuntimePromptPreview(task, chatHistory, pendingUserInput, contextPrompt, contextSectionTokens)
+ TaskMode.PLAN -> buildPlanRuntimePromptPreview(task, chatHistory, pendingUserInput, contextPrompt, userContextRefs, contextSectionTokens)
TaskMode.AGENT -> buildAgentRuntimePromptPreview(task, chatHistory, pendingUserInput, contextPrompt, userContextRefs, contextSectionTokens)
}
}
@@ -248,30 +293,63 @@ class ProjectContextRouter(
chatHistory: List,
pendingUserInput: String?,
contextPrompt: String,
+ userContextRefs: List,
contextSectionTokens: Map
): RuntimePromptPreview {
- val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.PLAN, task.id)
- val validToolNames = toolDescriptionBuilder.getValidToolNames(TaskMode.PLAN, task.id)
- val basePrompt = promptsService.getSystemPrompt(
- type = PromptType.SYSTEM_PLAN,
- variables = mapOf("tool_descriptions" to toolDescriptions, "valid_tool_names" to validToolNames)
- )
- val systemPrompt = appendProviderSections(basePrompt, TaskMode.PLAN, task.id, iteration = 0)
+ val nativeTools = isNativeToolsActiveForTask(TaskMode.PLAN, task.id)
+ val responseContract = resolveResponseContractFragment(nativeTools)
+ val multiAgentSection = resolveMultiAgentSectionFragment(TaskMode.PLAN, task.id)
+ val systemPrompt = if (nativeTools) {
+ val basePrompt = promptsService.getSystemPrompt(
+ type = PromptType.SYSTEM_PLAN,
+ variables = mapOf(
+ "tool_descriptions" to nativeToolsDescriptionOverride(),
+ "response_contract" to responseContract,
+ "multi_agent_section" to multiAgentSection
+ )
+ )
+ appendProviderSections(basePrompt, TaskMode.PLAN, task.id, iteration = 0)
+ } else {
+ val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.PLAN, task.id)
+ val validToolNames = toolDescriptionBuilder.getValidToolNames(TaskMode.PLAN, task.id)
+ val basePrompt = promptsService.getSystemPrompt(
+ type = PromptType.SYSTEM_PLAN,
+ variables = mapOf(
+ "tool_descriptions" to toolDescriptions,
+ "valid_tool_names" to validToolNames,
+ "response_contract" to responseContract,
+ "multi_agent_section" to multiAgentSection
+ )
+ )
+ appendProviderSections(basePrompt, TaskMode.PLAN, task.id, iteration = 0)
+ }
- val requestText = pendingUserInput
- ?: chatHistory.lastOrNull { it.role == MessageRole.USER }?.content ?: ""
- val userPrompt = buildString {
- appendLine("User request:")
- appendLine(requestText)
- appendLine()
- appendLine("Create a detailed execution plan as JSON.")
- }.trim()
- val messages = listOf(LLMMessage(role = "user", content = userPrompt))
+ // For native tools, show the same multi-message structure that AgentTurnLoop actually sends
+ // (ContextService builds stable context + working memory + conversation history).
+ // For the legacy JSON path, keep the old single-message "Create a detailed execution plan" format.
+ val messages: List
+ val effectiveContextPrompt: String
+ if (nativeTools) {
+ messages = buildAgentMessagesForPreview(task.id, chatHistory, pendingUserInput, userContextRefs,
+ pendingUserInput ?: chatHistory.lastOrNull { it.role == MessageRole.USER }?.content)
+ effectiveContextPrompt = ""
+ } else {
+ val requestText = pendingUserInput
+ ?: chatHistory.lastOrNull { it.role == MessageRole.USER }?.content ?: ""
+ val userPrompt = buildString {
+ appendLine("User request:")
+ appendLine(requestText)
+ appendLine()
+ appendLine("Create a detailed execution plan as JSON.")
+ }.trim()
+ messages = listOf(LLMMessage(role = "user", content = userPrompt))
+ effectiveContextPrompt = contextPrompt
+ }
val preparedPayload = LLMClient.prepareRequestPayload(
messages = messages,
systemPrompt = systemPrompt,
- contextContent = contextPrompt.takeIf { it.isNotBlank() },
+ contextContent = effectiveContextPrompt.takeIf { it.isNotBlank() },
systemMessages = emptyList()
)
val activeTokens = preparedPayload.estimatedInputTokens
@@ -282,7 +360,7 @@ class ProjectContextRouter(
systemPromptForBreakdown = systemPrompt,
systemMessages = emptyList(),
messages = messages,
- hasInjectedContext = contextPrompt.isNotBlank()
+ hasInjectedContext = effectiveContextPrompt.isNotBlank()
)
val preview = renderActiveRequestPreview(
@@ -302,18 +380,37 @@ class ProjectContextRouter(
userContextRefs: List,
contextSectionTokens: Map
): RuntimePromptPreview {
- val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.AGENT, task.id)
- val toolSelectionMatrix = toolDescriptionBuilder.getToolSelectionMatrix(TaskMode.AGENT, task.id)
- val baseSystemPromptRaw = promptsService.getSystemPrompt(
- type = PromptType.SYSTEM_AGENT,
- variables = mapOf(
- "tool_descriptions" to toolDescriptions,
- "tool_selection_matrix" to toolSelectionMatrix
+ val nativeToolsAgent = isNativeToolsActiveForTask(TaskMode.AGENT, task.id)
+ val responseContractAgent = resolveResponseContractFragment(nativeToolsAgent)
+ val multiAgentSectionAgent = resolveMultiAgentSectionFragment(TaskMode.AGENT, task.id)
+ val baseSystemPrompt = if (nativeToolsAgent) {
+ val toolSelectionMatrix = toolDescriptionBuilder.getToolSelectionMatrix(TaskMode.AGENT, task.id)
+ val baseRaw = promptsService.getSystemPrompt(
+ type = PromptType.SYSTEM_AGENT,
+ variables = mapOf(
+ "tool_descriptions" to nativeToolsDescriptionOverride(),
+ "tool_selection_matrix" to toolSelectionMatrix,
+ "response_contract" to responseContractAgent,
+ "multi_agent_section" to multiAgentSectionAgent
+ )
)
- )
- // Apply the same section providers used by TurnPromptBuilder in runtime
- // so the preview reflects the real prompt (including ).
- val baseSystemPrompt = appendProviderSections(baseSystemPromptRaw, TaskMode.AGENT, task.id, iteration = 0)
+ appendProviderSections(baseRaw, TaskMode.AGENT, task.id, iteration = 0)
+ } else {
+ val toolDescriptions = toolDescriptionBuilder.getToolDescriptions(TaskMode.AGENT, task.id)
+ val toolSelectionMatrix = toolDescriptionBuilder.getToolSelectionMatrix(TaskMode.AGENT, task.id)
+ val baseRaw = promptsService.getSystemPrompt(
+ type = PromptType.SYSTEM_AGENT,
+ variables = mapOf(
+ "tool_descriptions" to toolDescriptions,
+ "tool_selection_matrix" to toolSelectionMatrix,
+ "response_contract" to responseContractAgent,
+ "multi_agent_section" to multiAgentSectionAgent
+ )
+ )
+ // Apply the same section providers used by TurnPromptBuilder in runtime
+ // so the preview reflects the real prompt (including ).
+ appendProviderSections(baseRaw, TaskMode.AGENT, task.id, iteration = 0)
+ }
val runtimeSystemPrompt = if (contextPrompt.isNotBlank()) {
"$baseSystemPrompt\n\n\n$contextPrompt\n"
@@ -530,7 +627,17 @@ class ProjectContextRouter(
.filter { it != activeType }
.sortedBy { it.name }
.mapNotNull { type ->
- val content = runCatching { promptsService.getSystemPrompt(type) }.getOrNull()?.takeIf { it.isNotBlank() }
+ // Auxiliary previews are non-active modes shown for reference. Supply blank
+ // placeholders for all declared variables so `PromptTemplate.render` doesn't
+ // throw — content quality doesn't matter here, we just need it to render.
+ val auxVariables = mapOf(
+ "tool_descriptions" to "",
+ "tool_selection_matrix" to "",
+ "valid_tool_names" to "",
+ "response_contract" to "",
+ "multi_agent_section" to ""
+ )
+ val content = runCatching { promptsService.getSystemPrompt(type, auxVariables) }.getOrNull()?.takeIf { it.isNotBlank() }
?: return@mapNotNull null
val role = if (type.name.endsWith("_USER")) "user_template" else "system"
PromptPreviewEntry(source = type.name, role = role, content = content, estimatedTokens = TokenEstimator.estimateTokens(content))
@@ -633,7 +740,8 @@ class ProjectContextRouter(
taskRequirementsPrompt = taskRequirementsPrompt,
recentWorkPrompt = recentWorkPrompt,
activeLlmRequestPrompt = activeLlmPreviewPrompt,
- auxiliaryPromptsPreview = auxiliaryPreviewPrompt
+ auxiliaryPromptsPreview = auxiliaryPreviewPrompt,
+ rawContextPrompt = llmPrompt
)
}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt
index 3d3abef0..3b8ea2a4 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigKeys.kt
@@ -163,6 +163,22 @@ object ConfigKeys {
default = false
)
+ // ==================== TOOLS ====================
+
+ /**
+ * Native function-calling mode.
+ * - "auto" (default): use native tools iff ModelDefinition.supportsFunctionCalling=true
+ * - "always": force native tools even on unlisted models (debug / opt-in)
+ * - "never" : force JSON-in-text path (escape hatch)
+ */
+ val NATIVE_TOOLS_MODE = ConfigKey(
+ key = "tools.native_tools",
+ parser = { raw -> raw.trim().lowercase().takeIf { it in setOf("auto", "always", "never") } },
+ default = "auto",
+ validator = { it in setOf("auto", "always", "never") },
+ yamlAccessor = { it.getNativeToolsMode()?.trim()?.lowercase() }
+ )
+
val GENERAL_EXECUTION_MODE = ConfigKey(
key = "general.execution_mode",
parser = { it.trim().uppercase().takeIf { s -> s.isNotBlank() } },
@@ -720,6 +736,7 @@ object ConfigKeys {
AUTO_OPTIMIZE_PERCENTAGE,
READ_ONLY_MODE,
// Tools
+ NATIVE_TOOLS_MODE,
TOOLS_PERMISSIONS,
TOOL_PERMISSION_RUN_TERMINAL,
// Tool summary
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt
index 730d7f6f..a5137183 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlEmitter.kt
@@ -32,6 +32,7 @@ internal object ConfigYamlEmitter {
general.thinkingEnabled?.let { sb.appendLine(" thinkingEnabled: $it") }
general.noEgressEnabled?.let { sb.appendLine(" noEgressEnabled: $it") }
general.executionMode?.let { sb.appendLine(" executionMode: \"$it\"") }
+ general.nativeToolsMode?.let { sb.appendLine(" nativeToolsMode: \"$it\"") }
sb.appendLine()
}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt
index d604960e..586c3df8 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlMerger.kt
@@ -40,6 +40,7 @@ internal object ConfigYamlMerger {
thinkingEnabled = override.thinkingEnabled ?: base.thinkingEnabled,
noEgressEnabled = override.noEgressEnabled ?: base.noEgressEnabled,
executionMode = override.executionMode ?: base.executionMode,
+ nativeToolsMode = override.nativeToolsMode ?: base.nativeToolsMode,
)
}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt
index 1c14c3b8..3aa9a31e 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/config/ConfigYamlModel.kt
@@ -10,7 +10,8 @@ data class GeneralConfig(
val advancedView: Boolean? = null,
val thinkingEnabled: Boolean? = null,
val noEgressEnabled: Boolean? = null,
- val executionMode: String? = null
+ val executionMode: String? = null,
+ val nativeToolsMode: String? = null
)
@Serializable
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt b/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt
index fb4c6a56..c41d82ec 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/config/HierarchicalConfigLoader.kt
@@ -96,6 +96,7 @@ class HierarchicalConfigLoader private constructor(
fun getGeneralThinkingEnabled(): Boolean? = getConfig().general?.thinkingEnabled
fun getGeneralNoEgressEnabled(): Boolean? = getConfig().general?.noEgressEnabled
fun getGeneralExecutionMode(): String? = getConfig().general?.executionMode
+ fun getNativeToolsMode(): String? = getConfig().general?.nativeToolsMode
// ═══════════════════════════════════════════════════════════════════════════════
// Provider Settings
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt
index e8313d8b..9d4e6bf8 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/DatabaseFactory.kt
@@ -66,15 +66,21 @@ object DatabaseFactory {
org.jetbrains.exposed.sql.transactions.TransactionManager.manager.defaultMinRepetitionDelay = 100
org.jetbrains.exposed.sql.transactions.TransactionManager.manager.defaultMaxRepetitionDelay = 1000
- // Create tables
- transaction(database) {
+ // Migrations run BEFORE createMissingTablesAndColumns so they can rebuild
+ // tables whose schema drifted from the current Exposed definitions
+ // (e.g. V4 rewires snapshots/subtasks to use snapshot_groups).
+ // Each migration is idempotent and guards against missing tables for fresh DBs.
+ MigrationRunner.run(database)
- // Create tables in dependency order
- // Note: Subtasks <-> Snapshots have circular reference (both nullable)
+ transaction(database) {
+ // Create tables in dependency order.
+ // Snapshot groups have no FK back to subtasks (subtaskId is a plain column),
+ // so there's no cycle: Tasks -> SnapshotGroups -> Subtasks -> Snapshots.
SchemaUtils.createMissingTablesAndColumns(
TasksTable,
- SnapshotsTable, // Created before Subtasks (circular ref handled via nullable FKs)
+ SnapshotGroupsTable,
SubtasksTable,
+ SnapshotsTable,
ChatMessagesTable,
ApiLogsTable,
MCPServersTable,
@@ -95,8 +101,6 @@ object DatabaseFactory {
logger.info { "All tables created successfully" }
}
- MigrationRunner.run(database)
-
// Set initialized AFTER everything is complete (tables + migrations)
initialized = true
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotGroupsTable.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotGroupsTable.kt
new file mode 100644
index 00000000..a74971c9
--- /dev/null
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotGroupsTable.kt
@@ -0,0 +1,34 @@
+package pl.jclab.refio.core.db
+
+import org.jetbrains.exposed.sql.*
+import java.util.UUID
+
+/**
+ * Group of file snapshots taken together before a single write operation.
+ * One group per tool execution (typically one subtask). A group can contain
+ * snapshots of multiple files. Referenced from [SubtasksTable.snapshotIdBeforeWrite].
+ */
+object SnapshotGroupsTable : Table("snapshot_groups") {
+ val id = varchar("id", 36).clientDefault { UUID.randomUUID().toString() }
+ val taskId = varchar("task_id", 36).references(TasksTable.id, onDelete = ReferenceOption.CASCADE)
+
+    // Plain column (no FK) to avoid a circular FK with the subtasks table.
+    // Referential integrity is enforced in the other direction only: subtasks.snapshot_id_before_write -> snapshot_groups.id.
+ val subtaskId = varchar("subtask_id", 36).nullable()
+
+ val createdAt = long("created_at").clientDefault { System.currentTimeMillis() }
+
+ override val primaryKey = PrimaryKey(id)
+
+ init {
+ index("idx_snapshot_groups_task", false, taskId, createdAt)
+ index("idx_snapshot_groups_subtask", false, subtaskId)
+ }
+}
+
+data class SnapshotGroup(
+ val id: String,
+ val taskId: String,
+ val subtaskId: String?,
+ val createdAt: Long
+)
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt
index f4c744b3..83b1ffa8 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/SnapshotsTable.kt
@@ -5,27 +5,27 @@ import java.util.UUID
/**
* Snapshots table definition using Exposed ORM DSL
- * Stores file content snapshots for rollback capability
+ * Stores file content snapshots for rollback capability.
+ * Each snapshot belongs to a [SnapshotGroupsTable] (one group per tool execution).
*/
object SnapshotsTable : Table("snapshots") {
val id = varchar("id", 36).clientDefault { UUID.randomUUID().toString() }
val taskId = varchar("task_id", 36).references(TasksTable.id, onDelete = ReferenceOption.CASCADE)
- val subtaskId = varchar("subtask_id", 36).references(SubtasksTable.id, onDelete = ReferenceOption.CASCADE).nullable()
+ val groupId = varchar("group_id", 36).references(SnapshotGroupsTable.id, onDelete = ReferenceOption.CASCADE)
- val filePath = text("file_path") // Absolute or relative path within sandbox
- val contentHash = varchar("content_hash", 64) // SHA-256 hash
- val contentCompressed = blob("content_compressed") // zlib compressed content
- val originalSize = long("original_size") // Size in bytes before compression
- val compressionRatio = double("compression_ratio").default(1.0) // originalSize / compressedSize
+ val filePath = text("file_path")
+ val contentHash = varchar("content_hash", 64)
+ val contentCompressed = blob("content_compressed")
+ val originalSize = long("original_size")
+ val compressionRatio = double("compression_ratio").default(1.0)
val createdAt = long("created_at").clientDefault { System.currentTimeMillis() }
override val primaryKey = PrimaryKey(id)
init {
- // Index for efficient retrieval of snapshots by task
index("idx_snapshots_task", false, taskId, createdAt)
- // Index for finding snapshots by file path
+ index("idx_snapshots_group", false, groupId)
index("idx_snapshots_file_hash", false, filePath, contentHash)
}
}
@@ -36,7 +36,7 @@ object SnapshotsTable : Table("snapshots") {
data class Snapshot(
val id: String,
val taskId: String,
- val subtaskId: String?,
+ val groupId: String,
val filePath: String,
val contentHash: String,
val contentCompressed: ByteArray,
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt
index 702bd411..0d037ad1 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/SubtasksTable.kt
@@ -38,8 +38,8 @@ object SubtasksTable : Table("subtasks") {
val costUsd = double("cost_usd").default(0.0)
val latencyMs = integer("latency_ms").default(0)
- // Snapshots for rollback
- val snapshotIdBeforeWrite = varchar("snapshot_id_before_write", 36).references(SnapshotsTable.id, onDelete = ReferenceOption.SET_NULL).nullable()
+ // Snapshots for rollback — references a snapshot group (set of files captured before one tool execution)
+ val snapshotIdBeforeWrite = varchar("snapshot_id_before_write", 36).references(SnapshotGroupsTable.id, onDelete = ReferenceOption.SET_NULL).nullable()
// Timestamps
val createdAt = long("created_at").clientDefault { System.currentTimeMillis() }
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt
index c8e5a1a6..d2ef6ac4 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/MigrationRunner.kt
@@ -18,6 +18,7 @@ object MigrationRunner {
// Seed data was for UI development only. Existing databases retain v1 data.
V2DropAgentEventsSessionFk(),
V3RenameSlashCommandToSlashPrompt(),
+ V4SnapshotGroups(),
)
fun run(database: Database) {
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/SqliteIntrospection.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/SqliteIntrospection.kt
new file mode 100644
index 00000000..e558ca45
--- /dev/null
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/SqliteIntrospection.kt
@@ -0,0 +1,24 @@
+package pl.jclab.refio.core.db.migrations
+
+import java.sql.Connection
+
+/**
+ * Reports whether `sqlite_master` lists a table called [name].
+ *
+ * Only entries of type `table` are considered, and the name is bound as a statement
+ * parameter rather than spliced into the SQL text.
+ *
+ * @param jdbc open JDBC connection to the SQLite database
+ * @param name table name to look up
+ * @return `true` when the lookup query yields at least one row
+ */
+internal fun tableExists(jdbc: Connection, name: String): Boolean =
+    jdbc.prepareStatement("SELECT 1 FROM sqlite_master WHERE type='table' AND name = ?").use { lookup ->
+        lookup.setString(1, name)
+        lookup.executeQuery().use { rows -> rows.next() }
+    }
+
+/**
+ * Reports whether [table] exists and has a column named [column].
+ *
+ * The column list comes from `PRAGMA table_info`; names are compared ignoring case.
+ * The table name is interpolated into the PRAGMA text (wrapped in double quotes), so it
+ * must come from trusted code, not from user input.
+ *
+ * @param jdbc open JDBC connection to the SQLite database
+ * @param table table to inspect
+ * @param column column name to search for
+ * @return `false` when the table is missing or no column matches, `true` otherwise
+ */
+internal fun columnExists(jdbc: Connection, table: String, column: String): Boolean {
+    if (!tableExists(jdbc, table)) return false
+    return jdbc.createStatement().use { statement ->
+        statement.executeQuery("PRAGMA table_info(\"$table\")").use { columns ->
+            var found = false
+            // Stop advancing the cursor as soon as a matching column name is seen.
+            while (!found && columns.next()) {
+                found = columns.getString("name").equals(column, ignoreCase = true)
+            }
+            found
+        }
+    }
+}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt
index a2dd36ee..1e4b30f8 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V2DropAgentEventsSessionFk.kt
@@ -27,6 +27,10 @@ class V2DropAgentEventsSessionFk : Migration {
override fun migrate(database: Database) {
transaction(database) {
val jdbc = (connection.connection as java.sql.Connection)
+ if (!tableExists(jdbc, "agent_events")) {
+ logger.info { "agent_events table does not exist (fresh DB); skipping V2 rebuild" }
+ return@transaction
+ }
jdbc.createStatement().use { st ->
st.execute("PRAGMA foreign_keys = OFF")
try {
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt
index 82900747..5df7f138 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V3RenameSlashCommandToSlashPrompt.kt
@@ -20,6 +20,10 @@ class V3RenameSlashCommandToSlashPrompt : Migration {
override fun migrate(database: Database) {
transaction(database) {
val jdbc = (connection.connection as java.sql.Connection)
+ if (!tableExists(jdbc, "prompts")) {
+ logger.info { "prompts table does not exist (fresh DB); skipping V3 rename" }
+ return@transaction
+ }
jdbc.createStatement().use { st ->
val updated = st.executeUpdate(
"UPDATE prompts SET type = 'SLASH_PROMPT' WHERE type = 'SLASH_COMMAND'"
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V4SnapshotGroups.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V4SnapshotGroups.kt
new file mode 100644
index 00000000..c9f14f0c
--- /dev/null
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/migrations/V4SnapshotGroups.kt
@@ -0,0 +1,204 @@
+package pl.jclab.refio.core.db.migrations
+
+import org.jetbrains.exposed.sql.Database
+import org.jetbrains.exposed.sql.transactions.transaction
+import pl.jclab.refio.core.logging.dualLogger
+import java.util.UUID
+
+private val logger = dualLogger("V4SnapshotGroups")
+
+/**
+ * Migration 4: introduces `snapshot_groups` and rewires the snapshot foreign keys.
+ *
+ * - `snapshots.subtask_id` (FK -> subtasks.id) becomes `snapshots.group_id` (FK -> snapshot_groups.id).
+ * - `subtasks.snapshot_id_before_write` previously declared FK -> snapshots.id; the runtime
+ *   always tried to store a subtaskId there, so every link attempt failed the FK constraint
+ *   and the column stayed NULL. The FK now points at snapshot_groups.id instead.
+ *
+ * SQLite cannot alter FKs in place, so both tables are rebuilt (create `*_new`, copy, drop,
+ * rename). Existing snapshots are migrated by creating one group per distinct
+ * `(task_id, subtask_id)` pair; snapshots with a NULL `subtask_id` share one group per task.
+ * The original `subtask_id` is kept on the group row for traceability.
+ *
+ * The migration is skipped when `snapshots` does not exist or no longer has a `subtask_id`
+ * column, which covers fresh and already-migrated databases.
+ */
+class V4SnapshotGroups : Migration {
+    override val version: Int = 4
+
+    /**
+     * Runs the whole rebuild inside one Exposed transaction on [database].
+     *
+     * Steps: (1) create `snapshot_groups`, (2) insert one group per existing
+     * `(task_id, subtask_id)` pair, (3) rebuild `snapshots` with `group_id`, (4) rebuild
+     * `subtasks` so `snapshot_id_before_write` references `snapshot_groups` (existing values
+     * are written as NULL).
+     */
+    override fun migrate(database: Database) {
+        transaction(database) {
+            val jdbc = connection.connection as java.sql.Connection
+            // Fresh DB: snapshots is created later with new schema, nothing to migrate.
+            if (!tableExists(jdbc, "snapshots") || !columnExists(jdbc, "snapshots", "subtask_id")) {
+                logger.info { "snapshots table absent or already migrated; skipping V4" }
+                return@transaction
+            }
+            jdbc.createStatement().use { st ->
+                // NOTE(review): SQLite documents `PRAGMA foreign_keys` as a no-op while a
+                // transaction is open. This statement runs inside `transaction { }`, so confirm
+                // that enforcement is really off during the DROP TABLE steps below; otherwise
+                // the implicit deletes may fire ON DELETE actions on referencing tables.
+                st.execute("PRAGMA foreign_keys = OFF")
+                try {
+                    // 1) Create snapshot_groups if not present
+                    st.execute(
+                        """
+                        CREATE TABLE IF NOT EXISTS snapshot_groups (
+                            id VARCHAR(36) PRIMARY KEY,
+                            task_id VARCHAR(36) NOT NULL,
+                            subtask_id VARCHAR(36),
+                            created_at BIGINT NOT NULL,
+                            FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE
+                        )
+                        """.trimIndent()
+                    )
+                    st.execute("CREATE INDEX IF NOT EXISTS idx_snapshot_groups_task ON snapshot_groups(task_id, created_at)")
+                    st.execute("CREATE INDEX IF NOT EXISTS idx_snapshot_groups_subtask ON snapshot_groups(subtask_id)")
+
+                    // 2) Create groups for existing snapshots.
+                    //    One group per distinct (task_id, subtask_id) — NULL subtask_id also forms its own group per task.
+                    //    GROUP BY already yields one row per pair, so no DISTINCT is needed.
+                    val existingSnapshots = mutableListOf<Triple<String, String?, Long>>()
+                    jdbc.prepareStatement(
+                        "SELECT task_id, subtask_id, MIN(created_at) FROM snapshots GROUP BY task_id, subtask_id"
+                    ).use { ps ->
+                        ps.executeQuery().use { rs ->
+                            while (rs.next()) {
+                                existingSnapshots.add(
+                                    Triple(rs.getString(1), rs.getString(2), rs.getLong(3))
+                                )
+                            }
+                        }
+                    }
+
+                    // Maps each (task_id, subtask_id) pair to the id of the group created for it.
+                    val groupIdBySubtaskKey = mutableMapOf<Pair<String, String?>, String>()
+                    jdbc.prepareStatement(
+                        "INSERT INTO snapshot_groups(id, task_id, subtask_id, created_at) VALUES (?, ?, ?, ?)"
+                    ).use { ins ->
+                        for ((taskId, subtaskId, createdAt) in existingSnapshots) {
+                            val groupId = UUID.randomUUID().toString()
+                            groupIdBySubtaskKey[taskId to subtaskId] = groupId
+                            ins.setString(1, groupId)
+                            ins.setString(2, taskId)
+                            if (subtaskId == null) ins.setNull(3, java.sql.Types.VARCHAR) else ins.setString(3, subtaskId)
+                            ins.setLong(4, createdAt)
+                            ins.addBatch()
+                        }
+                        ins.executeBatch()
+                    }
+
+                    // 3) Rebuild `snapshots` with group_id instead of subtask_id
+                    st.execute(
+                        """
+                        CREATE TABLE snapshots_new (
+                            id VARCHAR(36) PRIMARY KEY,
+                            task_id VARCHAR(36) NOT NULL,
+                            group_id VARCHAR(36) NOT NULL,
+                            file_path TEXT NOT NULL,
+                            content_hash VARCHAR(64) NOT NULL,
+                            content_compressed BLOB NOT NULL,
+                            original_size BIGINT NOT NULL,
+                            compression_ratio DOUBLE NOT NULL DEFAULT 1.0,
+                            created_at BIGINT NOT NULL,
+                            FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE,
+                            FOREIGN KEY (group_id) REFERENCES snapshot_groups(id) ON DELETE CASCADE
+                        )
+                        """.trimIndent()
+                    )
+
+                    // Copy rows, resolving group_id from the (task_id, subtask_id) pair
+                    var skippedSnapshots = 0
+                    jdbc.prepareStatement(
+                        "SELECT id, task_id, subtask_id, file_path, content_hash, content_compressed, original_size, compression_ratio, created_at FROM snapshots"
+                    ).use { ps ->
+                        jdbc.prepareStatement(
+                            "INSERT INTO snapshots_new(id, task_id, group_id, file_path, content_hash, content_compressed, original_size, compression_ratio, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
+                        ).use { ins ->
+                            ps.executeQuery().use { rs ->
+                                while (rs.next()) {
+                                    val id = rs.getString(1)
+                                    val taskId = rs.getString(2)
+                                    val subtaskId = rs.getString(3) // may be null
+                                    val groupId = groupIdBySubtaskKey[taskId to subtaskId]
+                                    if (groupId == null) {
+                                        // Orphan; shouldn't happen. Skip it, but count it so the
+                                        // loss is visible in the log before the old table is dropped.
+                                        skippedSnapshots++
+                                        continue
+                                    }
+                                    ins.setString(1, id)
+                                    ins.setString(2, taskId)
+                                    ins.setString(3, groupId)
+                                    ins.setString(4, rs.getString(4))
+                                    ins.setString(5, rs.getString(5))
+                                    ins.setBytes(6, rs.getBytes(6))
+                                    ins.setLong(7, rs.getLong(7))
+                                    ins.setDouble(8, rs.getDouble(8))
+                                    ins.setLong(9, rs.getLong(9))
+                                    ins.addBatch()
+                                }
+                                ins.executeBatch()
+                            }
+                        }
+                    }
+                    if (skippedSnapshots > 0) {
+                        logger.info { "Skipped $skippedSnapshots snapshot row(s) with no matching snapshot group" }
+                    }
+
+                    st.execute("DROP TABLE snapshots")
+                    st.execute("ALTER TABLE snapshots_new RENAME TO snapshots")
+                    st.execute("CREATE INDEX IF NOT EXISTS idx_snapshots_task ON snapshots(task_id, created_at)")
+                    st.execute("CREATE INDEX IF NOT EXISTS idx_snapshots_group ON snapshots(group_id)")
+                    st.execute("CREATE INDEX IF NOT EXISTS idx_snapshots_file_hash ON snapshots(file_path, content_hash)")
+
+                    // 4) Rebuild `subtasks` to change FK of snapshot_id_before_write.
+                    //    Pre-existing values are effectively all NULL (FK always failed), so we can also
+                    //    NULL them defensively during the rebuild.
+                    st.execute(
+                        """
+                        CREATE TABLE subtasks_new (
+                            id VARCHAR(36) PRIMARY KEY,
+                            task_id VARCHAR(36) NOT NULL,
+                            order_index INT NOT NULL,
+                            kind VARCHAR(32) NOT NULL,
+                            status VARCHAR(16) NOT NULL DEFAULT 'NEW',
+                            description TEXT NOT NULL,
+                            params_json TEXT,
+                            step_plan_json TEXT,
+                            summary TEXT,
+                            requires_approval INT NOT NULL DEFAULT 0,
+                            approval_status VARCHAR(32) NOT NULL DEFAULT 'NOT_REQUIRED',
+                            approved_at BIGINT,
+                            result TEXT,
+                            error_message TEXT,
+                            error_stacktrace TEXT,
+                            llm_model VARCHAR(64),
+                            llm_provider VARCHAR(32),
+                            input_tokens INT NOT NULL DEFAULT 0,
+                            output_tokens INT NOT NULL DEFAULT 0,
+                            cost_usd DOUBLE NOT NULL DEFAULT 0.0,
+                            latency_ms INT NOT NULL DEFAULT 0,
+                            snapshot_id_before_write VARCHAR(36),
+                            created_at BIGINT NOT NULL,
+                            updated_at BIGINT NOT NULL,
+                            started_at BIGINT,
+                            completed_at BIGINT,
+                            FOREIGN KEY (task_id) REFERENCES tasks(id) ON DELETE CASCADE,
+                            FOREIGN KEY (snapshot_id_before_write) REFERENCES snapshot_groups(id) ON DELETE SET NULL,
+                            UNIQUE (task_id, order_index)
+                        )
+                        """.trimIndent()
+                    )
+
+                    st.execute(
+                        """
+                        INSERT INTO subtasks_new
+                            (id, task_id, order_index, kind, status, description, params_json, step_plan_json,
+                             summary, requires_approval, approval_status, approved_at, result, error_message,
+                             error_stacktrace, llm_model, llm_provider, input_tokens, output_tokens, cost_usd,
+                             latency_ms, snapshot_id_before_write, created_at, updated_at, started_at, completed_at)
+                        SELECT
+                            id, task_id, order_index, kind, status, description, params_json, step_plan_json,
+                            summary, requires_approval, approval_status, approved_at, result, error_message,
+                            error_stacktrace, llm_model, llm_provider, input_tokens, output_tokens, cost_usd,
+                            latency_ms, NULL, created_at, updated_at, started_at, completed_at
+                        FROM subtasks
+                        """.trimIndent()
+                    )
+
+                    st.execute("DROP TABLE subtasks")
+                    st.execute("ALTER TABLE subtasks_new RENAME TO subtasks")
+
+                    logger.info {
+                        "Migrated ${existingSnapshots.size} snapshot group(s); rebuilt snapshots + subtasks with corrected FKs"
+                    }
+                } finally {
+                    st.execute("PRAGMA foreign_keys = ON")
+                }
+            }
+        }
+    }
+}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt
index 58194de2..80b77e8d 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/RagRepository.kt
@@ -296,20 +296,27 @@ class RagRepository {
// ========== Embeddings Operations ==========
/**
- * Create embedding
+ * Create embedding.
+ *
+ * Uses INSERT OR IGNORE: if the (chunkId, model) unique key already exists, the
+ * insert is silently skipped and `null` is returned.
+ *
+ * NOTE(review): SQLite's IGNORE conflict resolution behaves like ABORT for
+ * foreign-key violations, so an insert whose parent chunk has been concurrently
+ * deleted still fails with `SQLITE_CONSTRAINT_FOREIGNKEY` (e.g. when
+ * FileAnalyzerService re-indexes while another caller has evicted the file row).
+ * Callers that must tolerate that race need to handle the exception themselves.
*/
fun createEmbedding(
chunkId: Int,
model: String,
vector: ByteArray,
dimensions: Int
- ): Int = transaction {
- EmbeddingsTable.insertAndGetId {
+ ): Int? = transaction {
+ EmbeddingsTable.insertIgnoreAndGetId {
it[EmbeddingsTable.chunkId] = chunkId
it[EmbeddingsTable.model] = model
it[EmbeddingsTable.vector] = ExposedBlob(vector)
it[EmbeddingsTable.dimensions] = dimensions
- }.value
+ }?.value
}
/**
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotGroupRepository.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotGroupRepository.kt
new file mode 100644
index 00000000..ca001ac1
--- /dev/null
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotGroupRepository.kt
@@ -0,0 +1,51 @@
+package pl.jclab.refio.core.db.repositories
+
+import org.jetbrains.exposed.sql.*
+import org.jetbrains.exposed.sql.transactions.transaction
+import pl.jclab.refio.core.db.SnapshotGroup
+import pl.jclab.refio.core.db.SnapshotGroupsTable
+import pl.jclab.refio.core.logging.dualLogger
+
+private val logger = dualLogger("SnapshotGroupRepository")
+
+/**
+ * Data access for [SnapshotGroupsTable]. One group ties together the file snapshots
+ * captured before a single tool execution.
+ */
+class SnapshotGroupRepository {
+
+    /**
+     * Inserts a group row for [taskId] (optionally linked to [subtaskId]) and returns the
+     * row as read back from the table.
+     *
+     * @throws IllegalStateException if the freshly inserted row cannot be read back
+     */
+    fun create(taskId: String, subtaskId: String?): SnapshotGroup = transaction {
+        val inserted = SnapshotGroupsTable.insert { row ->
+            row[SnapshotGroupsTable.taskId] = taskId
+            row[SnapshotGroupsTable.subtaskId] = subtaskId
+        }
+        val id = inserted.get(SnapshotGroupsTable.id)
+
+        logger.info { "Created snapshot group: id=$id, taskId=$taskId, subtaskId=$subtaskId" }
+        findById(id) ?: error("Failed to retrieve created snapshot group $id")
+    }
+
+    /** Returns the group whose primary key equals [id], or `null` when there is none. */
+    fun findById(id: String): SnapshotGroup? {
+        return transaction {
+            SnapshotGroupsTable.selectAll()
+                .where { SnapshotGroupsTable.id eq id }
+                .map { toGroup(it) }
+                .singleOrNull()
+        }
+    }
+
+    /**
+     * Returns the most recently created group recorded for [subtaskId] (highest
+     * `createdAt`), or `null` when the subtask has no group.
+     */
+    fun findBySubtaskId(subtaskId: String): SnapshotGroup? {
+        return transaction {
+            SnapshotGroupsTable.selectAll()
+                .where { SnapshotGroupsTable.subtaskId eq subtaskId }
+                .orderBy(SnapshotGroupsTable.createdAt to SortOrder.DESC)
+                .limit(1)
+                .map { toGroup(it) }
+                .singleOrNull()
+        }
+    }
+
+    /** Copies the four group columns of [row] into a [SnapshotGroup]. */
+    private fun toGroup(row: ResultRow): SnapshotGroup {
+        return SnapshotGroup(
+            id = row[SnapshotGroupsTable.id],
+            taskId = row[SnapshotGroupsTable.taskId],
+            subtaskId = row[SnapshotGroupsTable.subtaskId],
+            createdAt = row[SnapshotGroupsTable.createdAt]
+        )
+    }
+}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt
index 411d5e92..7d56f46c 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/db/repositories/SnapshotRepository.kt
@@ -24,7 +24,7 @@ class SnapshotRepository {
*/
fun create(
taskId: String,
- subtaskId: String? = null,
+ groupId: String,
filePath: String,
content: String,
contentHash: String
@@ -36,7 +36,7 @@ class SnapshotRepository {
val snapshotId = SnapshotsTable.insert {
it[SnapshotsTable.taskId] = taskId
- it[SnapshotsTable.subtaskId] = subtaskId
+ it[SnapshotsTable.groupId] = groupId
it[SnapshotsTable.filePath] = filePath
it[SnapshotsTable.contentHash] = contentHash
it[contentCompressed] = ExposedBlob(compressed)
@@ -78,12 +78,12 @@ class SnapshotRepository {
}
/**
- * Find snapshots for a specific subtask
+ * Find snapshots belonging to a snapshot group (one group per tool execution).
*/
- fun findBySubtaskId(subtaskId: String): List {
+ fun findByGroupId(groupId: String): List {
return transaction {
SnapshotsTable.selectAll()
- .where { SnapshotsTable.subtaskId eq subtaskId }
+ .where { SnapshotsTable.groupId eq groupId }
.orderBy(SnapshotsTable.createdAt to SortOrder.DESC)
.map { rowToSnapshot(it) }
}
@@ -205,7 +205,7 @@ class SnapshotRepository {
return Snapshot(
id = row[SnapshotsTable.id],
taskId = row[SnapshotsTable.taskId],
- subtaskId = row[SnapshotsTable.subtaskId],
+ groupId = row[SnapshotsTable.groupId],
filePath = row[SnapshotsTable.filePath],
contentHash = row[SnapshotsTable.contentHash],
contentCompressed = row[SnapshotsTable.contentCompressed].bytes,
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt
index 01a6ac01..7c6b9277 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/Base.kt
@@ -56,6 +56,33 @@ data class LLMUsage(
val totalTokens: Int
)
+/**
+ * A single tool invocation emitted by the LLM through a provider's native function-calling API.
+ *
+ * Provider-neutral shape: OpenAI `tool_calls[]`, Anthropic `content[].tool_use` and
+ * Ollama `message.tool_calls[]` are each mapped onto this type by their adapter before
+ * `chat()` returns.
+ *
+ * @property id Stable call identifier. Ollama omits it, so its adapter generates a UUID.
+ * @property name Tool name as registered in ToolRegistry.
+ * @property argumentsJson Raw, unparsed JSON string of the call arguments; AgentTurnLoop parses it.
+ */
+data class NativeToolCall(
+ val id: String,
+ val name: String,
+ val argumentsJson: String,
+)
+
+/**
+ * Incremental tool-call delta for streaming.
+ *
+ * Only [index] is mandatory; every fragment field is optional and defaults to `null`.
+ * NOTE(review): presumably the non-null fragments are appended in arrival order to rebuild
+ * a [NativeToolCall] — confirm against the streaming adapters.
+ *
+ * @property index Tool-call index carried by this delta (presumably the call's position in
+ *   the response's tool-call list — TODO confirm).
+ * @property idDelta Fragment of the call id, or `null` when this chunk carries none.
+ * @property nameDelta Fragment of the tool name, or `null` when this chunk carries none.
+ * @property argumentsDelta Fragment of the arguments JSON text, or `null` when this chunk carries none.
+ */
+data class NativeToolCallDelta(
+ val index: Int,
+ val idDelta: String? = null,
+ val nameDelta: String? = null,
+ val argumentsDelta: String? = null,
+)
+
/**
* Single chunk from streaming LLM response (US-027)
*/
@@ -63,7 +90,8 @@ data class StreamChunk(
val delta: String, // Incremental content
val thinking: String? = null, // Incremental thinking content (reasoning models)
val finishReason: String? = null, // "stop", "length", "cancelled", etc.
- val usage: LLMUsage? = null // Present only on final chunk
+ val usage: LLMUsage? = null, // Present only on final chunk
+ val toolCallDelta: NativeToolCallDelta? = null // Present only on native-tools streaming path
)
/**
@@ -77,9 +105,28 @@ data class LLMResponse(
val cost: Double, // Estimated cost in USD
val finishReason: String? = null,
val rawResponse: Map? = null, // Allow nullable values in response map
- val thinking: String? = null // Complete thinking process (reasoning models)
+ val thinking: String? = null, // Complete thinking process (reasoning models)
+ /**
+ * Tool calls emitted via native function-calling API.
+ *
+ * - null = adapter did NOT use native tools path (fall back to JSON-in-text parsing)
+ * - emptyList() = native tools path was used, but model chose not to call any tool
+ * - non-empty = AgentTurnLoop should use these directly, skipping ToolCallParser
+ *
+ * Adapters MUST leave this null when `tools` was not passed in the request.
+ */
+ val nativeToolCalls: List? = null,
)
+/**
+ * Thrown by adapters when a provider rejects a request because native tool calling is
+ * not supported (e.g. HTTP 400 with "tool_choice"/"tools" in the error message).
+ *
+ * Caught by AgentTurnLoop to mark the model in [NativeToolsFallbackTracker] and retry
+ * without tools on the JSON-in-text path.
+ *
+ * @param message description of the rejection, passed through to [Exception]
+ * @param cause underlying failure, if any, passed through to [Exception]
+ */
+class ToolsNotSupportedException(message: String, cause: Throwable? = null) : Exception(message, cause)
+
/**
* Abstract base class for LLM adapters.
*
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt
index b7d54ec3..cf146bdf 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/JsonExtractor.kt
@@ -95,6 +95,23 @@ object JsonExtractor {
return rawJson
}
+ // Turn-loop envelope: {"response": "...", "actions": [{"tool": ..., "args": ...}]}
+ // Treat each action as a subtask so downstream plan-extraction code works uniformly.
+ // This is the canonical shape emitted by PLAN/AGENT modes on the JSON-in-text path
+ // and by the native-tools serializer.
+ if (rawJson.containsKey("actions") && rawJson["actions"] is List<*>) {
+ logger.info { "[JSON] Normalizing envelope: 'actions' array → 'subtasks'" }
+ val subtasks = normalizeStepsArray(rawJson["actions"])
+ val planDescription = rawJson["response"]?.toString()
+ ?: rawJson["plan"]?.toString()
+ ?: rawJson["summary"]?.toString()
+ ?: "Execution plan"
+ return mapOf(
+ "plan" to planDescription,
+ "subtasks" to subtasks
+ )
+ }
+
// Helper function to find key case-insensitively
fun findKey(vararg keys: String): Pair? {
for (key in keys) {
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt
index 841f04fc..8f2730fe 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/LLMClient.kt
@@ -364,11 +364,14 @@ class LLMClient(
kwargs = adapterKwargs
)
- // For streaming mode, build response from accumulated data
+ // For streaming mode, build response from accumulated data while preserving
+ // adapter-supplied fields (nativeToolCalls, thinking, rawResponse). Dropping
+ // nativeToolCalls here would force AgentTurnLoop back into JSON-in-text parsing
+ // even when the adapter successfully parsed the native tool_calls stream.
val finalResponse = if (stream && contentBuilder.isNotEmpty()) {
val usage = finalUsage ?: response.usage
val cost = estimateCost(usage, provider, model)
- LLMResponse(
+ response.copy(
content = contentBuilder.toString(),
usage = usage,
model = model,
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt
index ca6e686d..682bef83 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/ModelDefinitions.kt
@@ -71,6 +71,40 @@ object ModelDefinitions {
*/
val OPENAI_MODELS = mapOf(
+ //
+ // GPT 5.5
+ //
+ "gpt-5.5" to ModelDefinition(
+ id = "gpt-5.5",
+ name = "GPT-5.5",
+ provider = "openai",
+ description = "Best intelligence at scale for agentic, coding, and professional workflows",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.VISION,
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 1_050_000,
+ maxOutputTokens = 128_000,
+ costPer1MInput = 2.50,
+ costPer1MOutput = 15.00,
+ supportsVision = true,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ supportsThinking = true,
+ reasoningTokensMultiplier = 2.5,
+ endpointType = ApiEndpointType.CHAT_COMPLETIONS,
+ apiFormat = ApiFormat.CHAT_COMPLETIONS,
+ paramMappings = mapOf(
+ "max_tokens" to "max_completion_tokens"
+ ),
+ removeParams = listOf(
+ "temperature"
+ ),
+ active = true
+ ),
//
// GPT 5.4
//
@@ -135,6 +169,36 @@ object ModelDefinitions {
),
active = true
),
+ "gpt-5.4-nano" to ModelDefinition(
+ id = "gpt-5.4-nano",
+ name = "GPT-5.4 Nano",
+ provider = "openai",
+ description = "Ultra cost-effective model for simple tasks",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.VISION,
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = 32_768,
+ costPer1MInput = 0.05,
+ costPer1MOutput = 0.40,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ paramMappings = mapOf(
+ "max_tokens" to "max_completion_tokens"
+ ),
+ removeParams = listOf(
+ "frequency_penalty",
+ "presence_penalty",
+ "top_p",
+ "temperature"
+ ),
+ active = true
+ ),
//
// GPT 5.2
@@ -1225,6 +1289,7 @@ object ModelDefinitions {
supportsStreaming = true,
supportsFunctionCalling = true,
supportsThinking = true,
+ removeParams = listOf("temperature"),
active = true
),
@@ -1248,6 +1313,7 @@ object ModelDefinitions {
supportsStreaming = true,
supportsFunctionCalling = true,
supportsThinking = true,
+ removeParams = listOf("temperature"),
active = true
),
@@ -2879,6 +2945,283 @@ object ModelDefinitions {
active = true
),
+ "qwen3.6:35b" to ModelDefinition(
+ id = "qwen3.6:35b",
+ name = "Qwen 3.6 35B",
+ provider = "ollama",
+ description = "Qwen 3.6 35B multimodal model with 256K context and native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.VISION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.MULTIMODAL,
+ maxContext = 256_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = true,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+
+ "qwen3.6:27b" to ModelDefinition(
+ id = "qwen3.6:27b",
+ name = "Qwen 3.6 27B",
+ provider = "ollama",
+ description = "Qwen 3.6 27B multimodal model with 256K context and native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.VISION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.MULTIMODAL,
+ maxContext = 256_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = true,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "qwen3-next:80b" to ModelDefinition(
+ id = "qwen3-next:80b",
+ name = "Qwen 3 Next 80B",
+ provider = "ollama",
+ description = "Qwen 3 Next 80B with native tool use and long context",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "gpt-oss-safeguard:20b" to ModelDefinition(
+ id = "gpt-oss-safeguard:20b",
+ name = "GPT-OSS Safeguard 20B",
+ provider = "ollama",
+ description = "OpenAI gpt-oss-safeguard 20B with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "gpt-oss-safeguard:120b" to ModelDefinition(
+ id = "gpt-oss-safeguard:120b",
+ name = "GPT-OSS Safeguard 120B",
+ provider = "ollama",
+ description = "OpenAI gpt-oss-safeguard 120B with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "deepseek-v3.2:latest" to ModelDefinition(
+ id = "deepseek-v3.2:latest",
+ name = "DeepSeek V3.2",
+ provider = "ollama",
+ description = "DeepSeek V3.2 with native tool use and reasoning",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "kimi-k2-thinking:latest" to ModelDefinition(
+ id = "kimi-k2-thinking:latest",
+ name = "Kimi K2 Thinking",
+ provider = "ollama",
+ description = "Moonshot Kimi K2 Thinking with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 200_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "minimax-m2:latest" to ModelDefinition(
+ id = "minimax-m2:latest",
+ name = "MiniMax M2",
+ provider = "ollama",
+ description = "MiniMax M2 with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "nemotron-3-super:120b" to ModelDefinition(
+ id = "nemotron-3-super:120b",
+ name = "Nemotron 3 Super 120B",
+ provider = "ollama",
+ description = "NVIDIA Nemotron 3 Super 120B with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "nemotron-3-nano:4b" to ModelDefinition(
+ id = "nemotron-3-nano:4b",
+ name = "Nemotron 3 Nano 4B",
+ provider = "ollama",
+ description = "NVIDIA Nemotron 3 Nano 4B with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "nemotron-3-nano:30b" to ModelDefinition(
+ id = "nemotron-3-nano:30b",
+ name = "Nemotron 3 Nano 30B",
+ provider = "ollama",
+ description = "NVIDIA Nemotron 3 Nano 30B with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
+ "nemotron-cascade-2:30b" to ModelDefinition(
+ id = "nemotron-cascade-2:30b",
+ name = "Nemotron Cascade 2 30B",
+ provider = "ollama",
+ description = "NVIDIA Nemotron Cascade 2 30B with native tool use",
+ capabilities = listOf(
+ ModelCapability.CHAT_COMPLETION,
+ ModelCapability.TEXT_COMPLETION,
+ ModelCapability.TOOL_USE
+ ),
+ modelType = ModelType.TEXT,
+ maxContext = 128_000,
+ maxOutputTokens = null,
+ costPer1MInput = 0.0,
+ costPer1MOutput = 0.0,
+ supportsVision = false,
+ supportsReasoning = true,
+ supportsStreaming = true,
+ supportsFunctionCalling = true,
+ defaultParams = mapOf("temperature" to 0.7),
+ active = true
+ ),
+
"qwen3-coder:30b" to ModelDefinition(
id = "qwen3-coder:30b",
name = "Qwen 3 Coder 30B",
@@ -3007,8 +3350,7 @@ object ModelDefinitions {
description = "Compact reasoning model with excellent efficiency",
capabilities = listOf(
ModelCapability.CHAT_COMPLETION,
- ModelCapability.REASONING,
- ModelCapability.TOOL_USE
+ ModelCapability.REASONING
),
modelType = ModelType.TEXT,
maxContext = 64_000,
@@ -3018,7 +3360,7 @@ object ModelDefinitions {
supportsVision = false,
supportsReasoning = true,
supportsStreaming = true,
- supportsFunctionCalling = true,
+ supportsFunctionCalling = false,
defaultParams = mapOf("temperature" to 0.7),
active = true
),
@@ -3030,8 +3372,7 @@ object ModelDefinitions {
description = "Open reasoning model approaching O3/Gemini 2.5 Pro performance",
capabilities = listOf(
ModelCapability.CHAT_COMPLETION,
- ModelCapability.REASONING,
- ModelCapability.TOOL_USE
+ ModelCapability.REASONING
),
modelType = ModelType.TEXT,
maxContext = 64_000,
@@ -3041,7 +3382,7 @@ object ModelDefinitions {
supportsVision = false,
supportsReasoning = true,
supportsStreaming = true,
- supportsFunctionCalling = true,
+ supportsFunctionCalling = false,
defaultParams = mapOf("temperature" to 0.7),
active = true
),
@@ -3053,8 +3394,7 @@ object ModelDefinitions {
description = "High-performance reasoning model",
capabilities = listOf(
ModelCapability.CHAT_COMPLETION,
- ModelCapability.REASONING,
- ModelCapability.TOOL_USE
+ ModelCapability.REASONING
),
modelType = ModelType.TEXT,
maxContext = 64_000,
@@ -3064,7 +3404,7 @@ object ModelDefinitions {
supportsVision = false,
supportsReasoning = true,
supportsStreaming = true,
- supportsFunctionCalling = true,
+ supportsFunctionCalling = false,
defaultParams = mapOf("temperature" to 0.7),
active = true
),
@@ -3076,8 +3416,7 @@ object ModelDefinitions {
description = "Large reasoning model with superior capabilities",
capabilities = listOf(
ModelCapability.CHAT_COMPLETION,
- ModelCapability.REASONING,
- ModelCapability.TOOL_USE
+ ModelCapability.REASONING
),
modelType = ModelType.TEXT,
maxContext = 64_000,
@@ -3087,7 +3426,7 @@ object ModelDefinitions {
supportsVision = false,
supportsReasoning = true,
supportsStreaming = true,
- supportsFunctionCalling = true,
+ supportsFunctionCalling = false,
defaultParams = mapOf("temperature" to 0.7),
active = true
),
@@ -3099,8 +3438,7 @@ object ModelDefinitions {
description = "Very large reasoning model with excellent performance",
capabilities = listOf(
ModelCapability.CHAT_COMPLETION,
- ModelCapability.REASONING,
- ModelCapability.TOOL_USE
+ ModelCapability.REASONING
),
modelType = ModelType.TEXT,
maxContext = 64_000,
@@ -3110,7 +3448,7 @@ object ModelDefinitions {
supportsVision = false,
supportsReasoning = true,
supportsStreaming = true,
- supportsFunctionCalling = true,
+ supportsFunctionCalling = false,
defaultParams = mapOf("temperature" to 0.7),
active = true
),
@@ -3122,8 +3460,7 @@ object ModelDefinitions {
description = "Flagship reasoning model with state-of-the-art capabilities",
capabilities = listOf(
ModelCapability.CHAT_COMPLETION,
- ModelCapability.REASONING,
- ModelCapability.TOOL_USE
+ ModelCapability.REASONING
),
modelType = ModelType.TEXT,
maxContext = 64_000,
@@ -3133,7 +3470,7 @@ object ModelDefinitions {
supportsVision = false,
supportsReasoning = true,
supportsStreaming = true,
- supportsFunctionCalling = true,
+ supportsFunctionCalling = false,
defaultParams = mapOf("temperature" to 0.7),
active = true
),
@@ -3145,8 +3482,7 @@ object ModelDefinitions {
description = "Massive MoE reasoning model rivaling closed-source models",
capabilities = listOf(
ModelCapability.CHAT_COMPLETION,
- ModelCapability.REASONING,
- ModelCapability.TOOL_USE
+ ModelCapability.REASONING
),
modelType = ModelType.TEXT,
maxContext = 64_000,
@@ -3156,7 +3492,7 @@ object ModelDefinitions {
supportsVision = false,
supportsReasoning = true,
supportsStreaming = true,
- supportsFunctionCalling = true,
+ supportsFunctionCalling = false,
defaultParams = mapOf("temperature" to 0.7),
active = true
),
@@ -4447,6 +4783,149 @@ object ModelDefinitions {
)
)
+ /**
+ * OpenRouter Models Registry (curated)
+ *
+ * OpenRouter exposes 200+ models via dynamic listing. This registry contains static
+ * definitions for the popular families where we know native function-calling is
+ * reliable — so AgentTurnLoop takes the native tool_calls path instead of the
+ * JSON-in-text envelope. Unknown OpenRouter model IDs still fall back to null (JSON
+ * envelope) — that preserves backwards compatibility for exotic community models.
+ *
+ * Matching in [getDefinition] is prefix-based and first-match-wins in declaration
+ * order (e.g. "amazon/nova-lite-v1" matches entry "amazon/nova-"), so a specific
+ * prefix must be listed before any broader prefix that also matches.
+ */
+ val OPENROUTER_MODELS = mapOf(
+ // Anthropic
+ "anthropic/claude-opus" to openrouterDef("anthropic/claude-opus-*", maxContext = 1_000_000, vision = true),
+ "anthropic/claude-sonnet" to openrouterDef("anthropic/claude-sonnet-*", maxContext = 1_000_000, vision = true),
+ "anthropic/claude-haiku" to openrouterDef("anthropic/claude-haiku-*", maxContext = 200_000, vision = true),
+ "anthropic/claude-" to openrouterDef("anthropic/claude-*", maxContext = 200_000, vision = true),
+ // OpenAI
+ "openai/gpt-5.4-pro" to openrouterDef("openai/gpt-5.4-pro", maxContext = 1_050_000, vision = true),
+ "openai/gpt-5.4" to openrouterDef("openai/gpt-5.4-*", maxContext = 400_000, vision = true),
+ "openai/gpt-5" to openrouterDef("openai/gpt-5-*", maxContext = 400_000, vision = true),
+ "openai/gpt-audio" to openrouterDef("openai/gpt-audio*", maxContext = 128_000),
+ "openai/gpt-" to openrouterDef("openai/gpt-*", maxContext = 128_000, vision = true),
+ "openai/o" to openrouterDef("openai/o*", maxContext = 200_000, reasoning = true),
+ // Google
+ "google/gemini-3" to openrouterDef("google/gemini-3*", maxContext = 1_048_576, vision = true),
+ "google/gemini-" to openrouterDef("google/gemini-*", maxContext = 1_000_000, vision = true),
+ "google/gemma-" to openrouterDef("google/gemma-*", maxContext = 262_144, vision = true),
+ // Amazon
+ "amazon/nova-" to openrouterDef("amazon/nova-*", maxContext = 300_000, vision = true),
+ // Meta
+ "meta-llama/llama-" to openrouterDef("meta-llama/llama-*", maxContext = 128_000),
+ "meta-llama/" to openrouterDef("meta-llama/*", maxContext = 128_000),
+ // Mistral
+ "mistralai/" to openrouterDef("mistralai/*", maxContext = 262_144),
+ // Qwen
+ "qwen/qwen3.6" to openrouterDef("qwen/qwen3.6-*", maxContext = 1_000_000, vision = true),
+ "qwen/qwen3.5" to openrouterDef("qwen/qwen3.5-*", maxContext = 262_144, vision = true),
+ // qwen3-coder-* variants emit XML-style pseudo-tags instead of native tool_calls
+ // (observed in practice), so native function calling is disabled for them.
+ // Prefix entry must come before "qwen/qwen3" to win the prefix match.
+ "qwen/qwen3-coder" to openrouterDef("qwen/qwen3-coder-*", maxContext = 262_144, functionCalling = false),
+ "qwen/qwen3" to openrouterDef("qwen/qwen3-*", maxContext = 262_144),
+ "qwen/" to openrouterDef("qwen/*", maxContext = 128_000),
+ // DeepSeek
+ "deepseek/" to openrouterDef("deepseek/*", maxContext = 128_000),
+ // xAI
+ "x-ai/grok-" to openrouterDef("x-ai/grok-*", maxContext = 2_000_000, vision = true),
+ // Cohere
+ "cohere/command-" to openrouterDef("cohere/command-*", maxContext = 128_000),
+ // Moonshot
+ "moonshotai/kimi" to openrouterDef("moonshotai/kimi-*", maxContext = 262_144, vision = true),
+ "moonshotai/" to openrouterDef("moonshotai/*", maxContext = 128_000),
+ // MiniMax
+ // minimax-m2.7 emits pseudo-XML tool-call markup instead of native tool_calls.
+ // Force JSON envelope mode. Must precede the broader "minimax/minimax-m2" prefix.
+ "minimax/minimax-m2.7" to openrouterDef("minimax/minimax-m2.7*", maxContext = 196_608, functionCalling = false),
+ "minimax/minimax-m2" to openrouterDef("minimax/minimax-m2*", maxContext = 196_608),
+ "minimax/" to openrouterDef("minimax/*", maxContext = 128_000),
+ // Z.AI
+ "z-ai/glm-5v" to openrouterDef("z-ai/glm-5v*", maxContext = 202_752, vision = true),
+ "z-ai/glm-" to openrouterDef("z-ai/glm-*", maxContext = 202_752),
+ "z-ai/" to openrouterDef("z-ai/*", maxContext = 128_000),
+ // ByteDance
+ "bytedance-seed/" to openrouterDef("bytedance-seed/*", maxContext = 262_144, vision = true),
+ // NVIDIA
+ "nvidia/nemotron" to openrouterDef("nvidia/nemotron-*", maxContext = 262_144),
+ "nvidia/" to openrouterDef("nvidia/*", maxContext = 128_000),
+ // InclusionAI (Ling family)
+ "inclusionai/" to openrouterDef("inclusionai/*", maxContext = 262_144),
+ // Arcee
+ "arcee-ai/" to openrouterDef("arcee-ai/*", maxContext = 262_144),
+ // Kwai
+ "kwaipilot/" to openrouterDef("kwaipilot/*", maxContext = 256_000),
+ // Reka
+ "rekaai/" to openrouterDef("rekaai/*", maxContext = 16_384, vision = true),
+ // Xiaomi
+ "xiaomi/mimo" to openrouterDef("xiaomi/mimo*", maxContext = 1_048_576, vision = true),
+ // Upstage
+ "upstage/solar" to openrouterDef("upstage/solar-*", maxContext = 128_000),
+ // Inception
+ "inception/mercury" to openrouterDef("inception/mercury*", maxContext = 128_000),
+ // Tencent
+ "tencent/" to openrouterDef("tencent/*", maxContext = 262_144),
+ // Writer
+ "writer/palmyra" to openrouterDef("writer/palmyra-*", maxContext = 1_040_000),
+ // Allen AI
+ "allenai/olmo" to openrouterDef("allenai/olmo-*", maxContext = 65_536),
+ // StepFun
+ "stepfun/step" to openrouterDef("stepfun/step-*", maxContext = 262_144),
+ // Liquid
+ "liquid/lfm" to openrouterDef("liquid/lfm-*", maxContext = 32_768),
+ // AION
+ "aion-labs/" to openrouterDef("aion-labs/*", maxContext = 131_072),
+ // Baidu
+ "baidu/" to openrouterDef("baidu/*", maxContext = 65_536, vision = true),
+ // OpenRouter meta-models
+ "openrouter/" to openrouterDef("openrouter/*", maxContext = 200_000, vision = true)
+ )
+
+    /**
+     * Builds the [ModelDefinition] shared by every model of one curated OpenRouter family.
+     *
+     * All families are registered as active, streaming text models on the Chat Completions
+     * endpoint with zeroed cost fields; only the arguments below vary per family.
+     *
+     * @param id family pattern, stored as both `id` and `name` of the definition.
+     * @param maxContext value stored as the definition's `maxContext`.
+     * @param vision sets `supportsVision` and adds [ModelCapability.VISION].
+     * @param reasoning sets `supportsReasoning`.
+     * @param functionCalling whether the upstream model reliably emits native OpenAI-format
+     *   `tool_calls`; sets `supportsFunctionCalling` and adds [ModelCapability.TOOL_USE].
+     *   Pass false for models which advertise function calling but actually emit
+     *   pseudo-XML markup, so that AgentTurnLoop falls back to the
+     *   `response-contract-json` system prompt and parses JSON envelopes out of plain text.
+     */
+    private fun openrouterDef(
+        id: String,
+        maxContext: Int,
+        vision: Boolean = false,
+        reasoning: Boolean = false,
+        functionCalling: Boolean = true
+    ): ModelDefinition = ModelDefinition(
+        id = id,
+        name = id,
+        provider = "openrouter",
+        description = if (functionCalling)
+            "OpenRouter curated family with native function-calling"
+        else
+            "OpenRouter family that does NOT emit native tool_calls — JSON envelope only",
+        capabilities = buildList {
+            add(ModelCapability.CHAT_COMPLETION)
+            add(ModelCapability.TEXT_COMPLETION)
+            if (vision) add(ModelCapability.VISION)
+            // Keep the capability list in step with supportsFunctionCalling: the other
+            // registries in this file list TOOL_USE alongside supportsFunctionCalling = true
+            // and drop it when the flag is turned off.
+            if (functionCalling) add(ModelCapability.TOOL_USE)
+        },
+        modelType = ModelType.TEXT,
+        maxContext = maxContext,
+        maxOutputTokens = null,
+        costPer1MInput = 0.0,
+        costPer1MOutput = 0.0,
+        supportsVision = vision,
+        supportsReasoning = reasoning,
+        supportsStreaming = true,
+        supportsFunctionCalling = functionCalling,
+        endpointType = ApiEndpointType.CHAT_COMPLETIONS,
+        apiFormat = ApiFormat.CHAT_COMPLETIONS,
+        active = true
+    )
+
/**
* Get model definition by ID and provider.
* Returns null if not found (use fallback).
@@ -4463,7 +4942,10 @@ object ModelDefinitions {
"gemini" -> GEMINI_MODELS[modelId]
"lmstudio" -> LM_STUDIO_MODELS[modelId]
"zai" -> ZAI_MODELS[modelId]
- "openrouter" -> null // OpenRouter uses dynamic models
+ "openrouter" -> OPENROUTER_MODELS.entries
+ .firstOrNull { (prefix, _) -> modelId.startsWith(prefix) }
+ ?.value
+ ?.copy(id = modelId, name = modelId)
else -> null
}
}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsFallbackTracker.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsFallbackTracker.kt
new file mode 100644
index 00000000..55b05938
--- /dev/null
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsFallbackTracker.kt
@@ -0,0 +1,32 @@
+package pl.jclab.refio.core.llm
+
+import pl.jclab.refio.core.logging.dualLogger
+import java.util.concurrent.ConcurrentHashMap
+
+private val logger = dualLogger("NativeToolsFallbackTracker")
+
+/**
+ * In-memory registry of models that failed native function-calling in the current process run.
+ *
+ * When a model returns a provider error indicating tools are not supported, it is added here.
+ * Subsequent requests in the same process skip the native path for that model automatically.
+ *
+ * Not persisted — a process restart gives each model a fresh chance.
+ */
+object NativeToolsFallbackTracker {
+    // Backed by ConcurrentHashMap, so concurrent reads and writes need no extra locking.
+    private val fallbackModels = ConcurrentHashMap.newKeySet<String>()
+
+    /** Returns true when [modelId] has been marked via [markFallback] since the last [clear]. */
+    fun isFallback(modelId: String): Boolean = modelId in fallbackModels
+
+    /**
+     * Marks [modelId] as unable to use native tools.
+     *
+     * The warning is logged only the first time a model is marked; repeated calls for the
+     * same model are silent no-ops.
+     *
+     * @param reason free-text explanation included in the log line.
+     */
+    fun markFallback(modelId: String, reason: String) {
+        if (fallbackModels.add(modelId)) {
+            logger.warn { "[NATIVE_TOOLS_FALLBACK] Marking $modelId as fallback: $reason" }
+        }
+    }
+
+    /** Returns a snapshot copy of the marked model IDs; later marks do not affect it. */
+    fun getFallbackSet(): Set<String> = fallbackModels.toSet()
+
+    /** Forgets every marked model. */
+    fun clear() {
+        fallbackModels.clear()
+    }
+}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsResolver.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsResolver.kt
new file mode 100644
index 00000000..0cd8872a
--- /dev/null
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/NativeToolsResolver.kt
@@ -0,0 +1,39 @@
+package pl.jclab.refio.core.llm
+
+import pl.jclab.refio.core.logging.dualLogger
+
+private val logger = dualLogger("NativeToolsResolver")
+
+/** Policy controlling whether requests use a provider's native function-calling API. */
+enum class NativeToolsMode {
+    AUTO,
+    ALWAYS,
+    NEVER,
+}
+
+/**
+ * Parses a textual native-tools mode into a [NativeToolsMode].
+ *
+ * Matching ignores surrounding whitespace and letter case. Anything other than
+ * "always" or "never" — including null and blank input — yields [NativeToolsMode.AUTO].
+ */
+fun parseNativeToolsMode(raw: String?): NativeToolsMode {
+    val normalized = raw?.trim()?.lowercase() ?: return NativeToolsMode.AUTO
+    if (normalized == "always") return NativeToolsMode.ALWAYS
+    if (normalized == "never") return NativeToolsMode.NEVER
+    return NativeToolsMode.AUTO
+}
+
+/**
+ * Decide whether a given request should use native function-calling API.
+ *
+ * Precedence:
+ *  1. modelId in fallbackFlags → false (session-scoped failure cache)
+ *  2. mode == NEVER → false
+ *  3. mode == ALWAYS → true
+ *  4. mode == AUTO → ModelDefinition.supportsFunctionCalling (false if no definition)
+ *
+ * @param mode configured policy for the request.
+ * @param definition static definition of the model, or null when none is known.
+ * @param modelId identifier looked up in [fallbackFlags].
+ * @param fallbackFlags model IDs that must not use native tools; checked before [mode],
+ *   so it overrides even [NativeToolsMode.ALWAYS].
+ * @return true when the native function-calling path should be used.
+ */
+fun shouldUseNativeTools(
+    mode: NativeToolsMode,
+    definition: ModelDefinition?,
+    modelId: String,
+    fallbackFlags: Set<String> = emptySet(),
+): Boolean {
+    if (modelId in fallbackFlags) {
+        logger.debug { "[NATIVE_TOOLS] $modelId in session fallback set, forcing JSON path" }
+        return false
+    }
+    return when (mode) {
+        NativeToolsMode.NEVER -> false
+        NativeToolsMode.ALWAYS -> true
+        // A missing definition yields null here, which compares unequal to true.
+        NativeToolsMode.AUTO -> definition?.supportsFunctionCalling == true
+    }
+}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt
index a3c36c71..d4f23a34 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/SupportedModels.kt
@@ -28,7 +28,14 @@ object SupportedModels {
* - GPT-3.5 Turbo (cost-effective)
* - O1 series (reasoning models)
*/
- private val OPENAI_SUPPORTED = setOf(
+ private val OPENAI_SUPPORTED = setOf(
+ // GPT-5.5
+ "gpt-5.5",
+ "gpt-5.5-mini",
+ "gpt-5.5-nano",
+ "gpt-5.5-codex-max",
+ "gpt-5.5-codex",
+ "gpt-5.5-codex-mini",
// GPT-5.4
"gpt-5.4",
"gpt-5.4-mini",
@@ -80,7 +87,7 @@ object SupportedModels {
/**
* Anthropic supported models.
*/
- private val ANTHROPIC_SUPPORTED = setOf(
+ private val ANTHROPIC_SUPPORTED = setOf(
// Opus models
"claude-opus-4-7",
"anthropic.claude-opus-4-7",
@@ -93,6 +100,7 @@ object SupportedModels {
"claude-opus-4-0",
"claude-opus-4-20250514",
// Sonnet models
+ "claude-sonnet-4-6",
"claude-sonnet-4-5-20250929",
"claude-sonnet-4-5",
"claude-sonnet-4-20250514",
@@ -117,7 +125,7 @@ object SupportedModels {
*
* Format: "provider/model" (e.g., "google/gemini-2.0-flash-exp")
*/
- private val OPENROUTER_PATTERNS = setOf(
+ private val OPENROUTER_PATTERNS = setOf(
// Google Gemini models (1.x and 2.x series)
Regex("^.*gpt-.*"),
Regex("^.*amazon.*"),
@@ -177,7 +185,7 @@ object SupportedModels {
/**
* Ollama supported models.
*/
- private val OLLAMA_SUPPORTED = setOf(
+ private val OLLAMA_SUPPORTED = setOf(
Regex("^.*"),
)
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/ToolSchemaSanitizer.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/ToolSchemaSanitizer.kt
new file mode 100644
index 00000000..b1f57073
--- /dev/null
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/ToolSchemaSanitizer.kt
@@ -0,0 +1,209 @@
+package pl.jclab.refio.core.llm
+
+import pl.jclab.refio.core.tools.base.ToolSchema
+
+/**
+ * Provider-specific schema normalization for native tool calling.
+ *
+ * The source schemas come from tool definitions and are intentionally provider-agnostic.
+ * Each provider accepts a slightly different subset:
+ * - OpenAI strict mode requires fully strict object schemas.
+ * - Anthropic accepts JSON Schema objects but is picky about composition/meta keywords.
+ * - Gemini expects an OpenAPI-style schema shape and nullable fields instead of JSON Schema unions.
+ *
+ * Keyword stripping applies to schema objects only. The keys of a `properties` map are
+ * parameter names, not schema keywords, so a parameter that happens to be called
+ * `default` or `type` is kept and its schema is sanitized like any other.
+ */
+object ToolSchemaSanitizer {
+
+    /**
+     * Result of [forOpenAI].
+     *
+     * @property tool the tool with its parameter schema sanitized for OpenAI.
+     * @property strict true when no strict-mode incompatibility was found.
+     * @property strictIncompatibilities human-readable findings, each prefixed with the
+     *   `$`-rooted path of the offending schema node; empty when [strict] is true.
+     */
+    data class OpenAIToolSchema(
+        val tool: ToolSchema,
+        val strict: Boolean,
+        val strictIncompatibilities: List<String>
+    )
+
+    /** Per-provider rules applied while walking a schema tree. */
+    private class Rules(
+        /** Schema keywords removed from every schema object. */
+        val forbiddenKeys: Set<String>,
+        /** When true, `type` values go through [toGeminiType] and `nullable` is derived. */
+        val geminiTypes: Boolean = false
+    )
+
+    private val openAiRules = Rules(setOf("\$schema", "default"))
+    private val anthropicRules = Rules(setOf("oneOf", "allOf", "anyOf", "\$schema", "default"))
+    private val geminiRules = Rules(
+        forbiddenKeys = setOf("oneOf", "allOf", "anyOf", "\$schema", "additionalProperties", "default"),
+        geminiTypes = true
+    )
+
+    /**
+     * Sanitizes [tool]'s parameter schema for OpenAI and reports whether the result
+     * passes this object's strict-mode checks.
+     */
+    fun forOpenAI(tool: ToolSchema): OpenAIToolSchema {
+        val sanitized = sanitizeSchema(tool.parametersJsonSchema, openAiRules)
+        val incompatibilities = mutableListOf<String>()
+        collectOpenAIStrictIncompatibilities(
+            node = sanitized,
+            path = "\$",
+            incompatibilities = incompatibilities
+        )
+        return OpenAIToolSchema(
+            tool = tool.copy(parametersJsonSchema = sanitized),
+            strict = incompatibilities.isEmpty(),
+            strictIncompatibilities = incompatibilities
+        )
+    }
+
+    /** Returns a copy of [tool] with composition/meta keywords and `default` stripped from its schema. */
+    fun forAnthropic(tool: ToolSchema): ToolSchema =
+        tool.copy(parametersJsonSchema = sanitizeSchema(tool.parametersJsonSchema, anthropicRules))
+
+    /**
+     * Returns a copy of [tool] whose schema has unsupported keywords stripped, `type`
+     * values collapsed to a single upper-case name, and `nullable = true` added wherever
+     * the original `type` was a list containing "null".
+     */
+    fun forGemini(tool: ToolSchema): ToolSchema =
+        tool.copy(parametersJsonSchema = sanitizeSchema(tool.parametersJsonSchema, geminiRules))
+
+    /**
+     * Sanitizes the root schema object.
+     *
+     * Generic over the map type because the declared type of
+     * `ToolSchema.parametersJsonSchema` lives outside this file; the result is always a
+     * freshly built [LinkedHashMap], and the input is never mutated.
+     */
+    @Suppress("UNCHECKED_CAST")
+    private fun <M : Map<String, *>> sanitizeSchema(schema: M, rules: Rules): M =
+        sanitizeSchemaObject(schema, rules) as M
+
+    /** Recursively sanitizes maps as schema objects and lists element-wise; scalars pass through. */
+    private fun sanitizeNode(node: Any?, rules: Rules): Any? = when (node) {
+        is Map<*, *> -> sanitizeSchemaObject(node, rules)
+        is List<*> -> node.map { sanitizeNode(it, rules) }
+        else -> node
+    }
+
+    /**
+     * Copies one schema object, dropping non-string keys and forbidden keywords.
+     *
+     * `properties` is handled separately so that parameter names are never mistaken
+     * for schema keywords.
+     */
+    private fun sanitizeSchemaObject(schema: Map<*, *>, rules: Rules): Map<String, Any?> {
+        val result = LinkedHashMap<String, Any?>()
+        for ((rawKey, rawValue) in schema) {
+            val key = rawKey as? String ?: continue
+            if (key in rules.forbiddenKeys) continue
+            result[key] = when {
+                key == "properties" && rawValue is Map<*, *> -> sanitizeProperties(rawValue, rules)
+                key == "type" && rules.geminiTypes -> toGeminiType(rawValue)
+                else -> sanitizeNode(rawValue, rules)
+            }
+        }
+        if (rules.geminiTypes && isNullableType(schema["type"])) {
+            result["nullable"] = true
+        }
+        return result
+    }
+
+    /** Sanitizes each parameter schema while keeping every (string) parameter name as-is. */
+    private fun sanitizeProperties(properties: Map<*, *>, rules: Rules): Map<String, Any?> {
+        val result = LinkedHashMap<String, Any?>()
+        for ((rawName, rawSchema) in properties) {
+            val name = rawName as? String ?: continue
+            result[name] = sanitizeNode(rawSchema, rules)
+        }
+        return result
+    }
+
+    /**
+     * Appends to [incompatibilities] every reason the schema under [node] fails the
+     * strict-mode checks implemented here:
+     * - use of `oneOf` / `allOf` / `anyOf`;
+     * - an object-like node (object `type`, or a `properties` map) whose
+     *   `additionalProperties` is not literally `false`, or whose `required` set differs
+     *   from its property names;
+     * - a map-valued `additionalProperties` (dynamic properties).
+     *
+     * @param path `$`-rooted location of [node], used as the prefix of each message.
+     */
+    private fun collectOpenAIStrictIncompatibilities(
+        node: Any?,
+        path: String,
+        incompatibilities: MutableList<String>
+    ) {
+        when (node) {
+            is Map<*, *> -> {
+                val typeValue = node["type"]
+                val properties = node["properties"] as? Map<*, *>
+                val additionalProperties = node["additionalProperties"]
+                val required = (node["required"] as? List<*>)?.mapNotNull { it as? String }?.toSet()
+                val propertyNames = properties?.keys?.mapNotNull { it as? String }?.toSet().orEmpty()
+
+                if (node.containsKey("oneOf") || node.containsKey("allOf") || node.containsKey("anyOf")) {
+                    incompatibilities += "$path uses schema composition keywords"
+                }
+
+                if (isObjectType(typeValue) || properties != null) {
+                    if (additionalProperties != false) {
+                        incompatibilities += "$path is object-like without additionalProperties=false"
+                    }
+                    // A missing `required` (null) never equals the name set, so it is reported too.
+                    if (required != propertyNames) {
+                        incompatibilities += "$path required keys do not exactly match properties"
+                    }
+                }
+
+                if (additionalProperties is Map<*, *>) {
+                    incompatibilities += "$path uses dynamic object properties"
+                    collectOpenAIStrictIncompatibilities(
+                        additionalProperties,
+                        "$path.additionalProperties",
+                        incompatibilities
+                    )
+                }
+
+                if (properties != null) {
+                    for ((rawPropName, rawPropSchema) in properties) {
+                        val propName = rawPropName as? String ?: continue
+                        collectOpenAIStrictIncompatibilities(
+                            rawPropSchema,
+                            "$path.properties.$propName",
+                            incompatibilities
+                        )
+                    }
+                }
+
+                // Remaining keys (items, composition branches, ...) may hold nested schemas;
+                // the two handled above are skipped so they are not visited twice.
+                for ((rawKey, rawValue) in node) {
+                    val key = rawKey as? String ?: continue
+                    if (key == "properties" || key == "additionalProperties") continue
+                    collectOpenAIStrictIncompatibilities(rawValue, "$path.$key", incompatibilities)
+                }
+            }
+            is List<*> -> node.forEachIndexed { index, item ->
+                collectOpenAIStrictIncompatibilities(item, "$path[$index]", incompatibilities)
+            }
+        }
+    }
+
+    /** True when [typeValue] is "object" (any case) or a list containing it. */
+    private fun isObjectType(typeValue: Any?): Boolean {
+        return when (typeValue) {
+            is String -> typeValue.equals("object", ignoreCase = true)
+            is List<*> -> typeValue.filterIsInstance<String>().any { it.equals("object", ignoreCase = true) }
+            else -> false
+        }
+    }
+
+    /** True when [typeValue] is a list containing "null" (any case). */
+    private fun isNullableType(typeValue: Any?): Boolean {
+        return when (typeValue) {
+            is List<*> -> typeValue.filterIsInstance<String>().any { it.equals("null", ignoreCase = true) }
+            else -> false
+        }
+    }
+
+    /**
+     * Collapses a JSON Schema `type` value to a single upper-case type name.
+     *
+     * For a list, the first non-"null" string wins, falling back to the first string.
+     * A value that yields no string at all is returned unchanged.
+     */
+    private fun toGeminiType(typeValue: Any?): Any? {
+        val normalized = when (typeValue) {
+            is String -> typeValue
+            is List<*> -> {
+                val names = typeValue.filterIsInstance<String>()
+                names.firstOrNull { !it.equals("null", ignoreCase = true) } ?: names.firstOrNull()
+            }
+            else -> null
+        } ?: return typeValue
+
+        return normalized.uppercase()
+    }
+}
diff --git a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt
index 3ee21884..203d697d 100644
--- a/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt
+++ b/core/src/main/kotlin/pl/jclab/refio/core/llm/adapters/AnthropicAdapter.kt
@@ -8,8 +8,12 @@ import pl.jclab.refio.core.llm.LLMMessage
import pl.jclab.refio.core.llm.LLMResponse
import pl.jclab.refio.core.llm.LLMUsage
import pl.jclab.refio.core.llm.ModelConfig
+import pl.jclab.refio.core.llm.NativeToolCall
import pl.jclab.refio.core.llm.StreamChunk
+import pl.jclab.refio.core.llm.ToolSchemaSanitizer
+import pl.jclab.refio.core.llm.ToolsNotSupportedException
import pl.jclab.refio.core.llm.toModelConfig
+import pl.jclab.refio.core.tools.base.ToolSchema
import pl.jclab.refio.core.utils.GsonInstance.gson
import io.ktor.client.*
import io.ktor.client.call.*
@@ -86,6 +90,8 @@ class AnthropicAdapter(
// Stream aborted by a guardrail (see core/llm/streaming/) — must propagate
// so the caller can see StreamAbortedException instead of RefioError.LLMError.
throw e
+ } catch (e: ToolsNotSupportedException) {
+ throw e
} catch (e: Exception) {
throw LLMErrorMapper.fromThrowable(provider, model, timeout, e)
}
@@ -123,11 +129,12 @@ class AnthropicAdapter(
// "user"/"assistant" in the messages array — remap "tool" (used by
// LLMMessageMapper for tool results) to "assistant" so the request
// body stays valid.
+        // NOTE: the remap target is "user", not "assistant" as the older comment above says.
+        // Mapping tool results to "assistant" makes the conversation end with an assistant
+        // message, which Anthropic interprets as a prefill (some models, e.g. opus-4-6, reject it).
val claudeMessages = nonSystemMessages.mapIndexed { index, msg ->
- val mappedRole = if (msg.role == "tool") "assistant" else msg.role
+ val mappedRole = if (msg.role == "tool") "user" else msg.role
val content = toAnthropicMessageContent(msg)
- // Anthropic API rejects requests where assistant content ends with trailing whitespace.
- // Trim trailing whitespace from the last assistant message to prevent HTTP 400.
val sanitizedContent = if (index == nonSystemMessages.lastIndex && mappedRole == "assistant" && content is String) {
content.trimEnd()
} else {
@@ -170,7 +177,14 @@ class AnthropicAdapter(
"[ANTHROPIC] Using maxTokens=$effectiveMaxTokens (requested=$maxTokens, configLimit=$maxOutputLimit, modelLimit=${modelLimit ?: "n/a"})"
}
- put("temperature", temperature)
+ // Respect model-level removeParams (e.g. claude-opus-4-7 deprecated `temperature`).
+ val removeParams = pl.jclab.refio.core.llm.ModelDefinitions
+ .getDefinition("anthropic", model)
+ ?.removeParams
+ ?: emptyList()
+ if ("temperature" !in removeParams) {
+ put("temperature", temperature)
+ }
// Add system prompt as separate parameter (not a message)
// Use combinedSystemPrompt which combines all system messages from systemMessages parameter
@@ -208,6 +222,14 @@ class AnthropicAdapter(
(kwargs["top_p"] as? Number)?.let { put("top_p", it) }
(kwargs["top_k"] as? Number)?.let { put("top_k", it) }
(kwargs["stop_sequences"] as? List<*>)?.let { put("stop_sequences", it) }
+
+ // Native function-calling tools
+ @Suppress("UNCHECKED_CAST")
+ val nativeTools = kwargs["native_tools"] as? List
+ if (!nativeTools.isNullOrEmpty()) {
+ put("tools", buildAnthropicToolsArray(nativeTools))
+ logger.info { "[ANTHROPIC][NATIVE_TOOLS] Sending ${nativeTools.size} tool schemas" }
+ }
}
val requestJson = gson.toJson(requestBody)
@@ -226,6 +248,46 @@ class AnthropicAdapter(
}
}
+ private fun buildAnthropicToolsArray(tools: List): List