rootflo · vizsatiz · May 31, 2026 · Jan 5, 2026 · May 27, 2026 · May 28, 2026
diff --git a/wavefront/server/apps/floware/floware/server.py b/wavefront/server/apps/floware/floware/server.py
@@ -170,6 +170,16 @@
     message_processor_bucket_name=bucket_name,
 )
 
+inference_container = InferenceContainer(
+    db_client=db_repo_container.db_client,
+    cache_manager=db_repo_container.cache_manager,
+)
+
+llm_inference_config_container = LlmInferenceConfigContainer(
+    db_client=db_repo_container.db_client,
+    cache_manager=db_repo_container.cache_manager,
+)  # built before agents_container, which is passed its llm_inference_config_service
+
 agents_container = AgentsContainer(
     db_client=db_repo_container.db_client,
     cloud_storage_manager=common_container.cloud_storage_manager,
@@ -185,16 +195,7 @@
     api_services_manager=api_services_container.api_service_manager,
     async_agentic_execution_repository=db_repo_container.async_agentic_execution_repository,
     executions_bucket=config['agents']['executions_bucket'],
-)
-
-inference_container = InferenceContainer(
-    db_client=db_repo_container.db_client,
-    cache_manager=db_repo_container.cache_manager,
-)
-
-llm_inference_config_container = LlmInferenceConfigContainer(
-    db_client=db_repo_container.db_client,
-    cache_manager=db_repo_container.cache_manager,
+    llm_inference_config_service=llm_inference_config_container.llm_inference_config_service,
 )
 
 voice_agents_container = VoiceAgentsContainer(

diff --git a/wavefront/server/modules/agents_module/agents_module/agents_container.py b/wavefront/server/modules/agents_module/agents_module/agents_container.py
@@ -41,6 +41,8 @@ class AgentsContainer(containers.DeclarativeContainer):
 
     executions_bucket = providers.Dependency()
 
+    llm_inference_config_service = providers.Dependency(default=None)
+
     namespace_service = providers.Singleton(
         NamespaceService,
         namespace_repository=namespace_repository,
@@ -69,6 +71,7 @@ class AgentsContainer(containers.DeclarativeContainer):
         cloud_storage_manager=cloud_storage_manager,
         message_processor_bucket_name=message_processor_bucket_name,
         api_services_manager=api_services_manager,
+        llm_inference_config_service=llm_inference_config_service,
     )
 
     workflow_crud_service = providers.Singleton(

diff --git a/wavefront/server/modules/agents_module/agents_module/controllers/agent_controller.py b/wavefront/server/modules/agents_module/agents_module/controllers/agent_controller.py
@@ -137,9 +137,6 @@ async def agent_inference_v2(
     response_formatter: ResponseFormatter = Depends(
         Provide[CommonContainer.response_formatter]
     ),
-    llm_inference_config_service: LlmInferenceConfigService = Depends(
-        Provide[LlmInferenceConfigContainer.llm_inference_config_service]
-    ),
 ):
     """
     Run inference using a flo_ai agent (v2 - UUID-based)
@@ -151,6 +148,11 @@ async def agent_inference_v2(
     4. Runs inference with the provided variables
     5. Returns the result along with execution metadata
 
+    The LLM is resolved from the agent YAML. When `agent.model.provider` is
+    `rootflo`, the `model_id` is treated as a LlmInferenceConfig UUID and the
+    LLM is built from that config. Any `llm_inference_config_id` on the payload
+    is ignored in v2.
+
     Args:
         agent_id: The UUID of the agent
         request: Request containing variables for the agent
@@ -163,22 +165,6 @@ async def agent_inference_v2(
     # Extract authentication credentials
     access_token, app_key = extract_auth_credentials(request)
 
-    # Fetch LLM config if provided
-    llm_config = None
-    if agent_inference_payload.llm_inference_config_id:
-        llm_config_dict = await llm_inference_config_service.get_config(
-            agent_inference_payload.llm_inference_config_id
-        )
-        if not llm_config_dict:
-            return JSONResponse(
-                status_code=status.HTTP_404_NOT_FOUND,
-                content=response_formatter.buildErrorResponse(
-                    f'LLM inference configuration not found: {agent_inference_payload.llm_inference_config_id}'
-                ),
-            )
-        else:
-            llm_config = LlmInferenceConfig(**llm_config_dict)
-
     # Process inputs using common utility function
     resolved_inputs = process_inference_inputs(agent_inference_payload.inputs)
 
@@ -194,7 +180,6 @@ async def agent_inference_v2(
             inputs=resolved_inputs
             if isinstance(resolved_inputs, list)
             else [resolved_inputs],
-            llm_config=llm_config,
             output_json_enabled=agent_inference_payload.output_json_enabled,
             access_token=access_token,
             app_key=app_key,

diff --git a/wavefront/server/modules/agents_module/agents_module/services/agent_inference_service.py b/wavefront/server/modules/agents_module/agents_module/services/agent_inference_service.py
@@ -12,6 +12,9 @@
 from flo_ai.tool.base_tool import Tool
 from flo_cloud.cloud_storage import CloudStorageManager
 from common_module.log.logger import logger
+from llm_inference_config_module.services.llm_inference_config_service import (
+    LlmInferenceConfigService,
+)
 from tools_module.registry.tool_loader import ToolLoader
 from tools_module.utils.message_processor_fn import execute_message_processor_fn
 from tools_module.utils.api_service_tool_loader import load_api_service_tool
@@ -31,6 +34,7 @@ def __init__(
         cloud_storage_manager: CloudStorageManager,
         message_processor_bucket_name: str,
         api_services_manager: Optional[ApiServicesManager] = None,
+        llm_inference_config_service: Optional[LlmInferenceConfigService] = None,
     ):
         """
         Initialize the agent inference service
@@ -43,6 +47,8 @@ def __init__(
             cloud_storage_manager: Cloud storage manager instance
             message_processor_bucket_name: Name of the bucket containing message processor YAML files
             api_services_manager: API services manager instance (optional)
+            llm_inference_config_service: Resolves rootflo model_id references in
+                agent YAMLs (optional; v2 inference of rootflo agents requires it)
         """
         self.cache_manager = cache_manager
         self.tool_loader = tool_loader
@@ -51,6 +57,7 @@ def __init__(
         self.message_processor_repository = message_processor_repository
         self.cloud_storage_manager = cloud_storage_manager
         self.message_processor_bucket_name = message_processor_bucket_name
+        self.llm_inference_config_service = llm_inference_config_service
 
     async def create_agent_from_yaml(
         self,
@@ -309,31 +316,85 @@ async def perform_inference(
 
         return result, execution_time
 
+    async def _resolve_rootflo_llm_config(
+        self, yaml_content: str
+    ) -> Optional[LlmInferenceConfig]:
+        """
+        Resolve an LlmInferenceConfig from a YAML's `agent.model` block when the
+        provider is `rootflo` and the `model_id` is a UUID pointing to a
+        LlmInferenceConfig row.
+
+        Returns None when the YAML is empty or not a mapping, has no usable
+        `agent.model` mapping, or names a provider other than rootflo, so the
+        caller falls through to AgentBuilder.from_yaml's default behavior.
+
+        Raises:
+            ValueError: when provider is rootflo but model_id is missing,
+                not a valid UUID, or does not resolve to a LlmInferenceConfig row.
+        """
+        # safe_load returns None for an empty document and `agent:` may be null,
+        # so walk the path defensively instead of chaining .get() calls.
+        model_config = yaml.safe_load(yaml_content)
+        for key in ('agent', 'model'):
+            if not isinstance(model_config, dict):
+                return None
+            model_config = model_config.get(key)
+        if not isinstance(model_config, dict):
+            return None
+        if model_config.get('provider') != 'rootflo':
+            return None
+
+        model_id = model_config.get('model_id')
+        if not model_id:
+            raise ValueError(
+                'rootflo provider requires "model_id" in agent.model block'
+            )
+        try:
+            config_uuid = UUID(str(model_id))
+        except (ValueError, TypeError) as exc:
+            message = f'rootflo model_id must be a valid UUID, got: {model_id}'
+            raise ValueError(message) from exc
+
+        if not self.llm_inference_config_service:
+            raise ValueError(
+                'llm_inference_config_service not initialized. '
+                'Required to resolve rootflo model_id references.'
+            )
+        config_row = await self.llm_inference_config_service.get_config(config_uuid)
+        if not config_row:
+            raise ValueError(f'LLM inference configuration not found: {config_uuid}')
+        return LlmInferenceConfig(**config_row)
+
     async def perform_inference_v2(
         self,
         agent_id: UUID,
         variables: Dict[str, Any],
         inputs: List[BaseMessage] | str,
-        llm_config: Optional[LlmInferenceConfig] = None,
         output_json_enabled: bool = True,
         access_token: Optional[str] = None,
         app_key: Optional[str] = None,
     ) -> tuple[List[BaseMessage], float, str]:
         """
         Complete inference workflow (v2): fetch agent from DB + cloud storage, run inference
 
+        The LLM is resolved from the agent YAML itself: when the YAML's
+        `agent.model.provider` is `rootflo`, the `model_id` is treated as a
+        LlmInferenceConfig UUID and the corresponding LLM is built and applied
+        via with_llm(). For any other provider, AgentBuilder.from_yaml builds
+        the LLM directly from the YAML.
+
         Args:
             agent_id: The UUID of the agent
             variables: Variables to pass to the agent
             inputs: Inputs to use for inference
-            llm_config: Optional LLM configuration to override agent's default LLM
             output_json_enabled: Whether to extract JSON from the response
 
         Returns:
             tuple: (result, execution_time, namespace)
 
         Raises:
-            ValueError: If agent_crud_service is not initialized or agent not found
+            ValueError: If agent_crud_service is not initialized, agent not found,
+                or the YAML's rootflo model_id cannot be resolved.
         """
         if not self.agent_crud_service:
             raise ValueError(
@@ -354,6 +415,9 @@ async def perform_inference_v2(
             f'Retrieved agent - namespace: {namespace}, name: {name}, agent_id: {agent_id}'
         )
 
+        # Resolve rootflo model_id references from the YAML, if any
+        llm_config = await self._resolve_rootflo_llm_config(yaml_content)
+
         # Create agent from YAML with optional LLM override and tools
         agent = await self.create_agent_from_yaml(
             yaml_content, name, llm_config, access_token, app_key