From f53aa35819dc6055f2e7442c85f24fcbdbb92811 Mon Sep 17 00:00:00 2001 From: ruwinirathnamalala Date: Mon, 15 Jun 2026 17:54:44 +0530 Subject: [PATCH 1/8] Consolidate Test LLM screens into a single screen with LLM connection selector --- .../POST/get-llm-connections-paginated.sql | 2 +- GUI/src/App.tsx | 3 +- GUI/src/components/MainNavigation/index.tsx | 12 +- GUI/src/pages/TestModel/TestLLM.scss | 428 +++++++++--------- .../TestProductionLLM/TestProductionLLM.scss | 15 + GUI/src/pages/TestProductionLLM/index.tsx | 65 ++- 6 files changed, 300 insertions(+), 225 deletions(-) diff --git a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql index 922c16ec..d4c15efb 100644 --- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql +++ b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql @@ -24,7 +24,7 @@ SELECT END AS budget_status FROM rag_search.llm_connections WHERE connection_status <> 'deleted' - AND environment = 'testing' + -- AND environment = 'testing' AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform) AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model) AND (:environment IS NULL OR :environment = '' OR environment = :environment) diff --git a/GUI/src/App.tsx b/GUI/src/App.tsx index 5839b180..43c50753 100644 --- a/GUI/src/App.tsx +++ b/GUI/src/App.tsx @@ -64,8 +64,7 @@ const App: FC = () => { } /> } /> } /> - } /> - } /> + } /> diff --git a/GUI/src/components/MainNavigation/index.tsx b/GUI/src/components/MainNavigation/index.tsx index 070c4b9a..265f464c 100644 --- a/GUI/src/components/MainNavigation/index.tsx +++ b/GUI/src/components/MainNavigation/index.tsx @@ -45,12 +45,12 @@ const MainNavigation: FC = () => { path: '/test-llm', icon: }, - { - id: 'testProductionLLM', - label: t('menu.testProductionLLM'), - path: '/test-production-llm', - icon: - } + // { + // id: 'testProductionLLM', + // label: t('menu.testProductionLLM'), + 
// path: '/test-production-llm', + // icon: + // } ]; const filterItemsByRole = (role: string[], items: MenuItem[]) => { diff --git a/GUI/src/pages/TestModel/TestLLM.scss b/GUI/src/pages/TestModel/TestLLM.scss index 3d0c2156..35bced8d 100644 --- a/GUI/src/pages/TestModel/TestLLM.scss +++ b/GUI/src/pages/TestModel/TestLLM.scss @@ -1,217 +1,217 @@ -.testModalFormTextArea { - margin-top: 30px; -} - -.mcq-buttons { - display: flex; - flex-wrap: wrap; - gap: 0.75rem; - margin-top: 1rem; -} - -.testModalClassifyButton { - text-align: right; - margin-top: 20px; -} - -.llm-connection-section { - width: 50%; -} - -.llm-connection-controls { - display: flex; - gap: 1rem; - align-items: center; -} - -.inference-results-container { - max-width: 100%; - background-color: #d7efff; - padding: 20px; - border-radius: 8px; - margin-top: 20px; +// .testModalFormTextArea { +// margin-top: 30px; +// } + +// .mcq-buttons { +// display: flex; +// flex-wrap: wrap; +// gap: 0.75rem; +// margin-top: 1rem; +// } + +// .testModalClassifyButton { +// text-align: right; +// margin-top: 20px; +// } + +// .llm-connection-section { +// width: 50%; +// } + +// .llm-connection-controls { +// display: flex; +// gap: 1rem; +// align-items: center; +// } + +// .inference-results-container { +// max-width: 100%; +// background-color: #d7efff; +// padding: 20px; +// border-radius: 8px; +// margin-top: 20px; - .result-item { - margin-bottom: 15px; +// .result-item { +// margin-bottom: 15px; - strong { - color: #333; - } - } +// strong { +// color: #333; +// } +// } - .response-content { - margin-top: 8px; - padding: 12px; - background-color: #f5f5f5; - border-radius: 4px; - white-space: pre-wrap; - line-height: 1.5; - color: #555; - } - - .context-section { - margin-top: 20px; - - .context-list { - display: flex; - flex-direction: column; - gap: 12px; - margin-top: 8px; - } - - .context-item { - padding: 12px; - background-color: #ffffff; - border: 1px solid #e0e0e0; - border-radius: 6px; - box-shadow: 0 
1px 3px rgba(0, 0, 0, 0.1); - - .context-rank { - margin-bottom: 8px; - padding-bottom: 4px; - border-bottom: 1px solid #f0f0f0; - - strong { - color: #2563eb; - font-size: 0.875rem; - font-weight: 600; - } - } - - .context-content { - color: #374151; - line-height: 1.5; - font-size: 0.9rem; - white-space: pre-wrap; - } - } - } -} - -.testModalList { - list-style: disc; - margin-left: 30px; -} - -.mt-20 { - margin-top: 20px; -} - -.classification-results { - margin-top: 1rem; - padding: 1rem; - border: 1px solid #e0e0e0; - border-radius: 8px; - background-color: #f9f9f9; - - h3 { - margin: 0 0 1rem 0; - color: #333; - } - - h4 { - margin: 0 0 0.75rem 0; - color: #555; - font-size: 1rem; - } - - .results-container { - display: flex; - flex-direction: column; - gap: 1.5rem; - } - - .top-prediction { - .prediction-card { - display: flex; - justify-content: space-between; - align-items: center; - padding: 1rem; - border-radius: 8px; - background-color: #e8f5e8; - border: 2px solid #4caf50; - - .agency-name { - font-weight: 600; - color: #2e7d32; - font-size: 1.1rem; - } - - .confidence-score { - font-weight: 700; - color: #2e7d32; - font-size: 1.2rem; - } - } - } - - .predictions-list { - display: flex; - flex-direction: column; - gap: 0.75rem; - - .prediction-item { - display: flex; - align-items: center; - gap: 1rem; - padding: 0.75rem; - background-color: white; - border-radius: 6px; - border: 1px solid #ddd; - - &.highest { - border-color: #4caf50; - background-color: #f8fff8; - } - - .rank { - font-weight: 600; - color: #666; - min-width: 2rem; - } - - .agency-info { - flex: 1; - display: flex; - flex-direction: column; - gap: 0.25rem; - - .agency-name { - font-weight: 500; - color: #333; - } - - .confidence-bar-container { - width: 100%; - height: 4px; - background-color: #e0e0e0; - border-radius: 2px; - overflow: hidden; - - .confidence-bar { - height: 100%; - background-color: #4caf50; - transition: width 0.3s ease; - } - } - } - - .confidence-percentage { - 
font-weight: 600; - color: #555; - min-width: 4rem; - text-align: right; - } - } - } -} - -.classification-error { - margin-top: 1rem; - padding: 1rem; - background-color: #ffebee; - border: 1px solid #f44336; - border-radius: 6px; - color: #c62828; - text-align: center; -} \ No newline at end of file +// .response-content { +// margin-top: 8px; +// padding: 12px; +// background-color: #f5f5f5; +// border-radius: 4px; +// white-space: pre-wrap; +// line-height: 1.5; +// color: #555; +// } + +// .context-section { +// margin-top: 20px; + +// .context-list { +// display: flex; +// flex-direction: column; +// gap: 12px; +// margin-top: 8px; +// } + +// .context-item { +// padding: 12px; +// background-color: #ffffff; +// border: 1px solid #e0e0e0; +// border-radius: 6px; +// box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); + +// .context-rank { +// margin-bottom: 8px; +// padding-bottom: 4px; +// border-bottom: 1px solid #f0f0f0; + +// strong { +// color: #2563eb; +// font-size: 0.875rem; +// font-weight: 600; +// } +// } + +// .context-content { +// color: #374151; +// line-height: 1.5; +// font-size: 0.9rem; +// white-space: pre-wrap; +// } +// } +// } +// } + +// .testModalList { +// list-style: disc; +// margin-left: 30px; +// } + +// .mt-20 { +// margin-top: 20px; +// } + +// .classification-results { +// margin-top: 1rem; +// padding: 1rem; +// border: 1px solid #e0e0e0; +// border-radius: 8px; +// background-color: #f9f9f9; + +// h3 { +// margin: 0 0 1rem 0; +// color: #333; +// } + +// h4 { +// margin: 0 0 0.75rem 0; +// color: #555; +// font-size: 1rem; +// } + +// .results-container { +// display: flex; +// flex-direction: column; +// gap: 1.5rem; +// } + +// .top-prediction { +// .prediction-card { +// display: flex; +// justify-content: space-between; +// align-items: center; +// padding: 1rem; +// border-radius: 8px; +// background-color: #e8f5e8; +// border: 2px solid #4caf50; + +// .agency-name { +// font-weight: 600; +// color: #2e7d32; +// font-size: 1.1rem; 
+// } + +// .confidence-score { +// font-weight: 700; +// color: #2e7d32; +// font-size: 1.2rem; +// } +// } +// } + +// .predictions-list { +// display: flex; +// flex-direction: column; +// gap: 0.75rem; + +// .prediction-item { +// display: flex; +// align-items: center; +// gap: 1rem; +// padding: 0.75rem; +// background-color: white; +// border-radius: 6px; +// border: 1px solid #ddd; + +// &.highest { +// border-color: #4caf50; +// background-color: #f8fff8; +// } + +// .rank { +// font-weight: 600; +// color: #666; +// min-width: 2rem; +// } + +// .agency-info { +// flex: 1; +// display: flex; +// flex-direction: column; +// gap: 0.25rem; + +// .agency-name { +// font-weight: 500; +// color: #333; +// } + +// .confidence-bar-container { +// width: 100%; +// height: 4px; +// background-color: #e0e0e0; +// border-radius: 2px; +// overflow: hidden; + +// .confidence-bar { +// height: 100%; +// background-color: #4caf50; +// transition: width 0.3s ease; +// } +// } +// } + +// .confidence-percentage { +// font-weight: 600; +// color: #555; +// min-width: 4rem; +// text-align: right; +// } +// } +// } +// } + +// .classification-error { +// margin-top: 1rem; +// padding: 1rem; +// background-color: #ffebee; +// border: 1px solid #f44336; +// border-radius: 6px; +// color: #c62828; +// text-align: center; +// } \ No newline at end of file diff --git a/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss b/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss index 9cb5c00c..2fa456fb 100644 --- a/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss +++ b/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss @@ -3,6 +3,21 @@ margin: 0 auto; padding: 2rem; + .llm-connection-section { + width: 50%; + margin-bottom: 1.5rem; + p { + margin-bottom: 0.5rem; + font-weight: 500; + color: #333; + } + } + .llm-connection-controls { + display: flex; + gap: 1rem; + align-items: center; + } + .mcq-buttons { display: flex; flex-wrap: wrap; diff --git 
a/GUI/src/pages/TestProductionLLM/index.tsx b/GUI/src/pages/TestProductionLLM/index.tsx index f29cfcf9..b9ba6be7 100644 --- a/GUI/src/pages/TestProductionLLM/index.tsx +++ b/GUI/src/pages/TestProductionLLM/index.tsx @@ -1,11 +1,16 @@ import { FC, useState, useRef, useEffect, useMemo } from 'react'; +import { useQuery } from '@tanstack/react-query'; import { useTranslation } from 'react-i18next'; -import { Button, FormTextarea } from 'components'; +import { Button, FormTextarea, FormSelect } from 'components'; import { useToast } from 'hooks/useToast'; import { useStreamingResponse } from 'hooks/useStreamingResponse'; import { ChoiceButton } from 'services/inference'; import './TestProductionLLM.scss'; import MessageContent from 'components/MessageContent'; +import { llmConnectionsQueryKeys } from 'utils/queryKeys'; +import { fetchLLMConnectionsPaginated } from 'services/llmConnections'; + + interface Message { id: string; content: string; @@ -23,11 +28,49 @@ const TestProductionLLM: FC = () => { const [messages, setMessages] = useState([]); const [isLoading, setIsLoading] = useState(false); const messagesEndRef = useRef(null); + const [testLLM, setTestLLM] = useState({ + connectionId: null, + text: '', + }); // Generate a unique channel ID for this session const channelId = useMemo(() => `channel-${Math.random().toString(36).substring(2, 15)}`, []); const { startStreaming, stopStreaming, isStreaming } = useStreamingResponse(channelId); + const [selectedConnectionId, setSelectedConnectionId] = useState(null); + + // Fetch LLM connections for dropdown - using the working legacy endpoint for now + const { data: connections, isLoading: isLoadingConnections } = useQuery({ + queryKey: llmConnectionsQueryKeys.list({ + page: 1, + pageSize: 100, // Get all connections for dropdown + sorting: 'created_at desc', + }), + queryFn: () => fetchLLMConnectionsPaginated({ + pageNumber: 1, + pageSize: 100, + sortBy: 'created_at desc', + }), + }); + // Transform connections data for 
dropdown + const connectionOptions = useMemo( + () => + connections?.map((connection: any) => ({ + label: `${connection.llmPlatform} - ${connection.llmModel} (${connection.environment})`, + value: String(connection.id), + })) || [], + [connections] + ); + const selectedConnection = useMemo(() => { + return connections?.find((conn: any) => String(conn.id) === selectedConnectionId) || null; + }, [connections, selectedConnectionId]); + + const handleConnectionChange = (value: string | number) => { + console.log('Selected connection ID:', value); + if (isLoading || isStreaming) return; + setSelectedConnectionId(value ? String(value) : null); + }; + // Auto-scroll to bottom useEffect(() => { messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); @@ -82,6 +125,8 @@ const TestProductionLLM: FC = () => { authorId: 'test-user-456', conversationHistory, url: 'opensearch-dashboard-test', + environment: selectedConnection?.environment || 'production', + connection_id: selectedConnection?.vaultUuid || undefined, }; // Callbacks for streaming @@ -257,11 +302,27 @@ const TestProductionLLM: FC = () => {
-

{t('testProductionLLM.title')}

+

{t('testModels.title')}

+
+

{t('testModels.llmConnectionLabel') || 'LLM Connection'}

+
+ { + handleConnectionChange(selection?.value as string); + }} + defaultValue={selectedConnectionId ?? undefined} + disabled={isLoading || isStreaming} + /> +
+
From 9d954c4b2cc80fb4b1b1b82b4a9d6589708cba84 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 16 Jun 2026 12:14:57 +0530 Subject: [PATCH 2/8] fixed vault configuration issue --- docs/VAULT_SECURITY_ARCHITECTURE.md | 62 +++++++------ vault-init.sh | 131 ++++++++++++++++------------ vault/config/vault.hcl | 44 +++++----- 3 files changed, 130 insertions(+), 107 deletions(-) diff --git a/docs/VAULT_SECURITY_ARCHITECTURE.md b/docs/VAULT_SECURITY_ARCHITECTURE.md index fe6fd741..3f7f7ff2 100644 --- a/docs/VAULT_SECURITY_ARCHITECTURE.md +++ b/docs/VAULT_SECURITY_ARCHITECTURE.md @@ -197,9 +197,12 @@ Day 0+: Automatic Token Renewal: Container Restart: vault-init: Check if Vault is sealed ↓ - If unsealed: Regenerate secret_id only + If unsealed: Validate existing secret_ids ↓ - vault-agent: Re-authenticate with new secret_id + If valid: Reuse existing secret_id (no churn) + If invalid: Mint new secret_id and write to disk + ↓ + vault-agent: Re-authenticate with secret_id ↓ New token issued and cached ``` @@ -413,8 +416,10 @@ Connected Services: - GUI (React Frontend) Token Lifecycle: - - Default Lease: 768h (32 days) - - Auto-renewal: Before expiration + - Token TTL: 15m + - Token Max TTL: 1h + - Auto-renewal: Every ~11 minutes (75% of TTL) + - Re-auth: When max_ttl reached (every ~1h) ``` #### Agent 2: vault-agent-cron @@ -429,8 +434,10 @@ Connected Services: - CronManager (Python worker) Token Lifecycle: - - Default Lease: 768h (32 days) - - Auto-renewal: Before expiration + - Token TTL: 30m + - Token Max TTL: 8h + - Auto-renewal: Every ~22 minutes (75% of TTL) + - Re-auth: When max_ttl reached (every ~8h) ``` #### Agent 3: vault-agent-llm @@ -445,8 +452,10 @@ Connected Services: - LLM Orchestration Service (FastAPI) Token Lifecycle: - - Default Lease: 1h (shorter for higher security) - - Auto-renewal: Every ~45 minutes + - Token TTL: 1h + - Token Max TTL: 8h + - Auto-renewal: Every ~45 minutes (75% of TTL) + - Re-auth: When max_ttl reached (every ~8h) ``` ### Token 
Caching and Auto-Renewal @@ -856,15 +865,16 @@ Step 12: Check Vault Seal Status └─► GET /v1/sys/seal-status └─► If unsealed: Skip unseal steps -Step 13: Regenerate Secret IDs Only - └─► POST /v1/auth/approle/role/gui-service/secret-id - └─► POST /v1/auth/approle/role/cron-manager-service/secret-id - └─► POST /v1/auth/approle/role/llm-orchestration-service/secret-id - └─► Write new secret_ids to /agent/credentials/ +Step 13: Validate and Reconcile Secret IDs + └─► For each role (gui, cron-manager, llm-orchestration): + ├─► Test existing on-disk secret_id via AppRole login + ├─► If valid: Reuse (no change to credential file) + └─► If invalid/missing: Mint new secret_id and write to disk Note: role_ids remain unchanged (static identifiers) Note: Existing secrets and policies preserved Note: RSA keypair NOT regenerated (preserved) +Note: Stable secret_ids across restarts reduce credential churn ═══════════════════════════════════════════════════════════════════ COMPLETION @@ -1128,13 +1138,14 @@ Startup Order: vault-init Behavior: - Detects Vault already initialized - Skips initialization steps - - Regenerates secret_ids only - - Updates credential files + - Validates existing secret_ids (reuses if still valid) + - Mints new secret_ids only if existing ones are invalid Result: - All services start with fresh credentials + All services start with validated credentials Existing secrets preserved No manual intervention needed + Stable secret_ids reduce unnecessary credential churn ``` ### Token Regeneration Strategy @@ -1143,21 +1154,22 @@ Result: Current Implementation: 1. On Every Container Restart: - └─► vault-init regenerates secret_ids - └─► Vault agents get new tokens - └─► Old tokens remain valid until expiration + └─► vault-init validates existing secret_ids + ├─► If valid: Reuse (agents continue with same credentials) + └─► If invalid: Mint new secret_id, agents re-authenticate 2. 
Token Lifecycle: └─► Issue: vault-agent authenticates └─► Use: Application makes requests - └─► Renew: vault-agent extends TTL - └─► Expire: Automatic renewal failed - └─► Re-issue: vault-agent re-authenticates + └─► Renew: vault-agent extends TTL (at ~75% of TTL) + └─► Max TTL reached: Renewal rejected by Vault + └─► Re-issue: vault-agent re-authenticates with secret_id 3. Security Benefits: - Short-lived tokens (1 hour for LLM, 32 days for others) - Automatic rotation on agent restart - No manual token management + Short-lived tokens (1 hour for LLM, 30m for Cron, 15m for GUI) + Continuous renewal within max_ttl window + Automatic re-authentication when max_ttl reached + Stable secret_ids (no unnecessary churn on restart) Compromised tokens have limited lifetime ``` diff --git a/vault-init.sh b/vault-init.sh index eada7518..164d8bdc 100644 --- a/vault-init.sh +++ b/vault-init.sh @@ -7,6 +7,67 @@ INIT_FLAG="/vault/data/.initialized" echo "=== Vault Initialization Script ===" +# --------------------------------------------------------------------------- +# Helpers (used by the SUBSEQUENT DEPLOYMENT branch) +# --------------------------------------------------------------------------- + +# Ensure a role_id file exists on disk; fetch from Vault if missing. +# Usage: ensure_role_id +ensure_role_id() { + role="$1"; rid_file="$2" + if [ -f "$rid_file" ] && [ -s "$rid_file" ]; then + return 0 + fi + echo "Fetching role_id for $role..." + rid=$(wget -q -O- \ + --header="X-Vault-Token: $ROOT_TOKEN" \ + "$VAULT_ADDR/v1/auth/approle/role/$role/role-id" | \ + grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') + echo "$rid" > "$rid_file" + chmod 640 "$rid_file" +} + +# Return 0 if the on-disk role_id + secret_id still authenticate, 1 otherwise. 
+# Usage: validate_secret_id +validate_secret_id() { + rid_file="$1"; sid_file="$2" + [ -f "$rid_file" ] && [ -f "$sid_file" ] || return 1 + rid=$(cat "$rid_file"); sid=$(cat "$sid_file") + [ -n "$rid" ] && [ -n "$sid" ] || return 1 + # wget returns non-zero on HTTP 400 (invalid creds); also confirm a token came back. + resp=$(wget -q -O- \ + --post-data="{\"role_id\":\"$rid\",\"secret_id\":\"$sid\"}" \ + --header='Content-Type: application/json' \ + "$VAULT_ADDR/v1/auth/approle/login" 2>/dev/null) || return 1 + echo "$resp" | grep -q '"client_token"' || return 1 + return 0 +} + +# Mint a fresh secret_id for a role and write it to disk. +# Usage: mint_secret_id +mint_secret_id() { + role="$1"; sid_file="$2" + sid=$(wget -q -O- --post-data='' \ + --header="X-Vault-Token: $ROOT_TOKEN" \ + "$VAULT_ADDR/v1/auth/approle/role/$role/secret-id" | \ + grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') + echo "$sid" > "$sid_file" + chmod 640 "$sid_file" +} + +# Reuse the existing secret_id if it still authenticates; otherwise mint a new one. +# Usage: reconcile_secret_id +reconcile_secret_id() { + role="$1"; rid_file="$2"; sid_file="$3" + ensure_role_id "$role" "$rid_file" + if validate_secret_id "$rid_file" "$sid_file"; then + echo "$role: existing secret_id still valid - reusing" + else + echo "$role: secret_id invalid or missing - minting a new one" + mint_secret_id "$role" "$sid_file" + fi +} + # Wait for Vault to be ready echo "Waiting for Vault..." for i in $(seq 1 30); do @@ -116,21 +177,21 @@ path "auth/token/lookup-self" { capabilities = ["read"] }' # Create GUI AppRole echo "Creating gui-service AppRole..." 
- wget -q -O- --post-data='{"token_policies":["gui-policy"],"token_no_default_policy":true,"token_ttl":"15m","token_max_ttl":"1h","secret_id_ttl":"24h","secret_id_num_uses":0,"bind_secret_id":true}' \ + wget -q -O- --post-data='{"token_policies":["gui-policy"],"token_ttl":"15m","token_max_ttl":"1h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \ --header="X-Vault-Token: $ROOT_TOKEN" \ --header='Content-Type: application/json' \ "$VAULT_ADDR/v1/auth/approle/role/gui-service" >/dev/null # Create CronManager AppRole echo "Creating cron-manager-service AppRole..." - wget -q -O- --post-data='{"token_policies":["cron-manager-policy"],"token_no_default_policy":true,"token_ttl":"30m","token_max_ttl":"8h","secret_id_ttl":"24h","secret_id_num_uses":0,"bind_secret_id":true}' \ + wget -q -O- --post-data='{"token_policies":["cron-manager-policy"],"token_ttl":"30m","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \ --header="X-Vault-Token: $ROOT_TOKEN" \ --header='Content-Type: application/json' \ "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service" >/dev/null # Create LLM Orchestration AppRole echo "Creating llm-orchestration-service AppRole..." - wget -q -O- --post-data='{"token_policies":["llm-orchestration-policy"],"token_no_default_policy":true,"token_ttl":"1h","token_max_ttl":"8h","secret_id_ttl":"24h","secret_id_num_uses":0,"bind_secret_id":true}' \ + wget -q -O- --post-data='{"token_policies":["llm-orchestration-policy"],"token_ttl":"1h","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \ --header="X-Vault-Token: $ROOT_TOKEN" \ --header='Content-Type: application/json' \ "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service" >/dev/null @@ -280,61 +341,15 @@ else # Ensure credentials directory exists mkdir -p /agent/credentials - # Always regenerate all secret_ids on restart - echo "Regenerating GUI secret_id..." 
- GUI_SECRET_ID=$(wget -q -O- --post-data='' \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - "$VAULT_ADDR/v1/auth/approle/role/gui-service/secret-id" | \ - grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') - echo "$GUI_SECRET_ID" > /agent/credentials/gui_secret_id - - echo "Regenerating CronManager secret_id..." - CRON_SECRET_ID=$(wget -q -O- --post-data='' \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service/secret-id" | \ - grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') - echo "$CRON_SECRET_ID" > /agent/credentials/cron_secret_id - - echo "Regenerating LLM secret_id..." - LLM_SECRET_ID=$(wget -q -O- --post-data='' \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service/secret-id" | \ - grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') - echo "$LLM_SECRET_ID" > /agent/credentials/llm_secret_id - - # Set permissions - chmod 640 /agent/credentials/*_secret_id - - # Ensure role_ids exist - if [ ! -f /agent/credentials/gui_role_id ]; then - echo "Copying GUI role_id..." - GUI_ROLE_ID=$(wget -q -O- \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - "$VAULT_ADDR/v1/auth/approle/role/gui-service/role-id" | \ - grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') - echo "$GUI_ROLE_ID" > /agent/credentials/gui_role_id - chmod 640 /agent/credentials/gui_role_id - fi - - if [ ! -f /agent/credentials/cron_role_id ]; then - echo "Copying CronManager role_id..." - CRON_ROLE_ID=$(wget -q -O- \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service/role-id" | \ - grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') - echo "$CRON_ROLE_ID" > /agent/credentials/cron_role_id - chmod 640 /agent/credentials/cron_role_id - fi - - if [ ! -f /agent/credentials/llm_role_id ]; then - echo "Copying LLM role_id..." 
- LLM_ROLE_ID=$(wget -q -O- \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service/role-id" | \ - grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"') - echo "$LLM_ROLE_ID" > /agent/credentials/llm_role_id - chmod 640 /agent/credentials/llm_role_id - fi + # Reconcile secret_ids: reuse the existing one if it still authenticates, + # mint a new one only if it is invalid or missing. This keeps a single + # long-lived secret_id stable across normal restarts (secret_id_ttl=0, + # secret_id_num_uses=0), instead of rotating it every boot. + # ensure_role_id (called inside reconcile_secret_id) guarantees the role_id + # file exists before validation, since validation needs both. + reconcile_secret_id "gui-service" /agent/credentials/gui_role_id /agent/credentials/gui_secret_id + reconcile_secret_id "cron-manager-service" /agent/credentials/cron_role_id /agent/credentials/cron_secret_id + reconcile_secret_id "llm-orchestration-service" /agent/credentials/llm_role_id /agent/credentials/llm_secret_id fi echo "=== Vault init complete ===" \ No newline at end of file diff --git a/vault/config/vault.hcl b/vault/config/vault.hcl index eaef415a..64ab325e 100644 --- a/vault/config/vault.hcl +++ b/vault/config/vault.hcl @@ -1,22 +1,27 @@ # HashiCorp Vault Server Configuration -# Production-ready configuration for LLM Orchestration Service +# Single-node Raft for the RAG-Module services -# Storage backend - Raft for high availability +# Storage backend - Raft storage "raft" { path = "/vault/file" node_id = "vault-node-1" - - # Retry join configuration for clustering (single node for now) - retry_join { - leader_api_addr = "http://vault:8200" - } + + # NOTE: No retry_join for a single node. A lone node self-bootstraps. + # A retry_join pointing at itself causes repeated + # "failed to get raft challenge ... Vault is sealed" errors and a + # messy double Raft init on every boot. 
Add retry_join back only when + # you actually have peer nodes to join. } -# HTTP listener configuration +# HTTP API listener. +# Vault automatically uses the next port up (8201) as its internal +# cluster port, so do NOT define a separate listener on 8201 — that +# collides with the cluster listener ("bind: address already in use") +# and degrades the login/request-forwarding path the agents rely on. listener "tcp" { - address = "0.0.0.0:8200" - tls_disable = true - + address = "0.0.0.0:8200" + tls_disable = true + # Enable CORS for web UI access cors_enabled = true cors_allowed_origins = [ @@ -25,14 +30,9 @@ listener "tcp" { ] } -# Cluster listener for HA (required even for single node) -listener "tcp" { - address = "0.0.0.0:8201" - cluster_addr = "http://0.0.0.0:8201" - tls_disable = true -} - -# API and cluster addresses +# API and cluster addresses. +# cluster_addr tells Vault where its internal cluster port (8201) is +# reachable; Vault binds that port itself — no listener block needed. 
api_addr = "http://vault:8200" cluster_addr = "http://vault:8201" @@ -46,9 +46,5 @@ default_lease_ttl = "168h" # 7 days max_lease_ttl = "720h" # 30 days # Logging configuration -log_level = "INFO" +log_level = "INFO" log_format = "json" - -# Development settings (remove in production) -# Note: In production, you should not use dev mode -# and should properly initialize and unseal the vault \ No newline at end of file From 9d4528233a8e0bfe709864794166a9787b16f749 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 17 Jun 2026 11:53:44 +0530 Subject: [PATCH 3/8] address Vault via unique rag-vault alias to avoid cross -stack DNS collision --- DSL/CronManager/DSL/data_resync.yml | 2 +- DSL/CronManager/DSL/delete_from_vault.yml | 2 +- DSL/CronManager/DSL/store_in_vault.yml | 2 +- .../script/delete_secrets_from_vault.sh | 4 +- .../script/store_secrets_in_vault.sh | 4 +- docker-compose-ec2.yml | 12 +- docker-compose.yml | 14 +- docs/VAULT_SECURITY_ARCHITECTURE.md | 55 ++- docs/VAULT_SETUP_AND_USAGE.md | 355 ++++++++++++++++++ vault-init.sh | 60 +-- vault/agents/cron/cron-agent.hcl | 6 +- vault/agents/gui/gui-agent.hcl | 6 +- vault/agents/llm/agent.hcl | 6 +- 13 files changed, 448 insertions(+), 80 deletions(-) create mode 100644 docs/VAULT_SETUP_AND_USAGE.md diff --git a/DSL/CronManager/DSL/data_resync.yml b/DSL/CronManager/DSL/data_resync.yml index b5994d1e..a232ba39 100644 --- a/DSL/CronManager/DSL/data_resync.yml +++ b/DSL/CronManager/DSL/data_resync.yml @@ -2,4 +2,4 @@ agency_data_resync: trigger: "0 0 0/1 * * ?" 
# trigger: off type: exec - command: "../app/scripts/agency_data_resync.sh -s 10" \ No newline at end of file + command: "/app/scripts/agency_data_resync.sh -s 10" \ No newline at end of file diff --git a/DSL/CronManager/DSL/delete_from_vault.yml b/DSL/CronManager/DSL/delete_from_vault.yml index d7f06cea..cde1df27 100644 --- a/DSL/CronManager/DSL/delete_from_vault.yml +++ b/DSL/CronManager/DSL/delete_from_vault.yml @@ -2,4 +2,4 @@ delete_secrets: trigger: off type: exec command: "/app/scripts/delete_secrets_from_vault.sh" - allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','embeddingModel','embeddingPlatform'] + allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','embeddingModel','embeddingPlatform', 'vaultAgentUrl'] diff --git a/DSL/CronManager/DSL/store_in_vault.yml b/DSL/CronManager/DSL/store_in_vault.yml index fa1a6ac1..46f861e6 100644 --- a/DSL/CronManager/DSL/store_in_vault.yml +++ b/DSL/CronManager/DSL/store_in_vault.yml @@ -2,4 +2,4 @@ store_secrets: trigger: off type: exec command: "/app/scripts/store_secrets_in_vault.sh" - allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','secretKey','accessKey','deploymentName','targetUrl','apiKey','embeddingModel','embeddingPlatform','embeddingAccessKey','embeddingSecretKey','embeddingDeploymentName','embeddingTargetUri','embeddingAzureApiKey','deploymentEnvironment'] \ No newline at end of file + allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','secretKey','accessKey','deploymentName','targetUrl','apiKey','embeddingModel','embeddingPlatform','embeddingAccessKey','embeddingSecretKey','embeddingDeploymentName','embeddingTargetUri','embeddingAzureApiKey','deploymentEnvironment', 'vaultAgentUrl'] \ No newline at end of file diff --git a/DSL/CronManager/script/delete_secrets_from_vault.sh b/DSL/CronManager/script/delete_secrets_from_vault.sh index a6423566..3b405927 100644 --- a/DSL/CronManager/script/delete_secrets_from_vault.sh +++ 
b/DSL/CronManager/script/delete_secrets_from_vault.sh @@ -6,9 +6,9 @@ set -e # Exit on any error # Configuration -# Use VAULT_AGENT_URL which points to vault-agent-cron proxy +# Use vaultAgentUrl which points to vault-agent-cron proxy # The agent automatically injects the authentication token -VAULT_ADDR="${VAULT_AGENT_URL:-http://vault-agent-cron:8203}" +VAULT_ADDR="${vaultAgentUrl:-http://vault-agent-cron:8203}" # Logging function log() { diff --git a/DSL/CronManager/script/store_secrets_in_vault.sh b/DSL/CronManager/script/store_secrets_in_vault.sh index 8f4056f8..60784eed 100644 --- a/DSL/CronManager/script/store_secrets_in_vault.sh +++ b/DSL/CronManager/script/store_secrets_in_vault.sh @@ -6,9 +6,9 @@ set -e # Exit on any error # Configuration -# Use VAULT_AGENT_URL which points to vault-agent-cron proxy +# Use vaultAgentUrl which points to vault-agent-cron proxy # The agent automatically injects the authentication token -VAULT_ADDR="${VAULT_AGENT_URL:-http://vault-agent-cron:8203}" +VAULT_ADDR="${vaultAgentUrl:-http://vault-agent-cron:8203}" # Decryption Configuration PRIVATE_KEY_CACHE="" diff --git a/docker-compose-ec2.yml b/docker-compose-ec2.yml index a9052865..136c097c 100644 --- a/docker-compose-ec2.yml +++ b/docker-compose-ec2.yml @@ -502,7 +502,11 @@ services: - ./vault/config:/vault/config:ro - ./vault/logs:/vault/logs networks: - - vault-network # Only on vault-network for security + vault-network: # Only on vault-network for security + # Local testing: bare "vault" collides with the ckb stack on the shared + # bykstack network, so expose this Vault under a unique alias instead. 
+ aliases: + - rag-vault restart: unless-stopped healthcheck: test: ["CMD", "sh", "-c", "wget -q -O- http://127.0.0.1:8200/v1/sys/health || exit 0"] @@ -519,7 +523,7 @@ services: vault: condition: service_healthy environment: - VAULT_ADDR: http://vault:8200 + VAULT_ADDR: http://rag-vault:8200 volumes: - vault-data:/vault/data - vault-agent-creds:/agent/credentials @@ -528,8 +532,8 @@ services: - vault-agent-llm-token:/agent/llm-token - ./vault-init.sh:/vault-init.sh:ro networks: - - vault-network # Access vault - - bykstack # Access to write agent tokens + # vault-network only: tokens/creds go via shared volumes, not the network. + - vault-network entrypoint: ["/bin/sh"] command: - -c diff --git a/docker-compose.yml b/docker-compose.yml index ec324649..3e6cfba2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -191,7 +191,7 @@ services: environment: - server.port=9010 - PYTHONPATH=/app:/app/src/vector_indexer:/app/src/intent_data_enrichment:/app/src/api_tool_indexer - - VAULT_AGENT_URL=http://vault-agent-cron:8203 + - vaultAgentUrl=http://vault-agent-cron:8203 ports: - 9010:8080 depends_on: @@ -449,7 +449,11 @@ services: - ./vault/config:/vault/config:ro - ./vault/logs:/vault/logs networks: - - vault-network # Only on vault-network for security + vault-network: # Only on vault-network for security + # Local testing: bare "vault" collides with the ckb stack on the shared + # bykstack network, so expose this Vault under a unique alias instead. 
+ aliases: + - rag-vault restart: unless-stopped healthcheck: test: ["CMD", "sh", "-c", "wget -q -O- http://127.0.0.1:8200/v1/sys/health || exit 0"] @@ -466,7 +470,7 @@ services: vault: condition: service_healthy environment: - VAULT_ADDR: http://vault:8200 + VAULT_ADDR: http://rag-vault:8200 volumes: - vault-data:/vault/data - vault-agent-creds:/agent/credentials @@ -475,8 +479,8 @@ services: - vault-agent-llm-token:/agent/llm-token - ./vault-init.sh:/vault-init.sh:ro networks: - - vault-network # Access vault - - bykstack # Access to write agent tokens + # vault-network only: tokens/creds go via shared volumes, not the network. + - vault-network entrypoint: ["/bin/sh"] command: - -c diff --git a/docs/VAULT_SECURITY_ARCHITECTURE.md b/docs/VAULT_SECURITY_ARCHITECTURE.md index 3f7f7ff2..2c2c6836 100644 --- a/docs/VAULT_SECURITY_ARCHITECTURE.md +++ b/docs/VAULT_SECURITY_ARCHITECTURE.md @@ -416,10 +416,9 @@ Connected Services: - GUI (React Frontend) Token Lifecycle: - - Token TTL: 15m - - Token Max TTL: 1h - - Auto-renewal: Every ~11 minutes (75% of TTL) - - Re-auth: When max_ttl reached (every ~1h) + - Token type: periodic (token_period 20m, no max-TTL) + - Auto-renewal: Every ~13 minutes (~2/3 of period) + - Re-auth: only on agent restart (never in steady state) ``` #### Agent 2: vault-agent-cron @@ -434,10 +433,9 @@ Connected Services: - CronManager (Python worker) Token Lifecycle: - - Token TTL: 30m - - Token Max TTL: 8h - - Auto-renewal: Every ~22 minutes (75% of TTL) - - Re-auth: When max_ttl reached (every ~8h) + - Token type: periodic (token_period 30m, no max-TTL) + - Auto-renewal: Every ~20 minutes (~2/3 of period) + - Re-auth: only on agent restart (never in steady state) ``` #### Agent 3: vault-agent-llm @@ -452,10 +450,9 @@ Connected Services: - LLM Orchestration Service (FastAPI) Token Lifecycle: - - Token TTL: 1h - - Token Max TTL: 8h - - Auto-renewal: Every ~45 minutes (75% of TTL) - - Re-auth: When max_ttl reached (every ~8h) + - Token type: periodic 
(token_period 1h, no max-TTL) + - Auto-renewal: Every ~40 minutes (~2/3 of period) + - Re-auth: only on agent restart (never in steady state) ``` ### Token Caching and Auto-Renewal @@ -473,29 +470,31 @@ T=0: Initial Authentication ├─► POST /v1/auth/approle/login │ Body: { role_id, secret_id } │ - └─► Receives: { token, ttl: 3600s, renewable: true } + └─► Receives: { token, period: 3600s, renewable: true } ← periodic token, no max-TTL │ └─► Cache token in: /agent/llm-token/token -T=45min: Proactive Renewal (75% of TTL) +T≈40min: Proactive Renewal (~2/3 of period) vault-agent monitors expiration │ ├─► POST /v1/auth/token/renew-self │ Header: X-Vault-Token: │ - └─► Receives: { token, ttl: 3600s } (same token, extended) + └─► Receives: { token, period: 3600s } (same token, period reset) │ └─► Update cache: /agent/llm-token/token + │ + └─► Repeats forever — a periodic token never hits a max-TTL, + so steady-state operation never needs approle/login again. -T=59min: Renewal Failed (fallback) - If renewal fails: +On agent restart only: + vault-agent re-reads role_id + secret_id from disk │ - ├─► Re-authenticate from scratch - │ POST /v1/auth/approle/login + ├─► POST /v1/auth/approle/login (secret_id must still be valid) │ - └─► New token issued and cached + └─► New periodic token issued and cached Application Request (anytime): @@ -1159,18 +1158,18 @@ Current Implementation: └─► If invalid: Mint new secret_id, agents re-authenticate 2. Token Lifecycle: - └─► Issue: vault-agent authenticates + └─► Issue: vault-agent authenticates (periodic token, token_period per role) └─► Use: Application makes requests - └─► Renew: vault-agent extends TTL (at ~75% of TTL) - └─► Max TTL reached: Renewal rejected by Vault - └─► Re-issue: vault-agent re-authenticates with secret_id + └─► Renew: vault-agent renews within the period (~2/3 of period) + └─► No max-TTL: renewal continues indefinitely + └─► Re-issue: only on agent restart, via secret_id login 3. 
Security Benefits: - Short-lived tokens (1 hour for LLM, 30m for Cron, 15m for GUI) - Continuous renewal within max_ttl window - Automatic re-authentication when max_ttl reached + Periodic tokens (period 1h LLM, 30m Cron, 20m GUI), renewed continuously + Steady-state operation never re-runs approle/login (a stale secret_id + cannot strand a running agent) Stable secret_ids (no unnecessary churn on restart) - Compromised tokens have limited lifetime + Compromised tokens limited to one un-renewed period ``` ### Audit Logging Capabilities diff --git a/docs/VAULT_SETUP_AND_USAGE.md b/docs/VAULT_SETUP_AND_USAGE.md new file mode 100644 index 00000000..e61d362b --- /dev/null +++ b/docs/VAULT_SETUP_AND_USAGE.md @@ -0,0 +1,355 @@ +# Vault Setup & Usage Guide + +A single reference for how HashiCorp Vault is deployed, initialized, and consumed in the +RAG-Module. It covers the topology, the three Vault Agents, the secret layout, and — in +depth — **how each agent renews its token and how secrets are rotated**. + +Source files this document describes: + +- `docker-compose.yml` — service/topology definition +- `vault/config/vault.hcl` — Vault server config +- `vault-init.sh` — one-time bootstrap + per-restart reconcile +- `vault/agents/{gui,cron,llm}/*.hcl` — the three Vault Agent configs +- `DSL/CronManager/script/store_secrets_in_vault.sh` — writes/rotates secrets +- `DSL/CronManager/script/delete_secrets_from_vault.sh` — deletes secrets + +For the security rationale (threat model, defense-in-depth, access matrix) see the +companion `docs/VAULT_SECURITY_ARCHITECTURE.md`. This guide focuses on the *operational* +mechanics. + +--- + +## 1. 
Topology at a glance + +``` + bykstack (application network) vault-network (internal: true) + ┌───────────────────────────────────────────────┐ ┌──────────────────────────────┐ + │ gui ──────────────► vault-agent-gui :8202 ───┼────────┤ │ + │ cron-manager ─────► vault-agent-cron :8203 ───┼────────┤ vault :8200 │ + │ llm-orchestration ► vault-agent-llm :8201 ───┼────────┤ (Raft storage, KV v2, │ + │ │ │ AppRole auth) │ + │ vault-init (also on vault-network) ───────────┼────────┤ │ + └───────────────────────────────────────────────┘ └──────────────────────────────┘ +``` + +- **`vault`** runs only on `vault-network`, which is `internal: true` — it has **no route to + or from the host or the internet**. Port 8200 is never published. +- **Vault Agents** straddle both networks: they reach `vault` on `vault-network` and are + reachable by their owning application on `bykstack`. +- **Applications** talk *only* to their agent (`VAULT_ADDR=http://vault-agent-*:820x`) and + never hold a Vault token themselves. The agent injects the token transparently. + +| Service | Agent it uses | Agent address | AppRole | Policy | +|---|---|---|---|---| +| `gui` | `vault-agent-gui` | `:8202` | `gui-service` | `gui-policy` | +| `cron-manager` | `vault-agent-cron` | `:8203` | `cron-manager-service` | `cron-manager-policy` | +| `llm-orchestration-service` | `vault-agent-llm` | `:8201` | `llm-orchestration-service` | `llm-orchestration-policy` | + +--- + +## 2. Vault server (`vault/config/vault.hcl`) + +- **Storage:** Raft, single node (`node_id = vault-node-1`, path `/vault/file`, persisted in + the `vault-data` volume). No `retry_join` — a lone node self-bootstraps; adding a self- + pointing join was found to cause "Vault is sealed" boot loops. +- **Listener:** `0.0.0.0:8200`, `tls_disable = true` (TLS is terminated at the network + boundary; the network itself is the isolation layer here). 
Port `8201` is *not* given its + own listener because Vault uses it as the internal cluster port automatically. +- **Lease defaults:** `default_lease_ttl = 168h` (7 days), `max_lease_ttl = 720h` (30 days). + These are *system ceilings*; the per-AppRole `token_period` values (below) are much shorter and are + what actually govern agent renewal cadence. +- `disable_mlock = false`, `ui = false`, JSON logs at INFO. + +Vault boots **sealed**. It must be unsealed before any operation — that is `vault-init`'s +first job. + +--- + +## 3. Bootstrap & reconcile (`vault-init.sh`) + +`vault-init` is a **run-once-then-exit** container (`restart: "no"`). The agents declare +`depends_on: vault-init: condition: service_completed_successfully`, so they only start +after init has finished cleanly. It runs `su vault -s /bin/sh /vault-init.sh` after creating +and `chown`ing the shared agent directories. + +The script has two branches, selected by the presence of `/vault/data/.initialized`. + +### 3.1 First-time deployment + +1. Wait for `/v1/sys/health` to respond. +2. **Initialize** with Shamir's Secret Sharing: `secret_shares=5`, `secret_threshold=3`. + The full response (5 unseal keys + root token) is written to + `/vault/data/unseal-keys.json`. +3. **Unseal** by submitting 3 of the 5 keys. +4. **Enable engines:** KV v2 at `secret/`, and the AppRole auth method. +5. **Create three ACL policies** (see §5). +6. **Create three AppRoles** issuing periodic tokens (see §4 — this is the heart of renewal), + via the `ensure_approles` helper. The same helper re-runs on subsequent deploys, so AppRole + config changes land without re-initializing Vault. +7. **Issue credentials:** for each role, fetch the static `role_id` and mint a `secret_id`, + writing both to `/agent/credentials/<agent>_role_id` and `<agent>_secret_id` (`chmod 640`). +8.
**Generate an RSA-2048 keypair** with `openssl` and store it in Vault at + `secret/encryption/public_key` and `secret/encryption/private_key` + (algorithm `RSA-OAEP`, with `key_id` and `created_at` metadata). +9. Seed a test LLM secret, then `touch /vault/data/.initialized`. + +### 3.2 Subsequent deployment (restart) + +1. Check `/v1/sys/seal-status`; if sealed, reload the 3 unseal keys from + `unseal-keys.json` and unseal. +2. **Reconcile each secret_id** via `reconcile_secret_id`: + - `ensure_role_id` — make sure the `role_id` file exists (re-fetch from Vault if missing). + - `validate_secret_id` — attempt an AppRole login with the on-disk `role_id` + `secret_id`. + If it returns a `client_token`, the credential is still good. + - **Valid → reuse** the existing `secret_id` (no churn). + - **Invalid/missing → `mint_secret_id`** writes a fresh one. + +This is deliberate: because the AppRoles are created with `secret_id_ttl=0` and +`secret_id_num_uses=0` (non-expiring, unlimited-use), a single long-lived `secret_id` +survives normal restarts instead of being regenerated every boot. The RSA keypair, policies, +and stored secrets are all preserved across restarts. + +> **Note on file permissions:** `vault-init.sh` writes credential files with `chmod 640`. +> (The older architecture doc mentions `644`; the script is the source of truth — `640`.) + +--- + +## 4. The three Vault Agents — auth, renewal & rotation + +This is the core of this guide. All three agents are the same Vault binary +(`hashicorp/vault:1.20.3`) run as `vault agent -config=...`. They differ only in which +credentials they read, which token sink they write, and their listener port.
+ +### 4.1 What an agent config actually does + +Example (`vault/agents/llm/agent.hcl`; gui/cron are identical in shape): + +```hcl +vault { address = "http://vault:8200"; retry { num_retries = 5 } } + +auto_auth { + method "approle" { + mount_path = "auth/approle" + config = { + role_id_file_path = "/agent/credentials/llm_role_id" + secret_id_file_path = "/agent/credentials/llm_secret_id" + remove_secret_id_file_after_reading = false + } + } + sink "file" { config = { path = "/agent/llm-token/token"; mode = 0640 } } +} + +cache { default_lease_duration = "1h" } +listener "tcp" { address = "0.0.0.0:8201"; tls_disable = true } +api_proxy { use_auto_auth_token = true } +``` + +Three mechanisms are at work: + +1. **`auto_auth` (authentication + renewal):** On startup the agent reads `role_id` + + `secret_id` and calls `POST /v1/auth/approle/login`. Vault returns a **periodic token** + (the AppRoles set `token_period`, defined in `vault-init.sh`, *not* in the HCL). The agent + then runs Vault's **auto-auth lifecycle manager**, which **renews the token automatically + in the background** before each period elapses. A periodic token has **no max-TTL**, so the + agent renews it indefinitely and — during normal operation — **never has to call + `approle/login` again**. The agent only re-authenticates (and thus only needs the + `secret_id` again) if it is **restarted** or if a renewal is missed long enough for the + token to lapse. `remove_secret_id_file_after_reading = false` keeps the `secret_id` on disk + so the agent can re-auth after a restart without `vault-init` re-minting. + + > **Why periodic tokens?** An earlier design issued tokens with `token_ttl`/`token_max_ttl`, + > which forced a full re-login every time `token_max_ttl` was reached. If the `secret_id` + > had become invalid by then (expiry, clock skew, server re-init), the agent got stuck in an + > `invalid role or secret ID` 400 backoff loop with no way to self-heal. 
Periodic tokens + > remove that re-login from the steady state, so a stale `secret_id` can no longer strand a + > running agent. +2. **`sink "file"` (token hand-off):** Every time the agent obtains/renews a token it writes + it to a file (`/agent/<agent>-token/token`, mode `0640`). The compose **health check** for + each agent is simply `test -f <token-file> && test -s <token-file>` — a non-empty token file means + the agent has authenticated successfully. +3. **`api_proxy { use_auto_auth_token = true }` (transparent injection):** The agent also + listens as an HTTP proxy on its port. When the application sends a token-less request, the + agent injects `X-Vault-Token: <token>` and forwards it to `vault:8200`. + This is why application code never sets `VAULT_TOKEN`. + +> **`cache.default_lease_duration` is not the token TTL.** It is the agent's cache lease +> hint. The authoritative token lifetime comes from the AppRole's `token_period` in +> `vault-init.sh`. The per-agent cache hint is set to match the period. + +### 4.2 Per-agent renewal parameters + +AppRole token settings are created in `vault-init.sh`; all three use +`token_period` (periodic token, **no max-TTL**), `secret_id_ttl=0`, `secret_id_num_uses=0`, +`token_num_uses=0`, `bind_secret_id=true`. + +| Agent | AppRole | `token_period` | Proactive renewal (~⅔ of period) | Re-login (`approle/login`) | +|---|---|---|---|---| +| `vault-agent-gui` | `gui-service` | **20m** | ~every 13 min | only on agent restart | +| `vault-agent-cron` | `cron-manager-service` | **30m** | ~every 20 min | only on agent restart | +| `vault-agent-llm` | `llm-orchestration-service` | **1h** | ~every 40 min | only on agent restart | + +Reading the lifecycle for, e.g., the LLM agent: + +``` +T=0 login → periodic token (period 1h) → written to /agent/llm-token/token +T≈40m renew-self → period resets to 1h → token file refreshed +...
renew repeats forever; token never hits a max-TTL +(restart) agent re-runs approle/login with the on-disk secret_id → fresh token +``` + +The periods are tuned per service (shorter for the GUI, which only reads the public key; +longer for the high-traffic LLM read path), but functionally all three behave the same: +**renew forever, re-login only on restart.** + +### 4.3 Two distinct "rotation" concepts — keep them separate + +1. **Token rotation (automatic, continuous):** Handled entirely by the agent's `auto_auth` + loop as described above — the periodic token is renewed indefinitely with no human action + and no `vault-init` involvement. +2. **`secret_id` rotation (rare):** The `secret_id` is the long-lived credential the agent + uses to *log in* (at startup/restart only, now that tokens are periodic). It is configured + non-expiring (`secret_id_ttl=0`, `secret_id_num_uses=0`) and is only replaced by + `vault-init` on a restart when the existing one fails validation (§3.2). To force rotation, + delete the `secret_id` file (or invalidate it in Vault) and re-run `vault-init`, then + restart the agent so it logs in with the freshly minted one. + + > **Operational caveat (learned the hard way):** if a `secret_id` ever does become invalid + > while an agent is running, the periodic-token design means a *running* agent keeps working + > (it only renews, never re-logs-in). But a **restarted** agent needs a valid `secret_id` to + > log in. Recovery is always: re-run `vault-init` (mints a fresh `secret_id` via the §3.2 + > reconcile) → restart the affected agent. See the troubleshooting section (§9) + > below. + +### 4.4 Restart behavior + +- **Restart an agent:** It re-reads `role_id`/`secret_id` from the (read-only) creds volume + and re-authenticates. New token, written to the sink. App sees a brief blip. +- **Restart `vault`:** Data persists; `vault-init` (or the existing agent tokens, if still + valid) handle re-unseal/re-auth.
Existing tokens remain valid if not expired. +- **Full `down && up`:** Order is `vault → vault-init → agents → apps`. `vault-init` detects + the `.initialized` flag, skips first-time setup, reconciles secret_ids, and the agents + start with validated credentials. + +--- + +## 5. Authorization — policies (who can touch what) + +Created in `vault-init.sh`. Paths are KV v2, so data lives under `secret/data/...` and +listing/metadata under `secret/metadata/...`. + +| Path | `gui-policy` | `cron-manager-policy` | `llm-orchestration-policy` | +|---|---|---|---| +| `secret/data/encryption/public_key` | **read** | read | — | +| `secret/data/encryption/private_key` | **deny** | **read** | — | +| `secret/data/encryption/*` | — | — | **deny** | +| `secret/data/llm/connections/*` | deny | **create/read/update/delete** | **read, list** | +| `secret/data/embeddings/connections/*` | deny | **create/read/update/delete** | **read, list** | +| `auth/token/lookup-self` | — | read | read | + +The intent, by tier: + +- **GUI** — can read *only* the public key, to encrypt user-entered credentials in the + browser before they ever leave it. Everything else is explicitly denied. +- **CronManager** — the only writer. Reads the **private key** to decrypt what the GUI + encrypted, then writes plaintext credentials into Vault. Full CRUD on connection secrets. +- **LLM Orchestration** — read-only consumer of connection secrets. **Explicitly denied** all + encryption keys, so a compromise of this hot-path service cannot exfiltrate the private key. + +--- + +## 6. Secret layout (KV v2 under `secret/`) + +``` +secret/ +├── llm/connections// ← e.g. 
aws_bedrock, azure_openai +├── embeddings/connections// +└── encryption/ + ├── public_key { key, algorithm: RSA-OAEP, key_size: 2048, key_id, created_at } + └── private_key { key, algorithm: RSA-OAEP, key_size: 2048, key_id, created_at } +``` + +The current write/delete scripts key connection secrets by a stable **`vaultUuid`** as the +final path segment (environment is tracked in the DB, not the path). KV v2 versions every +write, so updating a credential keeps prior versions for audit/rollback. + +LLM secret shape (AWS): `{ connection_id, access_key, secret_key, model, tags }`. +Azure: `{ connection_id, endpoint, api_key, deployment_name, model, api_version, tags }`. + +--- + +## 7. Usage flows + +### 7.1 Storing / rotating a credential (`store_secrets_in_vault.sh`, via cron-manager) + +1. GUI encrypts the raw key with the RSA **public** key and submits it. +2. The cron-manager job runs the script against `vault-agent-cron:8203` (no token — the agent + injects it). +3. The script **fetches the private key** (`GET secret/data/encryption/private_key`), then + decrypts each sensitive field in-memory via `decrypt_vault_secrets.py` (RSA-OAEP). +4. It builds the JSON payload with `jq` and `POST`s plaintext to + `secret/data//connections//`. Re-posting the same path + = a KV v2 version bump = credential rotation. +5. Sensitive shell variables are `unset` immediately after use. + +### 7.2 Deleting a credential (`delete_secrets_from_vault.sh`) + +`DELETE`s both `secret/data/...` and `secret/metadata/...` for the connection (404 treated as +success), again through `vault-agent-cron` with no explicit token. + +### 7.3 Reading a credential (LLM orchestration) + +The LLM service issues a token-less `GET http://vault-agent-llm:8201/v1/secret/data/llm/...`. +`vault-agent-llm` injects its cached token, Vault validates it against +`llm-orchestration-policy`, and returns the secret. The service then calls AWS/Azure with it. + +--- + +## 8. 
Operational notes & known trade-offs + +- **Unseal keys + root token sit in the `vault-data` volume** (`unseal-keys.json`). This makes + auto-unseal on restart trivial but is a **dev/test convenience**. For production, switch to + auto-unseal backed by a cloud KMS/HSM and remove the keys from the volume. +- **Root token** is used only by `vault-init` and is never injected into app containers. Best + practice for production is to revoke it after bootstrap and use scoped admin policies. +- **TLS is disabled** on the Vault listener and agent listeners; isolation relies on the + `internal: true` `vault-network`. Add TLS for any non-local deployment. +- **Audit logging is available but not enabled.** Turn it on with + `vault audit enable file file_path=/vault/logs/audit.log` (the `./vault/logs` mount already + exists) for a full request trail. +- **Credential files are readable by every agent that mounts the shared volume** (mode 640, single owner, + but all agents mount the same `vault-agent-creds` volume read-only) — isolation is at the + volume level, not per-file. Fine for this trust boundary; note it if the threat model + tightens. + +--- + +## 9. Troubleshooting: agents looping on `invalid role or secret ID` + +**Symptom:** an agent logs `lifetime watcher done channel triggered, re-authenticating` +followed by repeating `PUT .../auth/approle/login → Code: 400 ... invalid role or secret ID` +with growing backoff. Token *renewals* had been succeeding up to that point. + +**Cause:** the agent's `secret_id` became invalid server-side (expiry, clock skew, or a Vault +re-init), and the agent reached a point where it had to do a full `approle/login`. With the +old `token_ttl`/`token_max_ttl` design this happened on every `token_max_ttl` cycle; the +switch to **periodic tokens** (§4) removes re-login from steady state, so a *running* agent no +longer hits this — but a **restarted** agent still needs a valid `secret_id`.
+ +**Recovery:** + +```bash +# Mint fresh secret_ids (vault-init's reconcile detects the invalid ones and replaces them) +docker compose up -d --force-recreate vault-init +docker wait vault-init +# Restart the affected agents so they log in with the fresh secret_id +docker compose restart vault-agent-gui vault-agent-cron vault-agent-llm +``` + +**Confirm root cause (read-only):** + +```bash +ROOT=$(docker exec vault sh -c "grep -o '\"root_token\":\"[^\"]*\"' /vault/file/unseal-keys.json | cut -d: -f2 | tr -d '\"'") +docker exec -e VAULT_TOKEN=$ROOT -e VAULT_ADDR=http://127.0.0.1:8200 vault \ + vault read auth/approle/role/gui-service # expect token_period set, secret_id_ttl=0 +echo "host: $(date -u)"; docker exec vault date -u # check for WSL2/Docker clock drift +``` diff --git a/vault-init.sh b/vault-init.sh index 164d8bdc..0e759f8e 100644 --- a/vault-init.sh +++ b/vault-init.sh @@ -68,6 +68,27 @@ reconcile_secret_id() { fi } +# Create or update an AppRole that issues a PERIODIC token (no max_ttl): the +# agent renews it forever and never re-runs approle/login in steady state. +# secret_id_ttl=0 + secret_id_num_uses=0 keep the secret_id valid across +# restarts. Idempotent: does not invalidate existing secret_ids, safe per run. +# Usage: upsert_approle <role> <policy> <period> +upsert_approle() { + role="$1"; policy="$2"; period="$3" + wget -q -O- --post-data='{"token_policies":["'"$policy"'"],"token_period":"'"$period"'","token_num_uses":0,"secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \ + --header="X-Vault-Token: $ROOT_TOKEN" \ + --header='Content-Type: application/json' \ + "$VAULT_ADDR/v1/auth/approle/role/$role" >/dev/null +} + +# Apply the current AppRole definitions for all three services. +ensure_approles() { + echo "Ensuring AppRole configs (periodic tokens)..."
+ upsert_approle "gui-service" "gui-policy" "20m" + upsert_approle "cron-manager-service" "cron-manager-policy" "30m" + upsert_approle "llm-orchestration-service" "llm-orchestration-policy" "1h" +} + # Wait for Vault to be ready echo "Waiting for Vault..." for i in $(seq 1 30); do @@ -175,27 +196,9 @@ path "auth/token/lookup-self" { capabilities = ["read"] }' --header='Content-Type: application/json' \ "$VAULT_ADDR/v1/sys/policies/acl/llm-orchestration-policy" >/dev/null - # Create GUI AppRole - echo "Creating gui-service AppRole..." - wget -q -O- --post-data='{"token_policies":["gui-policy"],"token_ttl":"15m","token_max_ttl":"1h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - --header='Content-Type: application/json' \ - "$VAULT_ADDR/v1/auth/approle/role/gui-service" >/dev/null - - # Create CronManager AppRole - echo "Creating cron-manager-service AppRole..." - wget -q -O- --post-data='{"token_policies":["cron-manager-policy"],"token_ttl":"30m","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - --header='Content-Type: application/json' \ - "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service" >/dev/null - - # Create LLM Orchestration AppRole - echo "Creating llm-orchestration-service AppRole..." - wget -q -O- --post-data='{"token_policies":["llm-orchestration-policy"],"token_ttl":"1h","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \ - --header="X-Vault-Token: $ROOT_TOKEN" \ - --header='Content-Type: application/json' \ - "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service" >/dev/null - + # Create the three AppRoles (periodic tokens - see upsert_approle). 
+ ensure_approles + # Ensure credentials directory exists mkdir -p /agent/credentials @@ -337,16 +340,19 @@ else # Get root token ROOT_TOKEN=$(grep -o '"root_token":"[^"]*"' "$UNSEAL_KEYS_FILE" | cut -d':' -f2 | tr -d '"') export VAULT_TOKEN="$ROOT_TOKEN" - + + # Re-apply AppRole definitions so config changes (e.g. periodic tokens) + # take effect on redeploy without re-initializing Vault. Idempotent and + # does not invalidate existing secret_ids. + ensure_approles + # Ensure credentials directory exists mkdir -p /agent/credentials # Reconcile secret_ids: reuse the existing one if it still authenticates, - # mint a new one only if it is invalid or missing. This keeps a single - # long-lived secret_id stable across normal restarts (secret_id_ttl=0, - # secret_id_num_uses=0), instead of rotating it every boot. - # ensure_role_id (called inside reconcile_secret_id) guarantees the role_id - # file exists before validation, since validation needs both. + # mint a new one only if invalid or missing - keeps one stable secret_id + # across restarts instead of rotating every boot. reconcile_secret_id also + # ensures the role_id file exists first (validation needs both). 
reconcile_secret_id "gui-service" /agent/credentials/gui_role_id /agent/credentials/gui_secret_id reconcile_secret_id "cron-manager-service" /agent/credentials/cron_role_id /agent/credentials/cron_secret_id reconcile_secret_id "llm-orchestration-service" /agent/credentials/llm_role_id /agent/credentials/llm_secret_id diff --git a/vault/agents/cron/cron-agent.hcl b/vault/agents/cron/cron-agent.hcl index f2db227e..9454c9b7 100644 --- a/vault/agents/cron/cron-agent.hcl +++ b/vault/agents/cron/cron-agent.hcl @@ -2,7 +2,9 @@ # This agent provides CronManager with access to encryption keys and write access to secrets vault { - address = "http://vault:8200" + # Local testing: use rag-vault, not bare "vault" — that name collides with the + # ckb stack on the shared bykstack network and authenticates the wrong Vault. + address = "http://rag-vault:8200" retry { num_retries = 5 } @@ -42,6 +44,4 @@ listener "tcp" { # API proxy configuration api_proxy { use_auto_auth_token = true - enforce_consistency = "always" - when_inconsistent = "forward" } diff --git a/vault/agents/gui/gui-agent.hcl b/vault/agents/gui/gui-agent.hcl index a28db871..672d6d4d 100644 --- a/vault/agents/gui/gui-agent.hcl +++ b/vault/agents/gui/gui-agent.hcl @@ -2,7 +2,9 @@ # This agent provides GUI with access to public encryption key only vault { - address = "http://vault:8200" + # Local testing: use rag-vault, not bare "vault" — that name collides with the + # ckb stack on the shared bykstack network and authenticates the wrong Vault. 
+ address = "http://rag-vault:8200" retry { num_retries = 5 } @@ -42,6 +44,4 @@ listener "tcp" { # API proxy configuration api_proxy { use_auto_auth_token = true - enforce_consistency = "always" - when_inconsistent = "forward" } diff --git a/vault/agents/llm/agent.hcl b/vault/agents/llm/agent.hcl index d7237be7..1a575260 100644 --- a/vault/agents/llm/agent.hcl +++ b/vault/agents/llm/agent.hcl @@ -1,5 +1,7 @@ vault { - address = "http://vault:8200" + # Local testing: use rag-vault, not bare "vault" — that name collides with the + # ckb stack on the shared bykstack network and authenticates the wrong Vault. + address = "http://rag-vault:8200" retry { num_retries = 5 } @@ -34,6 +36,4 @@ listener "tcp" { api_proxy { use_auto_auth_token = true - enforce_consistency = "always" - when_inconsistent = "forward" } From e78f63286da27728a38c41c01d77a0eab39f9601 Mon Sep 17 00:00:00 2001 From: ruwinirathnamalala Date: Wed, 17 Jun 2026 12:21:50 +0530 Subject: [PATCH 4/8] Removed testmodel index and css --- GUI/src/components/MainNavigation/index.tsx | 8 +- GUI/src/pages/TestModel/TestLLM.scss | 217 ------------------- GUI/src/pages/TestModel/index.tsx | 228 -------------------- 3 files changed, 1 insertion(+), 452 deletions(-) delete mode 100644 GUI/src/pages/TestModel/TestLLM.scss delete mode 100644 GUI/src/pages/TestModel/index.tsx diff --git a/GUI/src/components/MainNavigation/index.tsx b/GUI/src/components/MainNavigation/index.tsx index 265f464c..8ae278ca 100644 --- a/GUI/src/components/MainNavigation/index.tsx +++ b/GUI/src/components/MainNavigation/index.tsx @@ -44,13 +44,7 @@ const MainNavigation: FC = () => { label: t('menu.testLLM'), path: '/test-llm', icon: - }, - // { - // id: 'testProductionLLM', - // label: t('menu.testProductionLLM'), - // path: '/test-production-llm', - // icon: - // } + } ]; const filterItemsByRole = (role: string[], items: MenuItem[]) => { diff --git a/GUI/src/pages/TestModel/TestLLM.scss b/GUI/src/pages/TestModel/TestLLM.scss deleted file mode 
100644 index 35bced8d..00000000 --- a/GUI/src/pages/TestModel/TestLLM.scss +++ /dev/null @@ -1,217 +0,0 @@ -// .testModalFormTextArea { -// margin-top: 30px; -// } - -// .mcq-buttons { -// display: flex; -// flex-wrap: wrap; -// gap: 0.75rem; -// margin-top: 1rem; -// } - -// .testModalClassifyButton { -// text-align: right; -// margin-top: 20px; -// } - -// .llm-connection-section { -// width: 50%; -// } - -// .llm-connection-controls { -// display: flex; -// gap: 1rem; -// align-items: center; -// } - -// .inference-results-container { -// max-width: 100%; -// background-color: #d7efff; -// padding: 20px; -// border-radius: 8px; -// margin-top: 20px; - -// .result-item { -// margin-bottom: 15px; - -// strong { -// color: #333; -// } -// } - -// .response-content { -// margin-top: 8px; -// padding: 12px; -// background-color: #f5f5f5; -// border-radius: 4px; -// white-space: pre-wrap; -// line-height: 1.5; -// color: #555; -// } - -// .context-section { -// margin-top: 20px; - -// .context-list { -// display: flex; -// flex-direction: column; -// gap: 12px; -// margin-top: 8px; -// } - -// .context-item { -// padding: 12px; -// background-color: #ffffff; -// border: 1px solid #e0e0e0; -// border-radius: 6px; -// box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); - -// .context-rank { -// margin-bottom: 8px; -// padding-bottom: 4px; -// border-bottom: 1px solid #f0f0f0; - -// strong { -// color: #2563eb; -// font-size: 0.875rem; -// font-weight: 600; -// } -// } - -// .context-content { -// color: #374151; -// line-height: 1.5; -// font-size: 0.9rem; -// white-space: pre-wrap; -// } -// } -// } -// } - -// .testModalList { -// list-style: disc; -// margin-left: 30px; -// } - -// .mt-20 { -// margin-top: 20px; -// } - -// .classification-results { -// margin-top: 1rem; -// padding: 1rem; -// border: 1px solid #e0e0e0; -// border-radius: 8px; -// background-color: #f9f9f9; - -// h3 { -// margin: 0 0 1rem 0; -// color: #333; -// } - -// h4 { -// margin: 0 0 0.75rem 0; -// 
color: #555; -// font-size: 1rem; -// } - -// .results-container { -// display: flex; -// flex-direction: column; -// gap: 1.5rem; -// } - -// .top-prediction { -// .prediction-card { -// display: flex; -// justify-content: space-between; -// align-items: center; -// padding: 1rem; -// border-radius: 8px; -// background-color: #e8f5e8; -// border: 2px solid #4caf50; - -// .agency-name { -// font-weight: 600; -// color: #2e7d32; -// font-size: 1.1rem; -// } - -// .confidence-score { -// font-weight: 700; -// color: #2e7d32; -// font-size: 1.2rem; -// } -// } -// } - -// .predictions-list { -// display: flex; -// flex-direction: column; -// gap: 0.75rem; - -// .prediction-item { -// display: flex; -// align-items: center; -// gap: 1rem; -// padding: 0.75rem; -// background-color: white; -// border-radius: 6px; -// border: 1px solid #ddd; - -// &.highest { -// border-color: #4caf50; -// background-color: #f8fff8; -// } - -// .rank { -// font-weight: 600; -// color: #666; -// min-width: 2rem; -// } - -// .agency-info { -// flex: 1; -// display: flex; -// flex-direction: column; -// gap: 0.25rem; - -// .agency-name { -// font-weight: 500; -// color: #333; -// } - -// .confidence-bar-container { -// width: 100%; -// height: 4px; -// background-color: #e0e0e0; -// border-radius: 2px; -// overflow: hidden; - -// .confidence-bar { -// height: 100%; -// background-color: #4caf50; -// transition: width 0.3s ease; -// } -// } -// } - -// .confidence-percentage { -// font-weight: 600; -// color: #555; -// min-width: 4rem; -// text-align: right; -// } -// } -// } -// } - -// .classification-error { -// margin-top: 1rem; -// padding: 1rem; -// background-color: #ffebee; -// border: 1px solid #f44336; -// border-radius: 6px; -// color: #c62828; -// text-align: center; -// } \ No newline at end of file diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx deleted file mode 100644 index 2fc116bd..00000000 --- a/GUI/src/pages/TestModel/index.tsx +++ 
/dev/null @@ -1,228 +0,0 @@ -import { useMutation, useQuery } from '@tanstack/react-query'; -import { Button, FormSelect, FormTextarea, Collapsible } from 'components'; -import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner'; -import { ComponentPropsWithoutRef, FC, useState } from 'react'; -import { useTranslation } from 'react-i18next'; -import ReactMarkdown from 'react-markdown'; -import remarkGfm from 'remark-gfm'; -import './TestLLM.scss'; -import { useDialog } from 'hooks/useDialog'; -import { fetchLLMConnectionsPaginated, LegacyLLMConnectionFilters } from 'services/llmConnections'; -import { viewInferenceResult, InferenceRequest, InferenceResponse, ChoiceButton } from 'services/inference'; -import { llmConnectionsQueryKeys } from 'utils/queryKeys'; -import { ButtonAppearanceTypes } from 'enums/commonEnums'; - -const TestLLM: FC = () => { - const { t } = useTranslation(); - const { open: openDialog, close: closeDialog } = useDialog(); - const [inferenceResult, setInferenceResult] = useState(null); - const [pendingButtons, setPendingButtons] = useState([]); - const [testLLM, setTestLLM] = useState({ - connectionId: null, - text: '', - }); - - // Sort context by rank - const sortedContext = inferenceResult?.chunks?.toSorted((a, b) => a.rank - b.rank) ?? 
[]; - - // Fetch LLM connections for dropdown - using the working legacy endpoint for now - const { data: connections, isLoading: isLoadingConnections } = useQuery({ - queryKey: llmConnectionsQueryKeys.list({ - page: 1, - pageSize: 100, // Get all connections for dropdown - sorting: 'created_at desc', - }), - queryFn: () => fetchLLMConnectionsPaginated({ - pageNumber: 1, - pageSize: 100, - sortBy: 'created_at desc', - }), - }); - - // Transform connections data for dropdown - const connectionOptions = connections?.map((connection: any) => ({ - label: `${connection.llmPlatform} - ${connection.llmModel} (${connection.environment})`, - value: connection.id, - })) || []; - - // Inference mutation - const inferenceMutation = useMutation({ - mutationFn: (request: InferenceRequest) => viewInferenceResult(request), - onSuccess: (data: InferenceResponse) => { - setInferenceResult(data?.response); - setPendingButtons(data?.response?.buttons ?? []); - }, - onError: (error: any) => { - console.error('Error getting inference result:', error); - openDialog({ - title: t('testModels.inferenceErrorTitle') || 'Inference Error', - content:

{t('testModels.inferenceErrorMessage') || 'Failed to get inference result. Please try again.'}

, - footer: ( - - ), - }); - }, - }); - - const handleSend = () => { - if (testLLM.connectionId && testLLM.text) { - inferenceMutation.mutate({ - llmConnectionId: Number(testLLM.connectionId), - message: testLLM.text, - }); - } - }; - - const handleButtonClick = (payload: string) => { - if (!testLLM.connectionId) return; - setPendingButtons([]); - inferenceMutation.mutate({ - llmConnectionId: Number(testLLM.connectionId), - message: payload, - }); - }; - - const handleChange = (key: string, value: string | number) => { - // Prevent changes while inference is loading - if (inferenceMutation.isLoading) { - return; - } - setTestLLM((prev) => ({ - ...prev, - [key]: value, - })); - }; - - const markdownComponents = { - ol: ({children}: any) => ( -
    - {children} -
- ), - a: (props: ComponentPropsWithoutRef<"a">) => ( - - ), - }; - - return ( -
- {isLoadingConnections ? ( - - ) : ( -
-
-
{t('testModels.title') || 'Test LLM'}
-
-
-

{t('testModels.llmConnectionLabel') || 'LLM Connection'}

-
- - { - handleChange('connectionId', selection?.value as string); - }} - value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined} - defaultValue={testLLM?.connectionId ?? undefined} - disabled={inferenceMutation.isLoading} - /> -
-
- -
-

{t('testModels.classifyTextLabel') || 'Enter text to test'}

- handleChange('text', e.target.value)} - showMaxLength={true} - /> -
-
- -
- - {/* Inference Result */} - - {inferenceResult && !inferenceMutation.isLoading && ( -
-
- Response: -
- - {inferenceResult.content} - -
-
- - {/* MCQ Buttons */} - {pendingButtons.length > 0 && ( -
- {pendingButtons.map((btn) => ( - - ))} -
- )} - - {/* Context Section */} - { - sortedContext && sortedContext?.length > 0 && ( -
- -
- {sortedContext?.map((contextItem, index) => ( -
-
- Rank {contextItem.rank} -
-
- - {contextItem.chunkRetrieved} - -
-
- ))} -
-
-
- ) - } - -
- )} - - {/* Error State */} - {inferenceMutation.isError && ( -
-

{t('testModels.classificationFailed') || 'Inference failed. Please try again.'}

-
- )} -
- )} -
- ); -}; - -export default TestLLM; \ No newline at end of file From cebcabe056b412dbae0708b2798f9e72506096f3 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Thu, 18 Jun 2026 03:42:50 +0530 Subject: [PATCH 5/8] fixed vector indexer statistics analysis issue --- src/vector_indexer/contextual_processor.py | 14 ++-- src/vector_indexer/error_logger.py | 4 +- src/vector_indexer/main_indexer.py | 80 ++++++++++++++-------- src/vector_indexer/models.py | 8 +++ 4 files changed, 72 insertions(+), 34 deletions(-) diff --git a/src/vector_indexer/contextual_processor.py b/src/vector_indexer/contextual_processor.py index b225cf30..6b21d326 100644 --- a/src/vector_indexer/contextual_processor.py +++ b/src/vector_indexer/contextual_processor.py @@ -41,7 +41,7 @@ def __init__( async def process_document( self, document: ProcessingDocument - ) -> List[ContextualChunk]: + ) -> tuple[List[ContextualChunk], int]: """ Process single document into contextual chunks. @@ -49,7 +49,8 @@ async def process_document( document: Document to process Returns: - List of contextual chunks with embeddings + Tuple of (contextual chunks with embeddings, number of chunks + dropped due to context-generation failure) """ logger.info( f"Processing document {document.document_hash} ({len(document.content)} characters)" @@ -69,11 +70,13 @@ async def process_document( # Step 3: Create contextual chunks (filter out failed context generations) contextual_chunks: List[ContextualChunk] = [] valid_contextual_contents: List[str] = [] + failed_chunks = 0 for i, (base_chunk, context) in enumerate( zip(base_chunks, contexts, strict=True) ): if isinstance(context, Exception): + failed_chunks += 1 self.error_logger.log_context_generation_failure( document.document_hash, i, str(context), self.config.max_retries ) @@ -128,7 +131,7 @@ async def process_document( logger.error( f"No valid chunks created for document {document.document_hash}" ) - return [] + return [], failed_chunks # Step 4: Create embeddings for all valid 
contextual chunks try: @@ -154,9 +157,10 @@ async def process_document( raise logger.info( - f"Successfully processed document {document.document_hash}: {len(contextual_chunks)} chunks" + f"Successfully processed document {document.document_hash}: " + f"{len(contextual_chunks)} chunks ({failed_chunks} dropped)" ) - return contextual_chunks + return contextual_chunks, failed_chunks except Exception as e: logger.error( diff --git a/src/vector_indexer/error_logger.py b/src/vector_indexer/error_logger.py index 1d11cba1..c62de79c 100644 --- a/src/vector_indexer/error_logger.py +++ b/src/vector_indexer/error_logger.py @@ -158,15 +158,17 @@ def log_processing_stats(self, stats: ProcessingStats) -> None: stats_dict["end_time"] = stats.end_time.isoformat() stats_dict["duration"] = stats.duration stats_dict["success_rate"] = stats.success_rate + stats_dict["chunk_success_rate"] = stats.chunk_success_rate with open(self.config.stats_log_file, "w", encoding="utf-8") as f: json.dump(stats_dict, f, indent=2) logger.info( f"Processing completed - Success rate: {stats.success_rate:.1%}, " + f"Chunk success rate: {stats.chunk_success_rate:.1%}, " f"Duration: {stats.duration}, " f"Processed: {stats.documents_processed}/{stats.total_documents} documents, " - f"Chunks: {stats.total_chunks_processed}" + f"Chunks: {stats.total_chunks_processed} ok / {stats.total_chunks_failed} failed" ) except Exception as e: logger.error(f"Failed to write stats log: {e}") diff --git a/src/vector_indexer/main_indexer.py b/src/vector_indexer/main_indexer.py index 45ce5ff6..bf407682 100644 --- a/src/vector_indexer/main_indexer.py +++ b/src/vector_indexer/main_indexer.py @@ -15,7 +15,7 @@ sys.path.append(str(Path(__file__).parent.parent)) from vector_indexer.config.config_loader import ConfigLoader -from vector_indexer.document_loader import DocumentLoader +from vector_indexer.document_loader import DocumentLoader, DocumentLoadError from vector_indexer.contextual_processor import ContextualProcessor from 
vector_indexer.qdrant_manager import QdrantManager from vector_indexer.error_logger import ErrorLogger @@ -169,7 +169,7 @@ async def process_all_documents(self) -> ProcessingStats: # Process documents with controlled concurrency semaphore = asyncio.Semaphore(self.config.max_concurrent_documents) - tasks: List[asyncio.Task[tuple[int, str]]] = [] + tasks: List[asyncio.Task[tuple[int, str, int]]] = [] for doc_info in documents: task = asyncio.create_task( @@ -189,6 +189,9 @@ async def process_all_documents(self) -> ProcessingStats: chunks_info: Dict[ str, Dict[str, Any] ] = {} # Track chunk counts for metadata update + # Only documents that processed successfully are marked as + # processed in DVC tracking, so failures are retried next run. + processed_documents: List[DocumentInfo] = [] for i, result in enumerate(results): if isinstance(result, Exception): doc_info = documents[i] @@ -200,16 +203,18 @@ async def process_all_documents(self) -> ProcessingStats: doc_info.document_hash, str(result) ) else: - # Result should be tuple of (chunk_count, content_hash) + # Result should be tuple of (chunk_count, content_hash, failed_chunks) doc_info = documents[i] self.stats.documents_processed += 1 - if isinstance(result, tuple) and len(result) == 2: - chunk_count, content_hash = result + processed_documents.append(doc_info) + if isinstance(result, tuple) and len(result) == 3: + chunk_count, content_hash, failed_chunks = result self.stats.total_chunks_processed += chunk_count + self.stats.total_chunks_failed += failed_chunks # Track chunk count using content_hash (not directory hash) chunks_info[content_hash] = {"chunk_count": chunk_count} logger.info( - f"CHUNK COUNT: Document {doc_info.document_hash[:12]}... (content: {content_hash[:12]}...) -> {chunk_count} chunks" + f"CHUNK COUNT: Document {doc_info.document_hash[:12]}... (content: {content_hash[:12]}...) 
-> {chunk_count} chunks ({failed_chunks} failed)" ) # Log the complete chunks_info dictionary @@ -227,10 +232,10 @@ async def process_all_documents(self) -> ProcessingStats: # Step 4: Update processed files tracking (even if no new documents processed) if diff_detector: try: - # Update metadata for newly processed files - if documents: + # Update metadata for newly processed files (successful only) + if processed_documents: processed_paths = [ - doc.cleaned_txt_path for doc in documents + doc.cleaned_txt_path for doc in processed_documents ] if processed_paths: logger.debug( @@ -290,7 +295,7 @@ async def _process_single_document( doc_info: DocumentInfo, qdrant_manager: QdrantManager, semaphore: asyncio.Semaphore, - ) -> tuple[int, str]: + ) -> tuple[int, str, int]: """ Process a single document with contextual retrieval. @@ -300,7 +305,9 @@ async def _process_single_document( semaphore: Concurrency control semaphore Returns: - tuple: (chunk_count: int, content_hash: str) or Exception on error + tuple: (chunk_count: int, content_hash: str, failed_chunks: int). + Raises on any failure (including load failure or zero usable chunks), + so the document is counted as failed rather than as success. 
""" async with semaphore: logger.info(f"Processing document: {doc_info.document_hash}") @@ -310,29 +317,31 @@ async def _process_single_document( document = self.document_loader.load_document(doc_info) if not document: - logger.warning(f"Could not load document: {doc_info.document_hash}") - return (0, doc_info.document_hash) + raise DocumentLoadError( + f"Could not load document: {doc_info.document_hash}" + ) # Process document with contextual retrieval - contextual_chunks = await self.contextual_processor.process_document( - document - ) + ( + contextual_chunks, + failed_chunks, + ) = await self.contextual_processor.process_document(document) if not contextual_chunks: - logger.warning( - f"No chunks created for document: {doc_info.document_hash}" + raise RuntimeError( + f"No chunks created for document: {doc_info.document_hash} " + f"({failed_chunks} chunks failed context generation)" ) - return (0, document.document_hash) # Store chunks in Qdrant await qdrant_manager.store_chunks(contextual_chunks) logger.info( f"Successfully processed document {doc_info.document_hash}: " - f"{len(contextual_chunks)} chunks" + f"{len(contextual_chunks)} chunks ({failed_chunks} dropped)" ) - return (len(contextual_chunks), document.document_hash) + return (len(contextual_chunks), document.document_hash, failed_chunks) except Exception as e: logger.error(f"Error processing document {doc_info.document_hash}: {e}") @@ -352,10 +361,12 @@ def _log_final_summary(self) -> None: logger.info(f" • Failed Chunks: {self.stats.total_chunks_failed}") if self.stats.total_documents > 0: - success_rate = ( - self.stats.documents_processed / self.stats.total_documents - ) * 100 - logger.info(f"Success Rate: {success_rate:.1f}%") + logger.info(f"Success Rate: {self.stats.success_rate * 100:.1f}%") + + if self.stats.total_chunks_processed + self.stats.total_chunks_failed > 0: + logger.info( + f"Chunk Success Rate: {self.stats.chunk_success_rate * 100:.1f}%" + ) logger.info(f"Processing Duration: 
{self.stats.duration}") @@ -365,6 +376,11 @@ def _log_final_summary(self) -> None: ) logger.info("Check failure logs for details") + if self.stats.total_chunks_failed > 0: + logger.warning( + f" {self.stats.total_chunks_failed} chunks failed processing" + ) + async def run_health_check(self) -> bool: """ Run health check on all components. @@ -617,12 +633,20 @@ async def _execute_cleanup_operations( return total_deleted def _cleanup_datasets(self) -> None: - """Remove datasets folder after processing.""" + """Remove datasets folder contents after processing. + + Only the folder's contents are removed, not the folder itself, since + the datasets path is a mounted volume in the container. + """ try: datasets_path = Path(self.config.dataset_base_path) if datasets_path.exists(): - shutil.rmtree(str(datasets_path)) - logger.info(f"Datasets folder cleaned up: {datasets_path}") + for child in datasets_path.iterdir(): + if child.is_dir(): + shutil.rmtree(str(child)) + else: + child.unlink() + logger.info(f"Datasets folder contents cleaned up: {datasets_path}") else: logger.debug(f"Datasets folder does not exist: {datasets_path}") except Exception as e: diff --git a/src/vector_indexer/models.py b/src/vector_indexer/models.py index 752ea02a..41ae1ce1 100644 --- a/src/vector_indexer/models.py +++ b/src/vector_indexer/models.py @@ -96,6 +96,14 @@ def success_rate(self) -> float: return self.documents_processed / self.total_documents return 0.0 + @property + def chunk_success_rate(self) -> float: + """Calculate chunk success rate (processed vs processed + failed).""" + total_chunks = self.total_chunks_processed + self.total_chunks_failed + if total_chunks > 0: + return self.total_chunks_processed / total_chunks + return 0.0 + class ProcessingError(BaseModel): """Error information for failed processing.""" From 8b3d232f9ff080df6c4db88e57f8518cad05ce72 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Thu, 18 Jun 2026 09:55:25 +0530 Subject: [PATCH 6/8] fixed vault packages issue 
--- Dockerfile.vault-init | 10 ++++++++++ docker-compose-ec2.yml | 6 ++++-- docker-compose.yml | 6 ++++-- 3 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 Dockerfile.vault-init diff --git a/Dockerfile.vault-init b/Dockerfile.vault-init new file mode 100644 index 00000000..7743fa6e --- /dev/null +++ b/Dockerfile.vault-init @@ -0,0 +1,10 @@ +FROM hashicorp/vault:1.20.3 + +# Bake the only CLI tools vault-init.sh actually needs (jq + openssl) so container +# startup never depends on the Alpine CDN. Previously these were installed via +# `apk add` on every boot, which failed intermittently on EC2. Retry guards the +# one-time build against a transient mirror hiccup. +RUN for i in 1 2 3; do \ + apk add --no-cache jq openssl && break; \ + echo "apk add failed (attempt $i), retrying..."; sleep 3; \ + done diff --git a/docker-compose-ec2.yml b/docker-compose-ec2.yml index 136c097c..c20acca8 100644 --- a/docker-compose-ec2.yml +++ b/docker-compose-ec2.yml @@ -516,7 +516,10 @@ services: start_period: 10s vault-init: - image: hashicorp/vault:1.20.3 + build: + context: . + dockerfile: Dockerfile.vault-init + image: rag-vault-init:1.20.3 container_name: vault-init user: "0" depends_on: @@ -538,7 +541,6 @@ services: command: - -c - | - apk add --no-cache curl jq uuidgen openssl # Create and set permissions for all agent directories mkdir -p /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out chown -R vault:vault /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out diff --git a/docker-compose.yml b/docker-compose.yml index 3e6cfba2..29f8139d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -463,7 +463,10 @@ services: start_period: 10s vault-init: - image: hashicorp/vault:1.20.3 + build: + context: . 
+ dockerfile: Dockerfile.vault-init + image: rag-vault-init:1.20.3 container_name: vault-init user: "0" depends_on: @@ -485,7 +488,6 @@ services: command: - -c - | - apk add --no-cache curl jq uuidgen openssl # Create and set permissions for all agent directories mkdir -p /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out chown -R vault:vault /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out From 5cc9c82a2fe1e5e708ab4343cbe92fd4c679e5d9 Mon Sep 17 00:00:00 2001 From: ruwinirathnamalala Date: Thu, 18 Jun 2026 12:32:57 +0530 Subject: [PATCH 7/8] Issue fix for test llm connections displaying production llm --- .../get-all-llm-connections-paginated.sql | 45 ++++++++++ .../POST/get-llm-connections-paginated.sql | 2 +- .../rag-search/GET/llm-connections/all.yml | 84 +++++++++++++++++++ GUI/src/pages/TestProductionLLM/index.tsx | 4 +- GUI/src/services/llmConnections.ts | 16 ++++ GUI/src/utils/endpoints.ts | 1 + 6 files changed, 149 insertions(+), 3 deletions(-) create mode 100644 DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql create mode 100644 DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml diff --git a/DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql new file mode 100644 index 00000000..cb1e2394 --- /dev/null +++ b/DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql @@ -0,0 +1,45 @@ +SELECT + id, + vault_uuid, + connection_name, + llm_platform, + llm_model, + embedding_platform, + embedding_model, + monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, + used_budget, + environment, + connection_status, + created_at, + CEIL(COUNT(*) OVER() / :page_size::DECIMAL) AS totalPages, + CASE + WHEN used_budget IS NULL OR used_budget = 0 OR (used_budget::DECIMAL / monthly_budget::DECIMAL) < (warn_budget_threshold::DECIMAL / 100.0) THEN 'within_budget' + WHEN 
stop_budget_threshold != 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (stop_budget_threshold::DECIMAL / 100.0) THEN 'over_budget' + WHEN stop_budget_threshold = 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1 THEN 'over_budget' + WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (warn_budget_threshold::DECIMAL / 100.0) THEN 'close_to_exceed' + ELSE 'within_budget' + END AS budget_status +FROM rag_search.llm_connections +WHERE connection_status <> 'deleted' + -- AND environment = 'testing' + AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform) + AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model) + AND (:environment IS NULL OR :environment = '' OR environment = :environment) +ORDER BY + CASE WHEN :sorting = 'connection_name asc' THEN connection_name END ASC, + CASE WHEN :sorting = 'connection_name desc' THEN connection_name END DESC, + CASE WHEN :sorting = 'llm_platform asc' THEN llm_platform END ASC, + CASE WHEN :sorting = 'llm_platform desc' THEN llm_platform END DESC, + CASE WHEN :sorting = 'llm_model asc' THEN llm_model END ASC, + CASE WHEN :sorting = 'llm_model desc' THEN llm_model END DESC, + CASE WHEN :sorting = 'monthly_budget asc' THEN monthly_budget END ASC, + CASE WHEN :sorting = 'monthly_budget desc' THEN monthly_budget END DESC, + CASE WHEN :sorting = 'environment asc' THEN environment END ASC, + CASE WHEN :sorting = 'environment desc' THEN environment END DESC, + CASE WHEN :sorting = 'created_at asc' THEN created_at END ASC, + CASE WHEN :sorting = 'created_at desc' THEN created_at END DESC, + created_at DESC -- Default fallback sorting +OFFSET ((GREATEST(:page, 1) - 1) * :page_size) LIMIT :page_size; \ No newline at end of file diff --git a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql index d4c15efb..922c16ec 100644 --- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql +++ 
b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql @@ -24,7 +24,7 @@ SELECT END AS budget_status FROM rag_search.llm_connections WHERE connection_status <> 'deleted' - -- AND environment = 'testing' + AND environment = 'testing' AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform) AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model) AND (:environment IS NULL OR :environment = '' OR environment = :environment) diff --git a/DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml b/DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml new file mode 100644 index 00000000..3b69aebd --- /dev/null +++ b/DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml @@ -0,0 +1,84 @@ +declaration: + call: declare + version: 0.1 + description: "Get paginated list of LLM connections" + method: get + accepts: json + returns: json + namespace: rag-search + allowlist: + params: + - field: pageNumber + type: number + description: "Page number (1-based)" + - field: pageSize + type: number + description: "Number of items per page" + - field: sortBy + type: string + description: "Field to sort by (e.g. 'llm_platform', 'created_at')" + - field: sortOrder + type: string + description: "Sort order: 'asc' or 'desc'" + - field: llmPlatform + type: string + description: "Filter by LLM platform" + - field: llmModel + type: string + description: "Filter by LLM model" + - field: environment + type: string + description: "Filter by deployment environment" + +extract_request_data: + assign: + pageNumber: ${Number(incoming.params.pageNumber) ?? 1} + pageSize: ${Number(incoming.params.pageSize) ?? 10} + sortBy: ${incoming.params.sortBy ?? "created_at"} + sortOrder: ${incoming.params.sortOrder ?? "desc"} + sorting: ${sortBy + " " + sortOrder} + llmPlatform: ${incoming.params.llmPlatform ?? ""} + llmModel: ${incoming.params.llmModel ?? ""} + environment: ${incoming.params.environment ?? 
""} + next: validate_page_params + +validate_page_params: + switch: + - condition: ${pageNumber < 1} + next: return_invalid_page + - condition: ${pageSize < 1 || pageSize > 100} + next: return_invalid_page_size + next: get_llm_connections + +get_llm_connections: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-all-llm-connections-paginated" + body: + page: ${pageNumber} + page_size: ${pageSize} + sorting: ${sorting} + llm_platform: ${llmPlatform} + llm_model: ${llmModel} + environment: ${environment} + result: connections_result + next: transform_response + +transform_response: + assign: + response_data: ${connections_result.response.body} + next: return_success + +return_success: + return: ${response_data} + next: end + +return_invalid_page: + status: 400 + return: "Page number must be greater than 0" + next: end + +return_invalid_page_size: + status: 400 + return: "Page size must be between 1 and 100" + next: end \ No newline at end of file diff --git a/GUI/src/pages/TestProductionLLM/index.tsx b/GUI/src/pages/TestProductionLLM/index.tsx index b9ba6be7..5f22d714 100644 --- a/GUI/src/pages/TestProductionLLM/index.tsx +++ b/GUI/src/pages/TestProductionLLM/index.tsx @@ -8,7 +8,7 @@ import { ChoiceButton } from 'services/inference'; import './TestProductionLLM.scss'; import MessageContent from 'components/MessageContent'; import { llmConnectionsQueryKeys } from 'utils/queryKeys'; -import { fetchLLMConnectionsPaginated } from 'services/llmConnections'; +import { fetchAllLLMConnectionsPaginated } from 'services/llmConnections'; interface Message { @@ -45,7 +45,7 @@ const TestProductionLLM: FC = () => { pageSize: 100, // Get all connections for dropdown sorting: 'created_at desc', }), - queryFn: () => fetchLLMConnectionsPaginated({ + queryFn: () => fetchAllLLMConnectionsPaginated({ pageNumber: 1, pageSize: 100, sortBy: 'created_at desc', diff --git a/GUI/src/services/llmConnections.ts b/GUI/src/services/llmConnections.ts index 647addfc..cd07324e 100644 --- 
a/GUI/src/services/llmConnections.ts +++ b/GUI/src/services/llmConnections.ts @@ -378,3 +378,19 @@ export async function updateLLMConnectionStatus( }); return data?.response; } + +export async function fetchAllLLMConnectionsPaginated(filters: LLMConnectionFilters): Promise { + const queryParams = new URLSearchParams(); + + if (filters.pageNumber) queryParams.append('pageNumber', filters.pageNumber.toString()); + if (filters.pageSize) queryParams.append('pageSize', filters.pageSize.toString()); + if (filters.sortBy) queryParams.append('sortBy', filters.sortBy); + if (filters.sortOrder) queryParams.append('sortOrder', filters.sortOrder); + if (filters.llmPlatform) queryParams.append('llmPlatform', filters.llmPlatform); + if (filters.llmModel) queryParams.append('llmModel', filters.llmModel); + if (filters.environment) queryParams.append('environment', filters.environment); + + const url = `${llmConnectionsEndpoints.FETCH_ALL_LLM_CONNECTIONS_PAGINATED()}?${queryParams.toString()}`; + const { data } = await apiDev.get(url); + return data?.response; +} \ No newline at end of file diff --git a/GUI/src/utils/endpoints.ts b/GUI/src/utils/endpoints.ts index 386db296..30624914 100644 --- a/GUI/src/utils/endpoints.ts +++ b/GUI/src/utils/endpoints.ts @@ -15,6 +15,7 @@ export const authEndpoints = { export const llmConnectionsEndpoints = { FETCH_LLM_CONNECTIONS_PAGINATED: (): string => `/rag-search/llm-connections/list`, + FETCH_ALL_LLM_CONNECTIONS_PAGINATED: (): string => `/rag-search/llm-connections/all`, GET_LLM_CONNECTION: (): string => `/rag-search/llm-connections/get`, GET_PRODUCTION_CONNECTION: (): string => `/rag-search/llm-connections/production`, CREATE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/add`, From 2ab4c5101c5b3713d876c21d41ed1e1632a267b0 Mon Sep 17 00:00:00 2001 From: ruwinirathnamalala Date: Thu, 18 Jun 2026 13:53:19 +0530 Subject: [PATCH 8/8] Issue fix for test llm connections displaying production llm --- 
DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql index d4c15efb..922c16ec 100644 --- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql +++ b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql @@ -24,7 +24,7 @@ SELECT END AS budget_status FROM rag_search.llm_connections WHERE connection_status <> 'deleted' - -- AND environment = 'testing' + AND environment = 'testing' AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform) AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model) AND (:environment IS NULL OR :environment = '' OR environment = :environment)