From f53aa35819dc6055f2e7442c85f24fcbdbb92811 Mon Sep 17 00:00:00 2001
From: ruwinirathnamalala <ruwini.rathnamalala@rootcodelabs.com>
Date: Mon, 15 Jun 2026 17:54:44 +0530
Subject: [PATCH 1/8] Consolidate Test LLM screens into a single screen with
 LLM connection selector

---
 .../POST/get-llm-connections-paginated.sql    |   2 +-
 GUI/src/App.tsx                               |   3 +-
 GUI/src/components/MainNavigation/index.tsx   |  12 +-
 GUI/src/pages/TestModel/TestLLM.scss          | 428 +++++++++---------
 .../TestProductionLLM/TestProductionLLM.scss  |  15 +
 GUI/src/pages/TestProductionLLM/index.tsx     |  65 ++-
 6 files changed, 300 insertions(+), 225 deletions(-)
diff --git a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
index 922c16ec..d4c15efb 100644
--- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
+++ b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
@@ -24,7 +24,7 @@ SELECT
     END AS budget_status
 FROM rag_search.llm_connections
 WHERE connection_status <> 'deleted'
-    AND environment = 'testing'
+    -- No hard-coded environment filter here; the optional environment parameter filter below applies instead
     AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform)
     AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model)
     AND (:environment IS NULL OR :environment = '' OR environment = :environment)
diff --git a/GUI/src/App.tsx b/GUI/src/App.tsx
index 5839b180..43c50753 100644
--- a/GUI/src/App.tsx
+++ b/GUI/src/App.tsx
@@ -64,8 +64,7 @@ const App: FC = () => {
             <Route path="/create-llm-connection" element={<CreateLLMConnection />} />
             <Route path="/view-llm-connection" element={<ViewLLMConnection />} />
             <Route path="/prompt-configurations" element={<PromptConfigurations />} />
-            <Route path="/test-llm" element={<TestLLM />} />
-            <Route path="/test-production-llm" element={<TestProductionLLM />} /> 
+            <Route path="/test-llm" element={<TestProductionLLM />} /> 
 
 
             </Route>
diff --git a/GUI/src/components/MainNavigation/index.tsx b/GUI/src/components/MainNavigation/index.tsx
index 070c4b9a..265f464c 100644
--- a/GUI/src/components/MainNavigation/index.tsx
+++ b/GUI/src/components/MainNavigation/index.tsx
@@ -45,12 +45,12 @@ const MainNavigation: FC = () => {
       path: '/test-llm',
       icon: <MdSearch />
     },
-    {
-      id: 'testProductionLLM',
-      label: t('menu.testProductionLLM'),
-      path: '/test-production-llm',
-      icon: <MdSearch />
-    }
+    // {
+    //   id: 'testProductionLLM',
+    //   label: t('menu.testProductionLLM'),
+    //   path: '/test-production-llm',
+    //   icon: <MdSearch />
+    // }
   ];
 
   const filterItemsByRole = (role: string[], items: MenuItem[]) => {
diff --git a/GUI/src/pages/TestModel/TestLLM.scss b/GUI/src/pages/TestModel/TestLLM.scss
index 3d0c2156..35bced8d 100644
--- a/GUI/src/pages/TestModel/TestLLM.scss
+++ b/GUI/src/pages/TestModel/TestLLM.scss
@@ -1,217 +1,217 @@
-.testModalFormTextArea {
-  margin-top: 30px;
-}
-
-.mcq-buttons {
-  display: flex;
-  flex-wrap: wrap;
-  gap: 0.75rem;
-  margin-top: 1rem;
-}
-
-.testModalClassifyButton {
-  text-align: right;
-  margin-top: 20px;
-}
-
-.llm-connection-section {
-  width: 50%;
-}
-
-.llm-connection-controls {
-  display: flex;
-  gap: 1rem;
-  align-items: center;
-}
-
-.inference-results-container {
-  max-width: 100%;
-  background-color: #d7efff;
-  padding: 20px;
-  border-radius: 8px;
-  margin-top: 20px;
+// .testModalFormTextArea {
+//   margin-top: 30px;
+// }
+
+// .mcq-buttons {
+//   display: flex;
+//   flex-wrap: wrap;
+//   gap: 0.75rem;
+//   margin-top: 1rem;
+// }
+
+// .testModalClassifyButton {
+//   text-align: right;
+//   margin-top: 20px;
+// }
+
+// .llm-connection-section {
+//   width: 50%;
+// }
+
+// .llm-connection-controls {
+//   display: flex;
+//   gap: 1rem;
+//   align-items: center;
+// }
+
+// .inference-results-container {
+//   max-width: 100%;
+//   background-color: #d7efff;
+//   padding: 20px;
+//   border-radius: 8px;
+//   margin-top: 20px;
   
-  .result-item {
-    margin-bottom: 15px;
+//   .result-item {
+//     margin-bottom: 15px;
     
-    strong {
-      color: #333;
-    }
-  }
+//     strong {
+//       color: #333;
+//     }
+//   }
   
-  .response-content {
-    margin-top: 8px;
-    padding: 12px;
-    background-color: #f5f5f5;
-    border-radius: 4px;
-    white-space: pre-wrap;
-    line-height: 1.5;
-    color: #555;
-  }
-
-  .context-section {
-    margin-top: 20px;
-
-    .context-list {
-      display: flex;
-      flex-direction: column;
-      gap: 12px;
-      margin-top: 8px;
-    }
-
-    .context-item {
-      padding: 12px;
-      background-color: #ffffff;
-      border: 1px solid #e0e0e0;
-      border-radius: 6px;
-      box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
-
-      .context-rank {
-        margin-bottom: 8px;
-        padding-bottom: 4px;
-        border-bottom: 1px solid #f0f0f0;
-
-        strong {
-          color: #2563eb;
-          font-size: 0.875rem;
-          font-weight: 600;
-        }
-      }
-
-      .context-content {
-        color: #374151;
-        line-height: 1.5;
-        font-size: 0.9rem;
-        white-space: pre-wrap;
-      }
-    }
-  }
-}
-
-.testModalList {
-  list-style: disc;
-  margin-left: 30px;
-}
-
-.mt-20 {
-  margin-top: 20px;
-}
-
-.classification-results {
-  margin-top: 1rem;
-  padding: 1rem;
-  border: 1px solid #e0e0e0;
-  border-radius: 8px;
-  background-color: #f9f9f9;
-
-  h3 {
-    margin: 0 0 1rem 0;
-    color: #333;
-  }
-
-  h4 {
-    margin: 0 0 0.75rem 0;
-    color: #555;
-    font-size: 1rem;
-  }
-
-  .results-container {
-    display: flex;
-    flex-direction: column;
-    gap: 1.5rem;
-  }
-
-  .top-prediction {
-    .prediction-card {
-      display: flex;
-      justify-content: space-between;
-      align-items: center;
-      padding: 1rem;
-      border-radius: 8px;
-      background-color: #e8f5e8;
-      border: 2px solid #4caf50;
-
-      .agency-name {
-        font-weight: 600;
-        color: #2e7d32;
-        font-size: 1.1rem;
-      }
-
-      .confidence-score {
-        font-weight: 700;
-        color: #2e7d32;
-        font-size: 1.2rem;
-      }
-    }
-  }
-
-  .predictions-list {
-    display: flex;
-    flex-direction: column;
-    gap: 0.75rem;
-
-    .prediction-item {
-      display: flex;
-      align-items: center;
-      gap: 1rem;
-      padding: 0.75rem;
-      background-color: white;
-      border-radius: 6px;
-      border: 1px solid #ddd;
-
-      &.highest {
-        border-color: #4caf50;
-        background-color: #f8fff8;
-      }
-
-      .rank {
-        font-weight: 600;
-        color: #666;
-        min-width: 2rem;
-      }
-
-      .agency-info {
-        flex: 1;
-        display: flex;
-        flex-direction: column;
-        gap: 0.25rem;
-
-        .agency-name {
-          font-weight: 500;
-          color: #333;
-        }
-
-        .confidence-bar-container {
-          width: 100%;
-          height: 4px;
-          background-color: #e0e0e0;
-          border-radius: 2px;
-          overflow: hidden;
-
-          .confidence-bar {
-            height: 100%;
-            background-color: #4caf50;
-            transition: width 0.3s ease;
-          }
-        }
-      }
-
-      .confidence-percentage {
-        font-weight: 600;
-        color: #555;
-        min-width: 4rem;
-        text-align: right;
-      }
-    }
-  }
-}
-
-.classification-error {
-  margin-top: 1rem;
-  padding: 1rem;
-  background-color: #ffebee;
-  border: 1px solid #f44336;
-  border-radius: 6px;
-  color: #c62828;
-  text-align: center;
-}
\ No newline at end of file
+//   .response-content {
+//     margin-top: 8px;
+//     padding: 12px;
+//     background-color: #f5f5f5;
+//     border-radius: 4px;
+//     white-space: pre-wrap;
+//     line-height: 1.5;
+//     color: #555;
+//   }
+
+//   .context-section {
+//     margin-top: 20px;
+
+//     .context-list {
+//       display: flex;
+//       flex-direction: column;
+//       gap: 12px;
+//       margin-top: 8px;
+//     }
+
+//     .context-item {
+//       padding: 12px;
+//       background-color: #ffffff;
+//       border: 1px solid #e0e0e0;
+//       border-radius: 6px;
+//       box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+
+//       .context-rank {
+//         margin-bottom: 8px;
+//         padding-bottom: 4px;
+//         border-bottom: 1px solid #f0f0f0;
+
+//         strong {
+//           color: #2563eb;
+//           font-size: 0.875rem;
+//           font-weight: 600;
+//         }
+//       }
+
+//       .context-content {
+//         color: #374151;
+//         line-height: 1.5;
+//         font-size: 0.9rem;
+//         white-space: pre-wrap;
+//       }
+//     }
+//   }
+// }
+
+// .testModalList {
+//   list-style: disc;
+//   margin-left: 30px;
+// }
+
+// .mt-20 {
+//   margin-top: 20px;
+// }
+
+// .classification-results {
+//   margin-top: 1rem;
+//   padding: 1rem;
+//   border: 1px solid #e0e0e0;
+//   border-radius: 8px;
+//   background-color: #f9f9f9;
+
+//   h3 {
+//     margin: 0 0 1rem 0;
+//     color: #333;
+//   }
+
+//   h4 {
+//     margin: 0 0 0.75rem 0;
+//     color: #555;
+//     font-size: 1rem;
+//   }
+
+//   .results-container {
+//     display: flex;
+//     flex-direction: column;
+//     gap: 1.5rem;
+//   }
+
+//   .top-prediction {
+//     .prediction-card {
+//       display: flex;
+//       justify-content: space-between;
+//       align-items: center;
+//       padding: 1rem;
+//       border-radius: 8px;
+//       background-color: #e8f5e8;
+//       border: 2px solid #4caf50;
+
+//       .agency-name {
+//         font-weight: 600;
+//         color: #2e7d32;
+//         font-size: 1.1rem;
+//       }
+
+//       .confidence-score {
+//         font-weight: 700;
+//         color: #2e7d32;
+//         font-size: 1.2rem;
+//       }
+//     }
+//   }
+
+//   .predictions-list {
+//     display: flex;
+//     flex-direction: column;
+//     gap: 0.75rem;
+
+//     .prediction-item {
+//       display: flex;
+//       align-items: center;
+//       gap: 1rem;
+//       padding: 0.75rem;
+//       background-color: white;
+//       border-radius: 6px;
+//       border: 1px solid #ddd;
+
+//       &.highest {
+//         border-color: #4caf50;
+//         background-color: #f8fff8;
+//       }
+
+//       .rank {
+//         font-weight: 600;
+//         color: #666;
+//         min-width: 2rem;
+//       }
+
+//       .agency-info {
+//         flex: 1;
+//         display: flex;
+//         flex-direction: column;
+//         gap: 0.25rem;
+
+//         .agency-name {
+//           font-weight: 500;
+//           color: #333;
+//         }
+
+//         .confidence-bar-container {
+//           width: 100%;
+//           height: 4px;
+//           background-color: #e0e0e0;
+//           border-radius: 2px;
+//           overflow: hidden;
+
+//           .confidence-bar {
+//             height: 100%;
+//             background-color: #4caf50;
+//             transition: width 0.3s ease;
+//           }
+//         }
+//       }
+
+//       .confidence-percentage {
+//         font-weight: 600;
+//         color: #555;
+//         min-width: 4rem;
+//         text-align: right;
+//       }
+//     }
+//   }
+// }
+
+// .classification-error {
+//   margin-top: 1rem;
+//   padding: 1rem;
+//   background-color: #ffebee;
+//   border: 1px solid #f44336;
+//   border-radius: 6px;
+//   color: #c62828;
+//   text-align: center;
+// }
\ No newline at end of file
diff --git a/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss b/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss
index 9cb5c00c..2fa456fb 100644
--- a/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss
+++ b/GUI/src/pages/TestProductionLLM/TestProductionLLM.scss
@@ -3,6 +3,21 @@
   margin: 0 auto;
   padding: 2rem;
 
+ .llm-connection-section {
+    width: 50%;
+    margin-bottom: 1.5rem;
+    p {
+      margin-bottom: 0.5rem;
+      font-weight: 500;
+      color: #333;
+    }
+  }
+  .llm-connection-controls {
+    display: flex;
+    gap: 1rem;
+    align-items: center;
+  }
+  
 .mcq-buttons {
   display: flex;
   flex-wrap: wrap;
diff --git a/GUI/src/pages/TestProductionLLM/index.tsx b/GUI/src/pages/TestProductionLLM/index.tsx
index f29cfcf9..b9ba6be7 100644
--- a/GUI/src/pages/TestProductionLLM/index.tsx
+++ b/GUI/src/pages/TestProductionLLM/index.tsx
@@ -1,11 +1,16 @@
 import { FC, useState, useRef, useEffect, useMemo } from 'react';
+import { useQuery } from '@tanstack/react-query';
 import { useTranslation } from 'react-i18next';
-import { Button, FormTextarea } from 'components';
+import { Button, FormTextarea, FormSelect } from 'components';
 import { useToast } from 'hooks/useToast';
 import { useStreamingResponse } from 'hooks/useStreamingResponse';
 import { ChoiceButton } from 'services/inference';
 import './TestProductionLLM.scss';
 import MessageContent from 'components/MessageContent';
+import { llmConnectionsQueryKeys } from 'utils/queryKeys';
+import { fetchLLMConnectionsPaginated } from 'services/llmConnections';
+
+
 interface Message {
   id: string;
   content: string;
@@ -23,11 +28,49 @@ const TestProductionLLM: FC = () => {
   const [messages, setMessages] = useState<Message[]>([]);
   const [isLoading, setIsLoading] = useState<boolean>(false);
   const messagesEndRef = useRef<HTMLDivElement>(null);
+  const [testLLM, setTestLLM] = useState({
+    connectionId: null,
+    text: '',
+  });
 
   // Generate a unique channel ID for this session
   const channelId = useMemo(() => `channel-${Math.random().toString(36).substring(2, 15)}`, []);
   const { startStreaming, stopStreaming, isStreaming } = useStreamingResponse(channelId);
+  const [selectedConnectionId, setSelectedConnectionId] = useState<string | null>(null);
+
+   // Fetch LLM connections for dropdown - using the working legacy endpoint for now
+    const { data: connections, isLoading: isLoadingConnections } = useQuery({
+      queryKey: llmConnectionsQueryKeys.list({
+        page: 1,
+        pageSize: 100, // Get all connections for dropdown
+        sorting: 'created_at desc',
+      }),
+      queryFn: () => fetchLLMConnectionsPaginated({
+        pageNumber: 1,
+        pageSize: 100,
+        sortBy: 'created_at desc',
+      }),
+    });
+    // Transform connections data for dropdown
+  const connectionOptions = useMemo(
+    () =>
+      connections?.map((connection: any) => ({
+        label: `${connection.llmPlatform} - ${connection.llmModel} (${connection.environment})`,
+        value: String(connection.id),
+      })) || [],
+    [connections]
+  );
 
+    const selectedConnection = useMemo(() => {
+    return connections?.find((conn: any) => String(conn.id) === selectedConnectionId) || null;
+  }, [connections, selectedConnectionId]);
+
+  const handleConnectionChange = (value: string | number) => {
+    // Ignore selection changes while a request is loading or streaming.
+    if (isLoading || isStreaming) return;
+    setSelectedConnectionId(value ? String(value) : null);
+  };
+  
   // Auto-scroll to bottom
   useEffect(() => {
     messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
@@ -82,6 +125,8 @@ const TestProductionLLM: FC = () => {
       authorId: 'test-user-456',
       conversationHistory,
       url: 'opensearch-dashboard-test',
+      environment: selectedConnection?.environment || 'production',
+      connection_id: selectedConnection?.vaultUuid || undefined,
     };
 
     // Callbacks for streaming
@@ -257,11 +302,27 @@ const TestProductionLLM: FC = () => {
     <div>
       <div className="test-production-llm">
         <div className="test-production-llm__header">
-          <h1>{t('testProductionLLM.title')}</h1>
+          <h1>{t('testModels.title')}</h1>
           <Button onClick={clearChat} appearance="secondary">
             {t('testProductionLLM.clearChat')}
           </Button>
         </div>
+           <div className="llm-connection-section">
+            <p>{t('testModels.llmConnectionLabel', 'LLM Connection')}</p>
+            <div className="llm-connection-controls">
+              <FormSelect
+                label=""
+                name="connectionId"
+                options={connectionOptions}
+                placeholder={t('testModels.selectConnectionPlaceholder', 'Select LLM Connection')}
+                onSelectionChange={(selection) => {
+                  handleConnectionChange(selection?.value as string);
+                }}
+                defaultValue={selectedConnectionId ?? undefined}
+                disabled={isLoading || isStreaming}
+              />
+            </div>
+          </div>
 
         <div className="test-production-llm__chat-container">
           <div className="test-production-llm__messages">

From 9d954c4b2cc80fb4b1b1b82b4a9d6589708cba84 Mon Sep 17 00:00:00 2001
From: nuwangeek <charith.bimsara@rootcode.io>
Date: Tue, 16 Jun 2026 12:14:57 +0530
Subject: [PATCH 2/8] fixed vault configuration issue

---
 docs/VAULT_SECURITY_ARCHITECTURE.md |  62 +++++++------
 vault-init.sh                       | 131 ++++++++++++++++------------
 vault/config/vault.hcl              |  44 +++++-----
 3 files changed, 130 insertions(+), 107 deletions(-)

diff --git a/docs/VAULT_SECURITY_ARCHITECTURE.md b/docs/VAULT_SECURITY_ARCHITECTURE.md
index fe6fd741..3f7f7ff2 100644
--- a/docs/VAULT_SECURITY_ARCHITECTURE.md
+++ b/docs/VAULT_SECURITY_ARCHITECTURE.md
@@ -197,9 +197,12 @@ Day 0+: Automatic Token Renewal:
 Container Restart:
   vault-init: Check if Vault is sealed
              ↓
-  If unsealed: Regenerate secret_id only
+  If unsealed: Validate existing secret_ids
              ↓
-  vault-agent: Re-authenticate with new secret_id
+  If valid: Reuse existing secret_id (no churn)
+  If invalid: Mint new secret_id and write to disk
+             ↓
+  vault-agent: Re-authenticate with secret_id
              ↓
   New token issued and cached
 ```
@@ -413,8 +416,10 @@ Connected Services:
   - GUI (React Frontend)
 
 Token Lifecycle:
-  - Default Lease: 768h (32 days)
-  - Auto-renewal: Before expiration
+  - Token TTL: 15m
+  - Token Max TTL: 1h
+  - Auto-renewal: Every ~11 minutes (75% of TTL)
+  - Re-auth: When max_ttl reached (every ~1h)
 ```
 
 #### Agent 2: vault-agent-cron
@@ -429,8 +434,10 @@ Connected Services:
   - CronManager (Python worker)
 
 Token Lifecycle:
-  - Default Lease: 768h (32 days)
-  - Auto-renewal: Before expiration
+  - Token TTL: 30m
+  - Token Max TTL: 8h
+  - Auto-renewal: Every ~22 minutes (75% of TTL)
+  - Re-auth: When max_ttl reached (every ~8h)
 ```
 
 #### Agent 3: vault-agent-llm
@@ -445,8 +452,10 @@ Connected Services:
   - LLM Orchestration Service (FastAPI)
 
 Token Lifecycle:
-  - Default Lease: 1h (shorter for higher security)
-  - Auto-renewal: Every ~45 minutes
+  - Token TTL: 1h
+  - Token Max TTL: 8h
+  - Auto-renewal: Every ~45 minutes (75% of TTL)
+  - Re-auth: When max_ttl reached (every ~8h)
 ```
 
 ### Token Caching and Auto-Renewal
@@ -856,15 +865,16 @@ Step 12: Check Vault Seal Status
    └─► GET /v1/sys/seal-status
        └─► If unsealed: Skip unseal steps
 
-Step 13: Regenerate Secret IDs Only
-   └─► POST /v1/auth/approle/role/gui-service/secret-id
-   └─► POST /v1/auth/approle/role/cron-manager-service/secret-id
-   └─► POST /v1/auth/approle/role/llm-orchestration-service/secret-id
-   └─► Write new secret_ids to /agent/credentials/
+Step 13: Validate and Reconcile Secret IDs
+   └─► For each role (gui, cron-manager, llm-orchestration):
+       ├─► Test existing on-disk secret_id via AppRole login
+       ├─► If valid: Reuse (no change to credential file)
+       └─► If invalid/missing: Mint new secret_id and write to disk
 
 Note: role_ids remain unchanged (static identifiers)
 Note: Existing secrets and policies preserved
 Note: RSA keypair NOT regenerated (preserved)
+Note: Stable secret_ids across restarts reduce credential churn
 
 ═══════════════════════════════════════════════════════════════════
 COMPLETION
@@ -1128,13 +1138,14 @@ Startup Order:
 vault-init Behavior:
   - Detects Vault already initialized
   - Skips initialization steps
-  - Regenerates secret_ids only
-  - Updates credential files
+  - Validates existing secret_ids (reuses if still valid)
+  - Mints new secret_ids only if existing ones are invalid
 
 Result:
-   All services start with fresh credentials
+   All services start with validated credentials
    Existing secrets preserved
    No manual intervention needed
+   Stable secret_ids reduce unnecessary credential churn
 ```
 
 ### Token Regeneration Strategy
@@ -1143,21 +1154,22 @@ Result:
 Current Implementation:
 
 1. On Every Container Restart:
-   └─► vault-init regenerates secret_ids
-       └─► Vault agents get new tokens
-           └─► Old tokens remain valid until expiration
+   └─► vault-init validates existing secret_ids
+       ├─► If valid: Reuse (agents continue with same credentials)
+       └─► If invalid: Mint new secret_id, agents re-authenticate
 
 2. Token Lifecycle:
    └─► Issue: vault-agent authenticates
    └─► Use: Application makes requests
-   └─► Renew: vault-agent extends TTL
-   └─► Expire: Automatic renewal failed
-   └─► Re-issue: vault-agent re-authenticates
+   └─► Renew: vault-agent extends TTL (at ~75% of TTL)
+   └─► Max TTL reached: Renewal rejected by Vault
+   └─► Re-issue: vault-agent re-authenticates with secret_id
 
 3. Security Benefits:
-    Short-lived tokens (1 hour for LLM, 32 days for others)
-    Automatic rotation on agent restart
-    No manual token management
+    Short-lived tokens (1 hour for LLM, 30m for Cron, 15m for GUI)
+    Continuous renewal within max_ttl window
+    Automatic re-authentication when max_ttl reached
+    Stable secret_ids (no unnecessary churn on restart)
     Compromised tokens have limited lifetime
 ```
 
diff --git a/vault-init.sh b/vault-init.sh
index eada7518..164d8bdc 100644
--- a/vault-init.sh
+++ b/vault-init.sh
@@ -7,6 +7,67 @@ INIT_FLAG="/vault/data/.initialized"
 
 echo "=== Vault Initialization Script ==="
 
+# ---------------------------------------------------------------------------
+# Helpers (used by the SUBSEQUENT DEPLOYMENT branch)
+# ---------------------------------------------------------------------------
+
+# Ensure a non-empty role_id file exists on disk; fetch it from Vault otherwise.
+# Usage: ensure_role_id <role-name> <role_id_file>  (returns 1, file untouched, if the fetch fails)
+ensure_role_id() {
+    role="$1"; rid_file="$2"
+    # -s is true only for an existing, non-empty file, so it subsumes -f.
+    [ -s "$rid_file" ] && return 0
+    echo "Fetching role_id for $role..."
+    rid=$(wget -q -O- \
+        --header="X-Vault-Token: $ROOT_TOKEN" \
+        "$VAULT_ADDR/v1/auth/approle/role/$role/role-id" | \
+        grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
+    [ -n "$rid" ] || { echo "ERROR: could not fetch role_id for $role" >&2; return 1; }
+    echo "$rid" > "$rid_file"
+    chmod 640 "$rid_file"
+}
+
+# Return 0 if the on-disk role_id + secret_id still authenticate, 1 otherwise.
+# Usage: validate_secret_id <role_id_file> <secret_id_file>
+validate_secret_id() {
+    rid_file="$1"; sid_file="$2"
+    [ -f "$rid_file" ] && [ -f "$sid_file" ] || return 1
+    rid=$(cat "$rid_file"); sid=$(cat "$sid_file")
+    [ -n "$rid" ] && [ -n "$sid" ] || return 1
+    # wget returns non-zero on HTTP 400 (invalid creds); also confirm a token came back.
+    resp=$(wget -q -O- \
+        --post-data="{\"role_id\":\"$rid\",\"secret_id\":\"$sid\"}" \
+        --header='Content-Type: application/json' \
+        "$VAULT_ADDR/v1/auth/approle/login" 2>/dev/null) || return 1
+    echo "$resp" | grep -q '"client_token"' || return 1
+    return 0
+}
+
+# Mint a fresh secret_id for a role and write it to disk.
+# Usage: mint_secret_id <role-name> <secret_id_file>  (returns 1, file untouched, if minting fails)
+mint_secret_id() {
+    role="$1"; sid_file="$2"
+    sid=$(wget -q -O- --post-data='' \
+        --header="X-Vault-Token: $ROOT_TOKEN" \
+        "$VAULT_ADDR/v1/auth/approle/role/$role/secret-id" | \
+        grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
+    [ -n "$sid" ] || { echo "ERROR: could not mint secret_id for $role" >&2; return 1; }
+    echo "$sid" > "$sid_file" && chmod 640 "$sid_file"
+}
+
+# Reuse the existing secret_id if it still authenticates; otherwise mint a new one.
+# Usage: reconcile_secret_id <role-name> <role_id_file> <secret_id_file>
+reconcile_secret_id() {
+    role="$1"; rid_file="$2"; sid_file="$3"
+    ensure_role_id "$role" "$rid_file"
+    if validate_secret_id "$rid_file" "$sid_file"; then
+        echo "$role: existing secret_id still valid - reusing"
+    else
+        echo "$role: secret_id invalid or missing - minting a new one"
+        mint_secret_id "$role" "$sid_file"
+    fi
+}
+
 # Wait for Vault to be ready
 echo "Waiting for Vault..."
 for i in $(seq 1 30); do
@@ -116,21 +177,21 @@ path "auth/token/lookup-self" { capabilities = ["read"] }'
     
     # Create GUI AppRole
     echo "Creating gui-service AppRole..."
-    wget -q -O- --post-data='{"token_policies":["gui-policy"],"token_no_default_policy":true,"token_ttl":"15m","token_max_ttl":"1h","secret_id_ttl":"24h","secret_id_num_uses":0,"bind_secret_id":true}' \
+    wget -q -O- --post-data='{"token_policies":["gui-policy"],"token_ttl":"15m","token_max_ttl":"1h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \
         --header="X-Vault-Token: $ROOT_TOKEN" \
         --header='Content-Type: application/json' \
         "$VAULT_ADDR/v1/auth/approle/role/gui-service" >/dev/null
     
     # Create CronManager AppRole
     echo "Creating cron-manager-service AppRole..."
-    wget -q -O- --post-data='{"token_policies":["cron-manager-policy"],"token_no_default_policy":true,"token_ttl":"30m","token_max_ttl":"8h","secret_id_ttl":"24h","secret_id_num_uses":0,"bind_secret_id":true}' \
+    wget -q -O- --post-data='{"token_policies":["cron-manager-policy"],"token_ttl":"30m","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \
         --header="X-Vault-Token: $ROOT_TOKEN" \
         --header='Content-Type: application/json' \
         "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service" >/dev/null
     
     # Create LLM Orchestration AppRole
     echo "Creating llm-orchestration-service AppRole..."
-    wget -q -O- --post-data='{"token_policies":["llm-orchestration-policy"],"token_no_default_policy":true,"token_ttl":"1h","token_max_ttl":"8h","secret_id_ttl":"24h","secret_id_num_uses":0,"bind_secret_id":true}' \
+    wget -q -O- --post-data='{"token_policies":["llm-orchestration-policy"],"token_ttl":"1h","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \
         --header="X-Vault-Token: $ROOT_TOKEN" \
         --header='Content-Type: application/json' \
         "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service" >/dev/null
@@ -280,61 +341,15 @@ else
     # Ensure credentials directory exists
     mkdir -p /agent/credentials
     
-    # Always regenerate all secret_ids on restart
-    echo "Regenerating GUI secret_id..."
-    GUI_SECRET_ID=$(wget -q -O- --post-data='' \
-        --header="X-Vault-Token: $ROOT_TOKEN" \
-        "$VAULT_ADDR/v1/auth/approle/role/gui-service/secret-id" | \
-        grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
-    echo "$GUI_SECRET_ID" > /agent/credentials/gui_secret_id
-    
-    echo "Regenerating CronManager secret_id..."
-    CRON_SECRET_ID=$(wget -q -O- --post-data='' \
-        --header="X-Vault-Token: $ROOT_TOKEN" \
-        "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service/secret-id" | \
-        grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
-    echo "$CRON_SECRET_ID" > /agent/credentials/cron_secret_id
-    
-    echo "Regenerating LLM secret_id..."
-    LLM_SECRET_ID=$(wget -q -O- --post-data='' \
-        --header="X-Vault-Token: $ROOT_TOKEN" \
-        "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service/secret-id" | \
-        grep -o '"secret_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
-    echo "$LLM_SECRET_ID" > /agent/credentials/llm_secret_id
-    
-    # Set permissions
-    chmod 640 /agent/credentials/*_secret_id
-    
-    # Ensure role_ids exist
-    if [ ! -f /agent/credentials/gui_role_id ]; then
-        echo "Copying GUI role_id..."
-        GUI_ROLE_ID=$(wget -q -O- \
-            --header="X-Vault-Token: $ROOT_TOKEN" \
-            "$VAULT_ADDR/v1/auth/approle/role/gui-service/role-id" | \
-            grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
-        echo "$GUI_ROLE_ID" > /agent/credentials/gui_role_id
-        chmod 640 /agent/credentials/gui_role_id
-    fi
-    
-    if [ ! -f /agent/credentials/cron_role_id ]; then
-        echo "Copying CronManager role_id..."
-        CRON_ROLE_ID=$(wget -q -O- \
-            --header="X-Vault-Token: $ROOT_TOKEN" \
-            "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service/role-id" | \
-            grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
-        echo "$CRON_ROLE_ID" > /agent/credentials/cron_role_id
-        chmod 640 /agent/credentials/cron_role_id
-    fi
-    
-    if [ ! -f /agent/credentials/llm_role_id ]; then
-        echo "Copying LLM role_id..."
-        LLM_ROLE_ID=$(wget -q -O- \
-            --header="X-Vault-Token: $ROOT_TOKEN" \
-            "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service/role-id" | \
-            grep -o '"role_id":"[^"]*"' | cut -d':' -f2 | tr -d '"')
-        echo "$LLM_ROLE_ID" > /agent/credentials/llm_role_id
-        chmod 640 /agent/credentials/llm_role_id
-    fi
+    # Reconcile secret_ids: reuse the existing one if it still authenticates,
+    # mint a new one only if it is invalid or missing. This keeps a single
+    # long-lived secret_id stable across normal restarts (secret_id_ttl=0,
+    # secret_id_num_uses=0), instead of rotating it every boot.
+    # ensure_role_id (called inside reconcile_secret_id) guarantees the role_id
+    # file exists before validation, since validation needs both.
+    reconcile_secret_id "gui-service"                 /agent/credentials/gui_role_id  /agent/credentials/gui_secret_id
+    reconcile_secret_id "cron-manager-service"        /agent/credentials/cron_role_id /agent/credentials/cron_secret_id
+    reconcile_secret_id "llm-orchestration-service"   /agent/credentials/llm_role_id  /agent/credentials/llm_secret_id
 fi
 
 echo "=== Vault init complete ==="
\ No newline at end of file
diff --git a/vault/config/vault.hcl b/vault/config/vault.hcl
index eaef415a..64ab325e 100644
--- a/vault/config/vault.hcl
+++ b/vault/config/vault.hcl
@@ -1,22 +1,27 @@
 # HashiCorp Vault Server Configuration
-# Production-ready configuration for LLM Orchestration Service
+# Single-node Raft for the RAG-Module services
 
-# Storage backend - Raft for high availability
+# Storage backend - Raft
 storage "raft" {
   path    = "/vault/file"
   node_id = "vault-node-1"
-  
-  # Retry join configuration for clustering (single node for now)
-  retry_join {
-    leader_api_addr = "http://vault:8200"
-  }
+
+  # NOTE: No retry_join for a single node. A lone node self-bootstraps.
+  # A retry_join pointing at itself causes repeated
+  # "failed to get raft challenge ... Vault is sealed" errors and a
+  # messy double Raft init on every boot. Add retry_join back only when
+  # you actually have peer nodes to join.
 }
 
-# HTTP listener configuration
+# HTTP API listener.
+# Vault automatically uses the next port up (8201) as its internal
+# cluster port, so do NOT define a separate listener on 8201 — that
+# collides with the cluster listener ("bind: address already in use")
+# and degrades the login/request-forwarding path the agents rely on.
 listener "tcp" {
-  address       = "0.0.0.0:8200"
-  tls_disable   = true
-  
+  address     = "0.0.0.0:8200"
+  tls_disable = true
+
   # Enable CORS for web UI access
   cors_enabled = true
   cors_allowed_origins = [
@@ -25,14 +30,9 @@ listener "tcp" {
   ]
 }
 
-# Cluster listener for HA (required even for single node)
-listener "tcp" {
-  address       = "0.0.0.0:8201"
-  cluster_addr  = "http://0.0.0.0:8201"
-  tls_disable   = true
-}
-
-# API and cluster addresses
+# API and cluster addresses.
+# cluster_addr tells Vault where its internal cluster port (8201) is
+# reachable; Vault binds that port itself — no listener block needed.
 api_addr     = "http://vault:8200"
 cluster_addr = "http://vault:8201"
 
@@ -46,9 +46,5 @@ default_lease_ttl = "168h"  # 7 days
 max_lease_ttl     = "720h"  # 30 days
 
 # Logging configuration
-log_level = "INFO"
+log_level  = "INFO"
 log_format = "json"
-
-# Development settings (remove in production)
-# Note: In production, you should not use dev mode
-# and should properly initialize and unseal the vault
\ No newline at end of file

From 9d4528233a8e0bfe709864794166a9787b16f749 Mon Sep 17 00:00:00 2001
From: nuwangeek <charith.bimsara@rootcode.io>
Date: Wed, 17 Jun 2026 11:53:44 +0530
Subject: [PATCH 3/8] address Vault via unique rag-vault alias to avoid cross
 -stack DNS collision

---
 DSL/CronManager/DSL/data_resync.yml           |   2 +-
 DSL/CronManager/DSL/delete_from_vault.yml     |   2 +-
 DSL/CronManager/DSL/store_in_vault.yml        |   2 +-
 .../script/delete_secrets_from_vault.sh       |   4 +-
 .../script/store_secrets_in_vault.sh          |   4 +-
 docker-compose-ec2.yml                        |  12 +-
 docker-compose.yml                            |  14 +-
 docs/VAULT_SECURITY_ARCHITECTURE.md           |  55 ++-
 docs/VAULT_SETUP_AND_USAGE.md                 | 355 ++++++++++++++++++
 vault-init.sh                                 |  60 +--
 vault/agents/cron/cron-agent.hcl              |   6 +-
 vault/agents/gui/gui-agent.hcl                |   6 +-
 vault/agents/llm/agent.hcl                    |   6 +-
 13 files changed, 448 insertions(+), 80 deletions(-)
 create mode 100644 docs/VAULT_SETUP_AND_USAGE.md

diff --git a/DSL/CronManager/DSL/data_resync.yml b/DSL/CronManager/DSL/data_resync.yml
index b5994d1e..a232ba39 100644
--- a/DSL/CronManager/DSL/data_resync.yml
+++ b/DSL/CronManager/DSL/data_resync.yml
@@ -2,4 +2,4 @@ agency_data_resync:
   trigger: "0 0 0/1 * * ?"
   # trigger: off
   type: exec
-  command: "../app/scripts/agency_data_resync.sh -s 10"
\ No newline at end of file
+  command: "/app/scripts/agency_data_resync.sh -s 10"
\ No newline at end of file
diff --git a/DSL/CronManager/DSL/delete_from_vault.yml b/DSL/CronManager/DSL/delete_from_vault.yml
index d7f06cea..cde1df27 100644
--- a/DSL/CronManager/DSL/delete_from_vault.yml
+++ b/DSL/CronManager/DSL/delete_from_vault.yml
@@ -2,4 +2,4 @@ delete_secrets:
   trigger: off
   type: exec
   command: "/app/scripts/delete_secrets_from_vault.sh"
-  allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','embeddingModel','embeddingPlatform']
+  allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','embeddingModel','embeddingPlatform', 'vaultAgentUrl']
diff --git a/DSL/CronManager/DSL/store_in_vault.yml b/DSL/CronManager/DSL/store_in_vault.yml
index fa1a6ac1..46f861e6 100644
--- a/DSL/CronManager/DSL/store_in_vault.yml
+++ b/DSL/CronManager/DSL/store_in_vault.yml
@@ -2,4 +2,4 @@ store_secrets:
   trigger: off
   type: exec
   command: "/app/scripts/store_secrets_in_vault.sh"
-  allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','secretKey','accessKey','deploymentName','targetUrl','apiKey','embeddingModel','embeddingPlatform','embeddingAccessKey','embeddingSecretKey','embeddingDeploymentName','embeddingTargetUri','embeddingAzureApiKey','deploymentEnvironment']
\ No newline at end of file
+  allowedEnvs: ['cookie','vaultUuid','llmPlatform', 'llmModel','secretKey','accessKey','deploymentName','targetUrl','apiKey','embeddingModel','embeddingPlatform','embeddingAccessKey','embeddingSecretKey','embeddingDeploymentName','embeddingTargetUri','embeddingAzureApiKey','deploymentEnvironment', 'vaultAgentUrl']
\ No newline at end of file
diff --git a/DSL/CronManager/script/delete_secrets_from_vault.sh b/DSL/CronManager/script/delete_secrets_from_vault.sh
index a6423566..3b405927 100644
--- a/DSL/CronManager/script/delete_secrets_from_vault.sh
+++ b/DSL/CronManager/script/delete_secrets_from_vault.sh
@@ -6,9 +6,9 @@
 set -e  # Exit on any error
 
 # Configuration
-# Use VAULT_AGENT_URL which points to vault-agent-cron proxy
+# Use vaultAgentUrl which points to vault-agent-cron proxy
 # The agent automatically injects the authentication token
-VAULT_ADDR="${VAULT_AGENT_URL:-http://vault-agent-cron:8203}"
+VAULT_ADDR="${vaultAgentUrl:-http://vault-agent-cron:8203}"
 
 # Logging function
 log() {
diff --git a/DSL/CronManager/script/store_secrets_in_vault.sh b/DSL/CronManager/script/store_secrets_in_vault.sh
index 8f4056f8..60784eed 100644
--- a/DSL/CronManager/script/store_secrets_in_vault.sh
+++ b/DSL/CronManager/script/store_secrets_in_vault.sh
@@ -6,9 +6,9 @@
 set -e  # Exit on any error
 
 # Configuration
-# Use VAULT_AGENT_URL which points to vault-agent-cron proxy
+# Use vaultAgentUrl which points to vault-agent-cron proxy
 # The agent automatically injects the authentication token
-VAULT_ADDR="${VAULT_AGENT_URL:-http://vault-agent-cron:8203}"
+VAULT_ADDR="${vaultAgentUrl:-http://vault-agent-cron:8203}"
 
 # Decryption Configuration
 PRIVATE_KEY_CACHE=""
diff --git a/docker-compose-ec2.yml b/docker-compose-ec2.yml
index a9052865..136c097c 100644
--- a/docker-compose-ec2.yml
+++ b/docker-compose-ec2.yml
@@ -502,7 +502,11 @@ services:
       - ./vault/config:/vault/config:ro
       - ./vault/logs:/vault/logs
     networks:
-      - vault-network  # Only on vault-network for security
+      vault-network:  # Only on vault-network for security
+        # Local testing: bare "vault" collides with the ckb stack on the shared
+        # bykstack network, so expose this Vault under a unique alias instead.
+        aliases:
+          - rag-vault
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "sh", "-c", "wget -q -O- http://127.0.0.1:8200/v1/sys/health || exit 0"]
@@ -519,7 +523,7 @@ services:
       vault:
         condition: service_healthy
     environment:
-      VAULT_ADDR: http://vault:8200
+      VAULT_ADDR: http://rag-vault:8200
     volumes:
       - vault-data:/vault/data
       - vault-agent-creds:/agent/credentials
@@ -528,8 +532,8 @@ services:
       - vault-agent-llm-token:/agent/llm-token
       - ./vault-init.sh:/vault-init.sh:ro
     networks:
-      - vault-network  # Access vault
-      - bykstack       # Access to write agent tokens
+      # vault-network only: tokens/creds go via shared volumes, not the network.
+      - vault-network
     entrypoint: ["/bin/sh"]
     command:
       - -c
diff --git a/docker-compose.yml b/docker-compose.yml
index ec324649..3e6cfba2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -191,7 +191,7 @@ services:
     environment:
       - server.port=9010
       - PYTHONPATH=/app:/app/src/vector_indexer:/app/src/intent_data_enrichment:/app/src/api_tool_indexer
-      - VAULT_AGENT_URL=http://vault-agent-cron:8203
+      - vaultAgentUrl=http://vault-agent-cron:8203
     ports:
       - 9010:8080
     depends_on:
@@ -449,7 +449,11 @@ services:
       - ./vault/config:/vault/config:ro
       - ./vault/logs:/vault/logs
     networks:
-      - vault-network  # Only on vault-network for security
+      vault-network:  # Only on vault-network for security
+        # Local testing: bare "vault" collides with the ckb stack on the shared
+        # bykstack network, so expose this Vault under a unique alias instead.
+        aliases:
+          - rag-vault
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "sh", "-c", "wget -q -O- http://127.0.0.1:8200/v1/sys/health || exit 0"]
@@ -466,7 +470,7 @@ services:
       vault:
         condition: service_healthy
     environment:
-      VAULT_ADDR: http://vault:8200
+      VAULT_ADDR: http://rag-vault:8200
     volumes:
       - vault-data:/vault/data
       - vault-agent-creds:/agent/credentials
@@ -475,8 +479,8 @@ services:
       - vault-agent-llm-token:/agent/llm-token
       - ./vault-init.sh:/vault-init.sh:ro
     networks:
-      - vault-network  # Access vault
-      - bykstack       # Access to write agent tokens
+      # vault-network only: tokens/creds go via shared volumes, not the network.
+      - vault-network
     entrypoint: ["/bin/sh"]
     command:
       - -c
diff --git a/docs/VAULT_SECURITY_ARCHITECTURE.md b/docs/VAULT_SECURITY_ARCHITECTURE.md
index 3f7f7ff2..2c2c6836 100644
--- a/docs/VAULT_SECURITY_ARCHITECTURE.md
+++ b/docs/VAULT_SECURITY_ARCHITECTURE.md
@@ -416,10 +416,9 @@ Connected Services:
   - GUI (React Frontend)
 
 Token Lifecycle:
-  - Token TTL: 15m
-  - Token Max TTL: 1h
-  - Auto-renewal: Every ~11 minutes (75% of TTL)
-  - Re-auth: When max_ttl reached (every ~1h)
+  - Token type: periodic (token_period 20m, no max-TTL)
+  - Auto-renewal: Every ~13 minutes (~2/3 of period)
+  - Re-auth: only on agent restart (never in steady state)
 ```
 
 #### Agent 2: vault-agent-cron
@@ -434,10 +433,9 @@ Connected Services:
   - CronManager (Python worker)
 
 Token Lifecycle:
-  - Token TTL: 30m
-  - Token Max TTL: 8h
-  - Auto-renewal: Every ~22 minutes (75% of TTL)
-  - Re-auth: When max_ttl reached (every ~8h)
+  - Token type: periodic (token_period 30m, no max-TTL)
+  - Auto-renewal: Every ~20 minutes (~2/3 of period)
+  - Re-auth: only on agent restart (never in steady state)
 ```
 
 #### Agent 3: vault-agent-llm
@@ -452,10 +450,9 @@ Connected Services:
   - LLM Orchestration Service (FastAPI)
 
 Token Lifecycle:
-  - Token TTL: 1h
-  - Token Max TTL: 8h
-  - Auto-renewal: Every ~45 minutes (75% of TTL)
-  - Re-auth: When max_ttl reached (every ~8h)
+  - Token type: periodic (token_period 1h, no max-TTL)
+  - Auto-renewal: Every ~40 minutes (~2/3 of period)
+  - Re-auth: only on agent restart (never in steady state)
 ```
 
 ### Token Caching and Auto-Renewal
@@ -473,29 +470,31 @@ T=0: Initial Authentication
         ├─► POST /v1/auth/approle/login
         │   Body: { role_id, secret_id }
         │
-        └─► Receives: { token, ttl: 3600s, renewable: true }
+        └─► Receives: { token, period: 3600s, renewable: true }   ← periodic token, no max-TTL
              │
              └─► Cache token in: /agent/llm-token/token
 
 
-T=45min: Proactive Renewal (75% of TTL)
+T≈40min: Proactive Renewal (~2/3 of period)
      vault-agent monitors expiration
         │
         ├─► POST /v1/auth/token/renew-self
         │   Header: X-Vault-Token: <current_token>
         │
-        └─► Receives: { token, ttl: 3600s } (same token, extended)
+        └─► Receives: { token, period: 3600s } (same token, period reset)
              │
              └─► Update cache: /agent/llm-token/token
+             │
+             └─► Repeats forever — a periodic token never hits a max-TTL,
+                 so steady-state operation never needs approle/login again.
 
 
-T=59min: Renewal Failed (fallback)
-     If renewal fails:
+On agent restart only:
+     vault-agent re-reads role_id + secret_id from disk
         │
-        ├─► Re-authenticate from scratch
-        │   POST /v1/auth/approle/login
+        ├─► POST /v1/auth/approle/login   (secret_id must still be valid)
         │
-        └─► New token issued and cached
+        └─► New periodic token issued and cached
 
 
 Application Request (anytime):
@@ -1159,18 +1158,18 @@ Current Implementation:
        └─► If invalid: Mint new secret_id, agents re-authenticate
 
 2. Token Lifecycle:
-   └─► Issue: vault-agent authenticates
+   └─► Issue: vault-agent authenticates (periodic token, token_period per role)
    └─► Use: Application makes requests
-   └─► Renew: vault-agent extends TTL (at ~75% of TTL)
-   └─► Max TTL reached: Renewal rejected by Vault
-   └─► Re-issue: vault-agent re-authenticates with secret_id
+   └─► Renew: vault-agent renews within the period (~2/3 of period)
+   └─► No max-TTL: renewal continues indefinitely
+   └─► Re-issue: only on agent restart, via secret_id login
 
 3. Security Benefits:
-    Short-lived tokens (1 hour for LLM, 30m for Cron, 15m for GUI)
-    Continuous renewal within max_ttl window
-    Automatic re-authentication when max_ttl reached
+    Periodic tokens (period 1h LLM, 30m Cron, 20m GUI), renewed continuously
+    Steady-state operation never re-runs approle/login (a stale secret_id
+      cannot strand a running agent)
     Stable secret_ids (no unnecessary churn on restart)
-    Compromised tokens have limited lifetime
+    Compromised tokens limited to one un-renewed period
 ```
 
 ### Audit Logging Capabilities
diff --git a/docs/VAULT_SETUP_AND_USAGE.md b/docs/VAULT_SETUP_AND_USAGE.md
new file mode 100644
index 00000000..e61d362b
--- /dev/null
+++ b/docs/VAULT_SETUP_AND_USAGE.md
@@ -0,0 +1,355 @@
+# Vault Setup & Usage Guide
+
+A single reference for how HashiCorp Vault is deployed, initialized, and consumed in the
+RAG-Module. It covers the topology, the three Vault Agents, the secret layout, and — in
+depth — **how each agent renews its token and how secrets are rotated**.
+
+Source files this document describes:
+
+- `docker-compose.yml` — service/topology definition
+- `vault/config/vault.hcl` — Vault server config
+- `vault-init.sh` — one-time bootstrap + per-restart reconcile
+- `vault/agents/{gui,cron,llm}/*.hcl` — the three Vault Agent configs
+- `DSL/CronManager/script/store_secrets_in_vault.sh` — writes/rotates secrets
+- `DSL/CronManager/script/delete_secrets_from_vault.sh` — deletes secrets
+
+For the security rationale (threat model, defense-in-depth, access matrix) see the
+companion `docs/VAULT_SECURITY_ARCHITECTURE.md`. This guide focuses on the *operational*
+mechanics.
+
+---
+
+## 1. Topology at a glance
+
+```
+                bykstack (application network)                  vault-network (internal: true)
+ ┌───────────────────────────────────────────────┐        ┌──────────────────────────────┐
+ │  gui ──────────────► vault-agent-gui  :8202 ───┼────────┤                              │
+ │  cron-manager ─────► vault-agent-cron :8203 ───┼────────┤        vault  :8200          │
+ │  llm-orchestration ► vault-agent-llm  :8201 ───┼────────┤   (Raft storage, KV v2,      │
+ │                                                │        │    AppRole auth)             │
+ │  vault-init (vault-network only) ──────────────┼────────┤                              │
+ └───────────────────────────────────────────────┘        └──────────────────────────────┘
+```
+
+- **`vault`** runs only on `vault-network`, which is `internal: true` — it has **no route to
+  or from the host or the internet**. Port 8200 is never published.
+- **Vault Agents** straddle both networks: they reach `vault` on `vault-network` and are
+  reachable by their owning application on `bykstack`.
+- **Applications** talk *only* to their agent (`VAULT_ADDR=http://vault-agent-*:820x`) and
+  never hold a Vault token themselves. The agent injects the token transparently.
+
+| Service | Agent it uses | Agent address | AppRole | Policy |
+|---|---|---|---|---|
+| `gui` | `vault-agent-gui` | `:8202` | `gui-service` | `gui-policy` |
+| `cron-manager` | `vault-agent-cron` | `:8203` | `cron-manager-service` | `cron-manager-policy` |
+| `llm-orchestration-service` | `vault-agent-llm` | `:8201` | `llm-orchestration-service` | `llm-orchestration-policy` |
+
+---
+
+## 2. Vault server (`vault/config/vault.hcl`)
+
+- **Storage:** Raft, single node (`node_id = vault-node-1`, path `/vault/file`, persisted in
+  the `vault-data` volume). No `retry_join` — a lone node self-bootstraps; adding a
+  self-pointing join was found to cause repeated "Vault is sealed" errors on every boot.
+- **Listener:** `0.0.0.0:8200`, `tls_disable = true` (TLS is terminated at the network
+  boundary; the network itself is the isolation layer here). Port `8201` is *not* given its
+  own listener because Vault uses it as the internal cluster port automatically.
+- **Lease defaults:** `default_lease_ttl = 168h` (7 days), `max_lease_ttl = 720h` (30 days).
+  These are *system ceilings*; the per-AppRole `token_period` values (§4.2) are much shorter
+  and are what actually govern agent renewal cadence.
+- `disable_mlock = false`, `ui = false`, JSON logs at INFO.
+
+Vault boots **sealed**. It must be unsealed before any operation — that is `vault-init`'s
+first job.
+
+---
+
+## 3. Bootstrap & reconcile (`vault-init.sh`)
+
+`vault-init` is a **run-once-then-exit** container (`restart: "no"`). The agents declare
+`depends_on: vault-init: condition: service_completed_successfully`, so they only start
+after init has finished cleanly. It runs `su vault -s /bin/sh /vault-init.sh` after creating
+and `chown`ing the shared agent directories.
+
+The script has two branches, selected by the presence of `/vault/data/.initialized`.
+
+### 3.1 First-time deployment
+
+1. Wait for `/v1/sys/health` to respond.
+2. **Initialize** with Shamir's Secret Sharing: `secret_shares=5`, `secret_threshold=3`.
+   The full response (5 unseal keys + root token) is written to
+   `/vault/data/unseal-keys.json`.
+3. **Unseal** by submitting 3 of the 5 keys.
+4. **Enable engines:** KV v2 at `secret/`, and the AppRole auth method.
+5. **Create three ACL policies** (see §5).
+6. **Create three AppRoles** issuing periodic tokens (see §4 — this is the heart of renewal),
+   via the `ensure_approles` helper. The same helper re-runs on subsequent deploys, so AppRole
+   config changes land without re-initializing Vault.
+7. **Issue credentials:** for each role, fetch the static `role_id` and mint a `secret_id`,
+   writing both to `/agent/credentials/<svc>_role_id` and `<svc>_secret_id` (`chmod 640`).
+8. **Generate an RSA-2048 keypair** with `openssl` and store it in Vault at
+   `secret/encryption/public_key` and `secret/encryption/private_key`
+   (algorithm `RSA-OAEP`, with `key_id` and `created_at` metadata).
+9. Seed a test LLM secret, then `touch /vault/data/.initialized`.
+
+### 3.2 Subsequent deployment (restart)
+
+1. Check `/v1/sys/seal-status`; if sealed, reload the 3 unseal keys from
+   `unseal-keys.json` and unseal.
+2. **Reconcile each secret_id** via `reconcile_secret_id`:
+   - `ensure_role_id` — make sure the `role_id` file exists (re-fetch from Vault if missing).
+   - `validate_secret_id` — attempt an AppRole login with the on-disk `role_id` + `secret_id`.
+     If it returns a `client_token`, the credential is still good.
+   - **Valid → reuse** the existing `secret_id` (no churn).
+   - **Invalid/missing → `mint_secret_id`** writes a fresh one.
+
+This is deliberate: because the AppRoles are created with `secret_id_ttl=0` and
+`secret_id_num_uses=0` (non-expiring, unlimited-use), a single long-lived `secret_id`
+survives normal restarts instead of being regenerated every boot. The RSA keypair, policies,
+and stored secrets are all preserved across restarts.
+
+> **Note on file permissions:** `vault-init.sh` writes credential files with `chmod 640`.
+> (The older architecture doc mentions `644`; the script is the source of truth — `640`.)
+
+---
+
+## 4. The three Vault Agents — auth, renewal & rotation
+
+This is the core of this guide. All three agents are the same Vault binary
+(`hashicorp/vault:1.20.3`) run as `vault agent -config=...`. They differ only in which
+credentials they read, which token sink they write, and their listener port.
+
+### 4.1 What an agent config actually does
+
+Example (`vault/agents/llm/agent.hcl`, condensed for brevity; gui/cron are identical in shape):
+
+```hcl
+vault { address = "http://vault:8200"; retry { num_retries = 5 } }
+
+auto_auth {
+  method "approle" {
+    mount_path = "auth/approle"
+    config = {
+      role_id_file_path   = "/agent/credentials/llm_role_id"
+      secret_id_file_path = "/agent/credentials/llm_secret_id"
+      remove_secret_id_file_after_reading = false
+    }
+  }
+  sink "file" { config = { path = "/agent/llm-token/token"; mode = 0640 } }
+}
+
+cache { default_lease_duration = "1h" }
+listener "tcp" { address = "0.0.0.0:8201"; tls_disable = true }
+api_proxy { use_auto_auth_token = true }
+```
+
+Three mechanisms are at work:
+
+1. **`auto_auth` (authentication + renewal):** On startup the agent reads `role_id` +
+   `secret_id` and calls `POST /v1/auth/approle/login`. Vault returns a **periodic token**
+   (the AppRoles set `token_period`, defined in `vault-init.sh`, *not* in the HCL). The agent
+   then runs Vault's **auto-auth lifecycle manager**, which **renews the token automatically
+   in the background** before each period elapses. A periodic token has **no max-TTL**, so the
+   agent renews it indefinitely and — during normal operation — **never has to call
+   `approle/login` again**. The agent only re-authenticates (and thus only needs the
+   `secret_id` again) if it is **restarted** or if a renewal is missed long enough for the
+   token to lapse. `remove_secret_id_file_after_reading = false` keeps the `secret_id` on disk
+   so the agent can re-auth after a restart without `vault-init` re-minting.
+
+   > **Why periodic tokens?** An earlier design issued tokens with `token_ttl`/`token_max_ttl`,
+   > which forced a full re-login every time `token_max_ttl` was reached. If the `secret_id`
+   > had become invalid by then (expiry, clock skew, server re-init), the agent got stuck in an
+   > `invalid role or secret ID` 400 backoff loop with no way to self-heal. Periodic tokens
+   > remove that re-login from the steady state, so a stale `secret_id` can no longer strand a
+   > running agent.
+2. **`sink "file"` (token hand-off):** Every time the agent obtains/renews a token it writes
+   it to a file (`/agent/<svc>-token/token`, mode `0640`). The compose **health check** for
+   each agent is simply `test -f <token> && test -s <token>` — a non-empty token file means
+   the agent has authenticated successfully.
+3. **`api_proxy { use_auto_auth_token = true }` (transparent injection):** The agent also
+   listens as an HTTP proxy on its port. When the application sends a token-less request, the
+   agent injects `X-Vault-Token: <current cached token>` and forwards it to `vault:8200`.
+   This is why application code never sets `VAULT_TOKEN`.
+
+> **`cache.default_lease_duration` is not the token TTL.** It is the agent's cache lease
+> hint. The authoritative token lifetime comes from the AppRole's `token_period` in
+> `vault-init.sh`. The per-agent cache hint is set to match the period.
+
+### 4.2 Per-agent renewal parameters
+
+AppRole token settings are created in `vault-init.sh`; all three use
+`token_period` (periodic token, **no max-TTL**), `secret_id_ttl=0`, `secret_id_num_uses=0`,
+`token_num_uses=0`, `bind_secret_id=true`.
+
+| Agent | AppRole | `token_period` | Proactive renewal (~⅔ of period) | Re-login (`approle/login`) |
+|---|---|---|---|---|
+| `vault-agent-gui` | `gui-service` | **20m** | ~every 13 min | only on agent restart |
+| `vault-agent-cron` | `cron-manager-service` | **30m** | ~every 20 min | only on agent restart |
+| `vault-agent-llm` | `llm-orchestration-service` | **1h** | ~every 40 min | only on agent restart |
+
+Reading the lifecycle for, e.g., the LLM agent:
+
+```
+T=0       login → periodic token (period 1h)        → written to /agent/llm-token/token
+T≈40m     renew-self → period resets to 1h          → token file refreshed
+...       renew repeats forever; token never hits a max-TTL
+(restart) agent re-runs approle/login with the on-disk secret_id → fresh token
+```
+
+The periods are tuned per service (shorter for the GUI, which only reads the public key;
+longer for the high-traffic LLM read path), but functionally all three behave the same:
+**renew forever, re-login only on restart.**
+
+### 4.3 Two distinct "rotation" concepts — keep them separate
+
+1. **Token rotation (automatic, continuous):** Handled entirely by the agent's `auto_auth`
+   loop as described above — the periodic token is renewed indefinitely with no human action
+   and no `vault-init` involvement.
+2. **`secret_id` rotation (rare):** The `secret_id` is the long-lived credential the agent
+   uses to *log in* (at startup/restart only, now that tokens are periodic). It is configured
+   non-expiring (`secret_id_ttl=0`, `secret_id_num_uses=0`) and is only replaced by
+   `vault-init` on a restart when the existing one fails validation (§3.2). To force rotation,
+   delete the `secret_id` file (or invalidate it in Vault) and re-run `vault-init`, then
+   restart the agent so it logs in with the freshly minted one.
+
+   > **Operational caveat (learned the hard way):** if a `secret_id` ever does become invalid
+   > while an agent is running, the periodic-token design means a *running* agent keeps working
+   > (it only renews, never re-logs-in). But a **restarted** agent needs a valid `secret_id` to
+   > log in. Recovery is always: re-run `vault-init` (mints a fresh `secret_id` via the §3.2
+   > reconcile) → restart the affected agent. See the troubleshooting steps in §9
+   > below.
+
+### 4.4 Restart behavior
+
+- **Restart an agent:** It re-reads `role_id`/`secret_id` from the (read-only) creds volume
+  and re-authenticates. New token, written to the sink. App sees a brief blip.
+- **Restart `vault`:** Data persists, but Vault comes back sealed; re-run `vault-init` to
+  unseal it (§3.2). Existing agent tokens remain valid if not expired.
+- **Full `down && up`:** Order is `vault → vault-init → agents → apps`. `vault-init` detects
+  the `.initialized` flag, skips first-time setup, reconciles secret_ids, and the agents
+  start with validated credentials.
+
+---
+
+## 5. Authorization — policies (who can touch what)
+
+Created in `vault-init.sh`. Paths are KV v2, so data lives under `secret/data/...` and
+listing/metadata under `secret/metadata/...`.
+
+| Path | `gui-policy` | `cron-manager-policy` | `llm-orchestration-policy` |
+|---|---|---|---|
+| `secret/data/encryption/public_key` | **read** | read | — |
+| `secret/data/encryption/private_key` | **deny** | **read** | — |
+| `secret/data/encryption/*` | — | — | **deny** |
+| `secret/data/llm/connections/*` | deny | **create/read/update/delete** | **read, list** |
+| `secret/data/embeddings/connections/*` | deny | **create/read/update/delete** | **read, list** |
+| `auth/token/lookup-self` | — | read | read |
+
+The intent, by tier:
+
+- **GUI** — can read *only* the public key, to encrypt user-entered credentials in the
+  browser before they ever leave it. Everything else is explicitly denied.
+- **CronManager** — the only writer. Reads the **private key** to decrypt what the GUI
+  encrypted, then writes plaintext credentials into Vault. Full CRUD on connection secrets.
+- **LLM Orchestration** — read-only consumer of connection secrets. **Explicitly denied** all
+  encryption keys, so a compromise of this hot-path service cannot exfiltrate the private key.
+
+---
+
+## 6. Secret layout (KV v2 under `secret/`)
+
+```
+secret/
+├── llm/connections/<platform>/<vaultUuid>          ← e.g. aws_bedrock, azure_openai
+├── embeddings/connections/<platform>/<vaultUuid>
+└── encryption/
+    ├── public_key     { key, algorithm: RSA-OAEP, key_size: 2048, key_id, created_at }
+    └── private_key    { key, algorithm: RSA-OAEP, key_size: 2048, key_id, created_at }
+```
+
+The current write/delete scripts key connection secrets by a stable **`vaultUuid`** as the
+final path segment (environment is tracked in the DB, not the path). KV v2 versions every
+write, so updating a credential keeps prior versions for audit/rollback.
+
+LLM secret shape (AWS): `{ connection_id, access_key, secret_key, model, tags }`.
+Azure: `{ connection_id, endpoint, api_key, deployment_name, model, api_version, tags }`.
+
+---
+
+## 7. Usage flows
+
+### 7.1 Storing / rotating a credential (`store_secrets_in_vault.sh`, via cron-manager)
+
+1. GUI encrypts the raw key with the RSA **public** key and submits it.
+2. The cron-manager job runs the script against `vault-agent-cron:8203` (no token — the agent
+   injects it).
+3. The script **fetches the private key** (`GET secret/data/encryption/private_key`), then
+   decrypts each sensitive field in-memory via `decrypt_vault_secrets.py` (RSA-OAEP).
+4. It builds the JSON payload with `jq` and `POST`s plaintext to
+   `secret/data/<llm|embeddings>/connections/<platform>/<vaultUuid>`. Re-posting the same path
+   = a KV v2 version bump = credential rotation.
+5. Sensitive shell variables are `unset` immediately after use.
+
+### 7.2 Deleting a credential (`delete_secrets_from_vault.sh`)
+
+`DELETE`s both `secret/data/...` and `secret/metadata/...` for the connection (404 treated as
+success), again through `vault-agent-cron` with no explicit token.
+
+### 7.3 Reading a credential (LLM orchestration)
+
+The LLM service issues a token-less `GET http://vault-agent-llm:8201/v1/secret/data/llm/...`.
+`vault-agent-llm` injects its cached token, Vault validates it against
+`llm-orchestration-policy`, and returns the secret. The service then calls AWS/Azure with it.
+
+---
+
+## 8. Operational notes & known trade-offs
+
+- **Unseal keys + root token sit in the `vault-data` volume** (`unseal-keys.json`). This makes
+  auto-unseal on restart trivial but is a **dev/test convenience**. For production, switch to
+  auto-unseal backed by a cloud KMS/HSM and remove the keys from the volume.
+- **Root token** is used only by `vault-init` and is never injected into app containers. Best
+  practice for production is to revoke it after bootstrap and use scoped admin policies.
+- **TLS is disabled** on the Vault listener and agent listeners; isolation relies on the
+  `internal: true` `vault-network`. Add TLS for any non-local deployment.
+- **Audit logging is available but not enabled.** Turn it on with
+  `vault audit enable file file_path=/vault/logs/audit.log` (the `./vault/logs` mount already
+  exists) for a full request trail.
+- **Credential files are readable by every agent that mounts the shared volume** (mode 640,
+  single owner, but all agents mount the same `vault-agent-creds` volume read-only) —
+  isolation is at the volume level, not per-file. Fine for this trust boundary; note it if
+  the threat model tightens.
+
+---
+
+## 9. Troubleshooting: agents looping on `invalid role or secret ID`
+
+**Symptom:** an agent logs `lifetime watcher done channel triggered, re-authenticating`
+followed by repeating `PUT .../auth/approle/login → Code: 400 ... invalid role or secret ID`
+with growing backoff. Token *renewals* had been succeeding up to that point.
+
+**Cause:** the agent's `secret_id` became invalid server-side (expiry, clock skew, or a Vault
+re-init), and the agent reached a point where it had to do a full `approle/login`. With the
+old `token_ttl`/`token_max_ttl` design this happened on every `token_max_ttl` cycle; the
+switch to **periodic tokens** (§4) removes re-login from steady state, so a *running* agent no
+longer hits this — but a **restarted** agent still needs a valid `secret_id`.
+
+**Recovery:**
+
+```bash
+# Mint fresh secret_ids (vault-init's reconcile detects the invalid ones and replaces them)
+docker compose up -d --force-recreate vault-init
+docker wait vault-init
+# Restart the affected agents so they log in with the fresh secret_id
+docker compose restart vault-agent-gui vault-agent-cron vault-agent-llm
+```
+
+**Confirm root cause (read-only):**
+
+```bash
+ROOT=$(docker exec vault sh -c "grep -o '\"root_token\":\"[^\"]*\"' /vault/file/unseal-keys.json | cut -d: -f2 | tr -d '\"'")
+docker exec -e VAULT_TOKEN=$ROOT -e VAULT_ADDR=http://127.0.0.1:8200 vault \
+  vault read auth/approle/role/gui-service          # expect token_period set, secret_id_ttl=0
+echo "host: $(date -u)"; docker exec vault date -u  # check for WSL2/Docker clock drift
+```
diff --git a/vault-init.sh b/vault-init.sh
index 164d8bdc..0e759f8e 100644
--- a/vault-init.sh
+++ b/vault-init.sh
@@ -68,6 +68,27 @@ reconcile_secret_id() {
     fi
 }
 
+# Create or update an AppRole that issues a PERIODIC token (no max_ttl): the
+# agent renews it forever and never re-runs approle/login in steady state.
+# secret_id_ttl=0 + secret_id_num_uses=0 keep the secret_id valid across
+# restarts. Idempotent: does not invalidate existing secret_ids, so it is safe on every run.
+# Usage: upsert_approle <role-name> <policy-name> <token-period>
+upsert_approle() {
+    role="$1"; policy="$2"; period="$3"
+    wget -q -O- --post-data='{"token_policies":["'"$policy"'"],"token_period":"'"$period"'","token_num_uses":0,"secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \
+        --header="X-Vault-Token: $ROOT_TOKEN" \
+        --header='Content-Type: application/json' \
+        "$VAULT_ADDR/v1/auth/approle/role/$role" >/dev/null
+}
+
+# Apply the current AppRole definitions for all three services.
+ensure_approles() {
+    echo "Ensuring AppRole configs (periodic tokens)..."
+    upsert_approle "gui-service"               "gui-policy"                 "20m"
+    upsert_approle "cron-manager-service"      "cron-manager-policy"        "30m"
+    upsert_approle "llm-orchestration-service" "llm-orchestration-policy"   "1h"
+}
+
 # Wait for Vault to be ready
 echo "Waiting for Vault..."
 for i in $(seq 1 30); do
@@ -175,27 +196,9 @@ path "auth/token/lookup-self" { capabilities = ["read"] }'
         --header='Content-Type: application/json' \
         "$VAULT_ADDR/v1/sys/policies/acl/llm-orchestration-policy" >/dev/null
     
-    # Create GUI AppRole
-    echo "Creating gui-service AppRole..."
-    wget -q -O- --post-data='{"token_policies":["gui-policy"],"token_ttl":"15m","token_max_ttl":"1h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \
-        --header="X-Vault-Token: $ROOT_TOKEN" \
-        --header='Content-Type: application/json' \
-        "$VAULT_ADDR/v1/auth/approle/role/gui-service" >/dev/null
-    
-    # Create CronManager AppRole
-    echo "Creating cron-manager-service AppRole..."
-    wget -q -O- --post-data='{"token_policies":["cron-manager-policy"],"token_ttl":"30m","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \
-        --header="X-Vault-Token: $ROOT_TOKEN" \
-        --header='Content-Type: application/json' \
-        "$VAULT_ADDR/v1/auth/approle/role/cron-manager-service" >/dev/null
-    
-    # Create LLM Orchestration AppRole
-    echo "Creating llm-orchestration-service AppRole..."
-    wget -q -O- --post-data='{"token_policies":["llm-orchestration-policy"],"token_ttl":"1h","token_max_ttl":"8h","secret_id_ttl":"0","secret_id_num_uses":0,"bind_secret_id":true}' \
-        --header="X-Vault-Token: $ROOT_TOKEN" \
-        --header='Content-Type: application/json' \
-        "$VAULT_ADDR/v1/auth/approle/role/llm-orchestration-service" >/dev/null
-    
+    # Create the three AppRoles (periodic tokens - see upsert_approle).
+    ensure_approles
+
     # Ensure credentials directory exists
     mkdir -p /agent/credentials
     
@@ -337,16 +340,19 @@ else
     # Get root token
     ROOT_TOKEN=$(grep -o '"root_token":"[^"]*"' "$UNSEAL_KEYS_FILE" | cut -d':' -f2 | tr -d '"')
     export VAULT_TOKEN="$ROOT_TOKEN"
-    
+
+    # Re-apply AppRole definitions so config changes (e.g. periodic tokens)
+    # take effect on redeploy without re-initializing Vault. Idempotent and
+    # does not invalidate existing secret_ids.
+    ensure_approles
+
     # Ensure credentials directory exists
     mkdir -p /agent/credentials
     
     # Reconcile secret_ids: reuse the existing one if it still authenticates,
-    # mint a new one only if it is invalid or missing. This keeps a single
-    # long-lived secret_id stable across normal restarts (secret_id_ttl=0,
-    # secret_id_num_uses=0), instead of rotating it every boot.
-    # ensure_role_id (called inside reconcile_secret_id) guarantees the role_id
-    # file exists before validation, since validation needs both.
+    # mint a new one only if invalid or missing - keeps one stable secret_id
+    # across restarts instead of rotating every boot. reconcile_secret_id also
+    # ensures the role_id file exists first (validation needs both).
     reconcile_secret_id "gui-service"                 /agent/credentials/gui_role_id  /agent/credentials/gui_secret_id
     reconcile_secret_id "cron-manager-service"        /agent/credentials/cron_role_id /agent/credentials/cron_secret_id
     reconcile_secret_id "llm-orchestration-service"   /agent/credentials/llm_role_id  /agent/credentials/llm_secret_id
diff --git a/vault/agents/cron/cron-agent.hcl b/vault/agents/cron/cron-agent.hcl
index f2db227e..9454c9b7 100644
--- a/vault/agents/cron/cron-agent.hcl
+++ b/vault/agents/cron/cron-agent.hcl
@@ -2,7 +2,9 @@
 # This agent provides CronManager with access to encryption keys and write access to secrets
 
 vault {
-  address = "http://vault:8200"
+  # Local testing: use rag-vault, not bare "vault" — that name collides with the ckb
+  # stack on the shared bykstack network, so the agent authenticates against the wrong Vault.
+  address = "http://rag-vault:8200"
   retry {
     num_retries = 5
   }
@@ -42,6 +44,4 @@ listener "tcp" {
 # API proxy configuration
 api_proxy {
   use_auto_auth_token = true
-  enforce_consistency = "always"
-  when_inconsistent = "forward"
 }
diff --git a/vault/agents/gui/gui-agent.hcl b/vault/agents/gui/gui-agent.hcl
index a28db871..672d6d4d 100644
--- a/vault/agents/gui/gui-agent.hcl
+++ b/vault/agents/gui/gui-agent.hcl
@@ -2,7 +2,9 @@
 # This agent provides GUI with access to public encryption key only
 
 vault {
-  address = "http://vault:8200"
+  # Local testing: use rag-vault, not bare "vault" — that name collides with the ckb
+  # stack on the shared bykstack network, so the agent authenticates against the wrong Vault.
+  address = "http://rag-vault:8200"
   retry {
     num_retries = 5
   }
@@ -42,6 +44,4 @@ listener "tcp" {
 # API proxy configuration
 api_proxy {
   use_auto_auth_token = true
-  enforce_consistency = "always"
-  when_inconsistent = "forward"
 }
diff --git a/vault/agents/llm/agent.hcl b/vault/agents/llm/agent.hcl
index d7237be7..1a575260 100644
--- a/vault/agents/llm/agent.hcl
+++ b/vault/agents/llm/agent.hcl
@@ -1,5 +1,7 @@
 vault {
-  address = "http://vault:8200"
+  # Local testing: use rag-vault, not bare "vault" — that name collides with the ckb
+  # stack on the shared bykstack network, so the agent authenticates against the wrong Vault.
+  address = "http://rag-vault:8200"
   retry {
     num_retries = 5
   }
@@ -34,6 +36,4 @@ listener "tcp" {
 
 api_proxy {
   use_auto_auth_token = true
-  enforce_consistency = "always"
-  when_inconsistent = "forward"
 }

From e78f63286da27728a38c41c01d77a0eab39f9601 Mon Sep 17 00:00:00 2001
From: ruwinirathnamalala <ruwini.rathnamalala@rootcodelabs.com>
Date: Wed, 17 Jun 2026 12:21:50 +0530
Subject: [PATCH 4/8] Removed testmodel index and css

---
 GUI/src/components/MainNavigation/index.tsx |   8 +-
 GUI/src/pages/TestModel/TestLLM.scss        | 217 -------------------
 GUI/src/pages/TestModel/index.tsx           | 228 --------------------
 3 files changed, 1 insertion(+), 452 deletions(-)
 delete mode 100644 GUI/src/pages/TestModel/TestLLM.scss
 delete mode 100644 GUI/src/pages/TestModel/index.tsx

diff --git a/GUI/src/components/MainNavigation/index.tsx b/GUI/src/components/MainNavigation/index.tsx
index 265f464c..8ae278ca 100644
--- a/GUI/src/components/MainNavigation/index.tsx
+++ b/GUI/src/components/MainNavigation/index.tsx
@@ -44,13 +44,7 @@ const MainNavigation: FC = () => {
       label: t('menu.testLLM'),
       path: '/test-llm',
       icon: <MdSearch />
-    },
-    // {
-    //   id: 'testProductionLLM',
-    //   label: t('menu.testProductionLLM'),
-    //   path: '/test-production-llm',
-    //   icon: <MdSearch />
-    // }
+    }
   ];
 
   const filterItemsByRole = (role: string[], items: MenuItem[]) => {
diff --git a/GUI/src/pages/TestModel/TestLLM.scss b/GUI/src/pages/TestModel/TestLLM.scss
deleted file mode 100644
index 35bced8d..00000000
--- a/GUI/src/pages/TestModel/TestLLM.scss
+++ /dev/null
@@ -1,217 +0,0 @@
-// .testModalFormTextArea {
-//   margin-top: 30px;
-// }
-
-// .mcq-buttons {
-//   display: flex;
-//   flex-wrap: wrap;
-//   gap: 0.75rem;
-//   margin-top: 1rem;
-// }
-
-// .testModalClassifyButton {
-//   text-align: right;
-//   margin-top: 20px;
-// }
-
-// .llm-connection-section {
-//   width: 50%;
-// }
-
-// .llm-connection-controls {
-//   display: flex;
-//   gap: 1rem;
-//   align-items: center;
-// }
-
-// .inference-results-container {
-//   max-width: 100%;
-//   background-color: #d7efff;
-//   padding: 20px;
-//   border-radius: 8px;
-//   margin-top: 20px;
-  
-//   .result-item {
-//     margin-bottom: 15px;
-    
-//     strong {
-//       color: #333;
-//     }
-//   }
-  
-//   .response-content {
-//     margin-top: 8px;
-//     padding: 12px;
-//     background-color: #f5f5f5;
-//     border-radius: 4px;
-//     white-space: pre-wrap;
-//     line-height: 1.5;
-//     color: #555;
-//   }
-
-//   .context-section {
-//     margin-top: 20px;
-
-//     .context-list {
-//       display: flex;
-//       flex-direction: column;
-//       gap: 12px;
-//       margin-top: 8px;
-//     }
-
-//     .context-item {
-//       padding: 12px;
-//       background-color: #ffffff;
-//       border: 1px solid #e0e0e0;
-//       border-radius: 6px;
-//       box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
-
-//       .context-rank {
-//         margin-bottom: 8px;
-//         padding-bottom: 4px;
-//         border-bottom: 1px solid #f0f0f0;
-
-//         strong {
-//           color: #2563eb;
-//           font-size: 0.875rem;
-//           font-weight: 600;
-//         }
-//       }
-
-//       .context-content {
-//         color: #374151;
-//         line-height: 1.5;
-//         font-size: 0.9rem;
-//         white-space: pre-wrap;
-//       }
-//     }
-//   }
-// }
-
-// .testModalList {
-//   list-style: disc;
-//   margin-left: 30px;
-// }
-
-// .mt-20 {
-//   margin-top: 20px;
-// }
-
-// .classification-results {
-//   margin-top: 1rem;
-//   padding: 1rem;
-//   border: 1px solid #e0e0e0;
-//   border-radius: 8px;
-//   background-color: #f9f9f9;
-
-//   h3 {
-//     margin: 0 0 1rem 0;
-//     color: #333;
-//   }
-
-//   h4 {
-//     margin: 0 0 0.75rem 0;
-//     color: #555;
-//     font-size: 1rem;
-//   }
-
-//   .results-container {
-//     display: flex;
-//     flex-direction: column;
-//     gap: 1.5rem;
-//   }
-
-//   .top-prediction {
-//     .prediction-card {
-//       display: flex;
-//       justify-content: space-between;
-//       align-items: center;
-//       padding: 1rem;
-//       border-radius: 8px;
-//       background-color: #e8f5e8;
-//       border: 2px solid #4caf50;
-
-//       .agency-name {
-//         font-weight: 600;
-//         color: #2e7d32;
-//         font-size: 1.1rem;
-//       }
-
-//       .confidence-score {
-//         font-weight: 700;
-//         color: #2e7d32;
-//         font-size: 1.2rem;
-//       }
-//     }
-//   }
-
-//   .predictions-list {
-//     display: flex;
-//     flex-direction: column;
-//     gap: 0.75rem;
-
-//     .prediction-item {
-//       display: flex;
-//       align-items: center;
-//       gap: 1rem;
-//       padding: 0.75rem;
-//       background-color: white;
-//       border-radius: 6px;
-//       border: 1px solid #ddd;
-
-//       &.highest {
-//         border-color: #4caf50;
-//         background-color: #f8fff8;
-//       }
-
-//       .rank {
-//         font-weight: 600;
-//         color: #666;
-//         min-width: 2rem;
-//       }
-
-//       .agency-info {
-//         flex: 1;
-//         display: flex;
-//         flex-direction: column;
-//         gap: 0.25rem;
-
-//         .agency-name {
-//           font-weight: 500;
-//           color: #333;
-//         }
-
-//         .confidence-bar-container {
-//           width: 100%;
-//           height: 4px;
-//           background-color: #e0e0e0;
-//           border-radius: 2px;
-//           overflow: hidden;
-
-//           .confidence-bar {
-//             height: 100%;
-//             background-color: #4caf50;
-//             transition: width 0.3s ease;
-//           }
-//         }
-//       }
-
-//       .confidence-percentage {
-//         font-weight: 600;
-//         color: #555;
-//         min-width: 4rem;
-//         text-align: right;
-//       }
-//     }
-//   }
-// }
-
-// .classification-error {
-//   margin-top: 1rem;
-//   padding: 1rem;
-//   background-color: #ffebee;
-//   border: 1px solid #f44336;
-//   border-radius: 6px;
-//   color: #c62828;
-//   text-align: center;
-// }
\ No newline at end of file
diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx
deleted file mode 100644
index 2fc116bd..00000000
--- a/GUI/src/pages/TestModel/index.tsx
+++ /dev/null
@@ -1,228 +0,0 @@
-import { useMutation, useQuery } from '@tanstack/react-query';
-import { Button, FormSelect, FormTextarea, Collapsible } from 'components';
-import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner';
-import { ComponentPropsWithoutRef, FC, useState } from 'react';
-import { useTranslation } from 'react-i18next';
-import ReactMarkdown from 'react-markdown';
-import remarkGfm from 'remark-gfm';
-import './TestLLM.scss';
-import { useDialog } from 'hooks/useDialog';
-import { fetchLLMConnectionsPaginated, LegacyLLMConnectionFilters } from 'services/llmConnections';
-import { viewInferenceResult, InferenceRequest, InferenceResponse, ChoiceButton } from 'services/inference';
-import { llmConnectionsQueryKeys } from 'utils/queryKeys';
-import { ButtonAppearanceTypes } from 'enums/commonEnums';
-
-const TestLLM: FC = () => {
-  const { t } = useTranslation();
-  const { open: openDialog, close: closeDialog } = useDialog();
-  const [inferenceResult, setInferenceResult] = useState<InferenceResponse['response'] | null>(null);
-  const [pendingButtons, setPendingButtons] = useState<ChoiceButton[]>([]);
-  const [testLLM, setTestLLM] = useState({
-    connectionId: null,
-    text: '',
-  });
-
-  // Sort context by rank
-  const sortedContext = inferenceResult?.chunks?.toSorted((a, b) => a.rank - b.rank) ?? [];
-
-  // Fetch LLM connections for dropdown - using the working legacy endpoint for now
-  const { data: connections, isLoading: isLoadingConnections } = useQuery({
-    queryKey: llmConnectionsQueryKeys.list({
-      page: 1,
-      pageSize: 100, // Get all connections for dropdown
-      sorting: 'created_at desc',
-    }),
-    queryFn: () => fetchLLMConnectionsPaginated({
-      pageNumber: 1,
-      pageSize: 100,
-      sortBy: 'created_at desc',
-    }),
-  });
-
-  // Transform connections data for dropdown
-  const connectionOptions = connections?.map((connection: any) => ({
-    label: `${connection.llmPlatform} - ${connection.llmModel} (${connection.environment})`,
-    value: connection.id,
-  })) || [];
-
-  // Inference mutation
-  const inferenceMutation = useMutation({
-    mutationFn: (request: InferenceRequest) => viewInferenceResult(request),
-    onSuccess: (data: InferenceResponse) => {
-      setInferenceResult(data?.response);
-      setPendingButtons(data?.response?.buttons ?? []);
-    },
-    onError: (error: any) => {
-      console.error('Error getting inference result:', error);
-      openDialog({
-        title: t('testModels.inferenceErrorTitle') || 'Inference Error',
-        content: <p>{t('testModels.inferenceErrorMessage') || 'Failed to get inference result. Please try again.'}</p>,
-        footer: (
-          <Button
-            appearance={ButtonAppearanceTypes.PRIMARY}
-            onClick={closeDialog}
-          >
-            {t('testModels.closeButton') || 'Close'}
-          </Button>
-        ),
-      });
-    },
-  });
-
-  const handleSend = () => {
-    if (testLLM.connectionId && testLLM.text) {
-      inferenceMutation.mutate({
-        llmConnectionId: Number(testLLM.connectionId),
-        message: testLLM.text,
-      });
-    }
-  };
-
-  const handleButtonClick = (payload: string) => {
-    if (!testLLM.connectionId) return;
-    setPendingButtons([]);
-    inferenceMutation.mutate({
-      llmConnectionId: Number(testLLM.connectionId),
-      message: payload,
-    });
-  };
-
-  const handleChange = (key: string, value: string | number) => {
-    // Prevent changes while inference is loading
-    if (inferenceMutation.isLoading) {
-      return;
-    }
-    setTestLLM((prev) => ({
-      ...prev,
-      [key]: value,
-    }));
-  };
-
-  const markdownComponents = {
-    ol: ({children}: any) => (
-      <ol style={{ paddingLeft: '1.5rem', listStyleType: 'decimal' }}>
-        {children}
-      </ol>
-    ),
-    a: (props: ComponentPropsWithoutRef<"a">) => (
-      <a {...props} target="_blank" rel="noopener noreferrer" />
-    ),
-  };
-
-  return (
-    <div>
-      {isLoadingConnections ? (
-        <CircularSpinner />
-      ) : (
-        <div className="container">
-          <div className="title_container">
-            <div className="title">{t('testModels.title') || 'Test LLM'}</div>
-          </div>
-          <div className="llm-connection-section">
-            <p>{t('testModels.llmConnectionLabel') || 'LLM Connection'}</p>
-            <div className="llm-connection-controls">
-
-              <FormSelect
-                label=""
-                name="connectionId"
-                options={connectionOptions}
-                placeholder={t('testModels.selectConnectionPlaceholder') || 'Select LLM Connection'}
-                onSelectionChange={(selection) => {
-                  handleChange('connectionId', selection?.value as string);
-                }}
-                value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
-                defaultValue={testLLM?.connectionId ?? undefined}
-                disabled={inferenceMutation.isLoading}
-              />
-            </div>
-          </div>
-
-          <div className="testModalFormTextArea">
-            <p>{t('testModels.classifyTextLabel') || 'Enter text to test'}</p>
-            <FormTextarea
-              label=""
-              name=""
-              maxLength={1000}
-              maxRows={15}
-              onChange={(e) => handleChange('text', e.target.value)}
-              showMaxLength={true}
-            />
-          </div>
-          <div className="testModalClassifyButton">
-            <Button
-              onClick={handleSend}
-              disabled={!testLLM.connectionId || !testLLM.text || inferenceMutation.isLoading}
-            >
-              {inferenceMutation.isLoading ? t('testModels.sendingButton') || 'Sending...' : t('testModels.sendButton') || 'Send'}
-            </Button>
-          </div>
-
-          {/* Inference Result */}
-
-          {inferenceResult && !inferenceMutation.isLoading && (
-            <div className="inference-results-container">
-              <div className="result-item">
-                <strong>Response:</strong>
-                <div className="response-content">
-                  <ReactMarkdown remarkPlugins={[remarkGfm]} components={markdownComponents}>
-                    {inferenceResult.content}
-                  </ReactMarkdown>
-                </div>
-              </div>
-
-              {/* MCQ Buttons */}
-              {pendingButtons.length > 0 && (
-                <div className="mcq-buttons">
-                  {pendingButtons.map((btn) => (
-                    <Button
-                      key={btn.payload}
-                      appearance={ButtonAppearanceTypes.SECONDARY}
-                      onClick={() => handleButtonClick(btn.payload)}
-                      disabled={inferenceMutation.isLoading}
-                    >
-                      {btn.title}
-                    </Button>
-                  ))}
-                </div>
-              )}
-
-              {/* Context Section */}
-              {
-                sortedContext && sortedContext?.length > 0 && (
-                  <div className="context-section">
-                    <Collapsible title={`Context (${sortedContext?.length} chunks)`} defaultOpen={false}>
-                      <div className="context-list">
-                        {sortedContext?.map((contextItem, index) => (
-                          <div key={index} className="context-item">
-                            <div className="context-rank">
-                              <strong>Rank {contextItem.rank}</strong>
-                            </div>
-                            <div className="context-content">
-                              <ReactMarkdown remarkPlugins={[remarkGfm]} components={markdownComponents}>
-                                {contextItem.chunkRetrieved}
-                              </ReactMarkdown>
-                            </div>
-                          </div>
-                        ))}
-                      </div>
-                    </Collapsible>
-                  </div>
-                )
-              }
-
-            </div>
-          )}
-
-          {/* Error State */}
-          {inferenceMutation.isError && (
-            <div className="classification-error">
-              <p>{t('testModels.classificationFailed') || 'Inference failed. Please try again.'}</p>
-            </div>
-          )}
-        </div>
-      )}
-    </div>
-  );
-};
-
-export default TestLLM;
\ No newline at end of file

From cebcabe056b412dbae0708b2798f9e72506096f3 Mon Sep 17 00:00:00 2001
From: nuwangeek <charith.bimsara@rootcode.io>
Date: Thu, 18 Jun 2026 03:42:50 +0530
Subject: [PATCH 5/8] fixed vector indexer statistics analysis issue

---
 src/vector_indexer/contextual_processor.py | 14 ++--
 src/vector_indexer/error_logger.py         |  4 +-
 src/vector_indexer/main_indexer.py         | 80 ++++++++++++++--------
 src/vector_indexer/models.py               |  8 +++
 4 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/src/vector_indexer/contextual_processor.py b/src/vector_indexer/contextual_processor.py
index b225cf30..6b21d326 100644
--- a/src/vector_indexer/contextual_processor.py
+++ b/src/vector_indexer/contextual_processor.py
@@ -41,7 +41,7 @@ def __init__(
 
     async def process_document(
         self, document: ProcessingDocument
-    ) -> List[ContextualChunk]:
+    ) -> tuple[List[ContextualChunk], int]:
         """
         Process single document into contextual chunks.
 
@@ -49,7 +49,8 @@ async def process_document(
             document: Document to process
 
         Returns:
-            List of contextual chunks with embeddings
+            Tuple of (contextual chunks with embeddings, number of chunks
+            dropped due to context-generation failure)
         """
         logger.info(
             f"Processing document {document.document_hash} ({len(document.content)} characters)"
@@ -69,11 +70,13 @@ async def process_document(
             # Step 3: Create contextual chunks (filter out failed context generations)
             contextual_chunks: List[ContextualChunk] = []
             valid_contextual_contents: List[str] = []
+            failed_chunks = 0
 
             for i, (base_chunk, context) in enumerate(
                 zip(base_chunks, contexts, strict=True)
             ):
                 if isinstance(context, Exception):
+                    failed_chunks += 1
                     self.error_logger.log_context_generation_failure(
                         document.document_hash, i, str(context), self.config.max_retries
                     )
@@ -128,7 +131,7 @@ async def process_document(
                 logger.error(
                     f"No valid chunks created for document {document.document_hash}"
                 )
-                return []
+                return [], failed_chunks
 
             # Step 4: Create embeddings for all valid contextual chunks
             try:
@@ -154,9 +157,10 @@ async def process_document(
                 raise
 
             logger.info(
-                f"Successfully processed document {document.document_hash}: {len(contextual_chunks)} chunks"
+                f"Successfully processed document {document.document_hash}: "
+                f"{len(contextual_chunks)} chunks ({failed_chunks} dropped)"
             )
-            return contextual_chunks
+            return contextual_chunks, failed_chunks
 
         except Exception as e:
             logger.error(
diff --git a/src/vector_indexer/error_logger.py b/src/vector_indexer/error_logger.py
index 1d11cba1..c62de79c 100644
--- a/src/vector_indexer/error_logger.py
+++ b/src/vector_indexer/error_logger.py
@@ -158,15 +158,17 @@ def log_processing_stats(self, stats: ProcessingStats) -> None:
                 stats_dict["end_time"] = stats.end_time.isoformat()
             stats_dict["duration"] = stats.duration
             stats_dict["success_rate"] = stats.success_rate
+            stats_dict["chunk_success_rate"] = stats.chunk_success_rate
 
             with open(self.config.stats_log_file, "w", encoding="utf-8") as f:
                 json.dump(stats_dict, f, indent=2)
 
             logger.info(
                 f"Processing completed - Success rate: {stats.success_rate:.1%}, "
+                f"Chunk success rate: {stats.chunk_success_rate:.1%}, "
                 f"Duration: {stats.duration}, "
                 f"Processed: {stats.documents_processed}/{stats.total_documents} documents, "
-                f"Chunks: {stats.total_chunks_processed}"
+                f"Chunks: {stats.total_chunks_processed} ok / {stats.total_chunks_failed} failed"
             )
         except Exception as e:
             logger.error(f"Failed to write stats log: {e}")
diff --git a/src/vector_indexer/main_indexer.py b/src/vector_indexer/main_indexer.py
index 45ce5ff6..bf407682 100644
--- a/src/vector_indexer/main_indexer.py
+++ b/src/vector_indexer/main_indexer.py
@@ -15,7 +15,7 @@
 sys.path.append(str(Path(__file__).parent.parent))
 
 from vector_indexer.config.config_loader import ConfigLoader
-from vector_indexer.document_loader import DocumentLoader
+from vector_indexer.document_loader import DocumentLoader, DocumentLoadError
 from vector_indexer.contextual_processor import ContextualProcessor
 from vector_indexer.qdrant_manager import QdrantManager
 from vector_indexer.error_logger import ErrorLogger
@@ -169,7 +169,7 @@ async def process_all_documents(self) -> ProcessingStats:
 
                 # Process documents with controlled concurrency
                 semaphore = asyncio.Semaphore(self.config.max_concurrent_documents)
-                tasks: List[asyncio.Task[tuple[int, str]]] = []
+                tasks: List[asyncio.Task[tuple[int, str, int]]] = []
 
                 for doc_info in documents:
                     task = asyncio.create_task(
@@ -189,6 +189,9 @@ async def process_all_documents(self) -> ProcessingStats:
                 chunks_info: Dict[
                     str, Dict[str, Any]
                 ] = {}  # Track chunk counts for metadata update
+                # Only documents that processed successfully are marked as
+                # processed in DVC tracking, so failures are retried next run.
+                processed_documents: List[DocumentInfo] = []
                 for i, result in enumerate(results):
                     if isinstance(result, Exception):
                         doc_info = documents[i]
@@ -200,16 +203,18 @@ async def process_all_documents(self) -> ProcessingStats:
                             doc_info.document_hash, str(result)
                         )
                     else:
-                        # Result should be tuple of (chunk_count, content_hash)
+                        # Result should be tuple of (chunk_count, content_hash, failed_chunks)
                         doc_info = documents[i]
                         self.stats.documents_processed += 1
-                        if isinstance(result, tuple) and len(result) == 2:
-                            chunk_count, content_hash = result
+                        processed_documents.append(doc_info)
+                        if isinstance(result, tuple) and len(result) == 3:
+                            chunk_count, content_hash, failed_chunks = result
                             self.stats.total_chunks_processed += chunk_count
+                            self.stats.total_chunks_failed += failed_chunks
                             # Track chunk count using content_hash (not directory hash)
                             chunks_info[content_hash] = {"chunk_count": chunk_count}
                             logger.info(
-                                f"CHUNK COUNT: Document {doc_info.document_hash[:12]}... (content: {content_hash[:12]}...) -> {chunk_count} chunks"
+                                f"CHUNK COUNT: Document {doc_info.document_hash[:12]}... (content: {content_hash[:12]}...) -> {chunk_count} chunks ({failed_chunks} failed)"
                             )
 
                 # Log the complete chunks_info dictionary
@@ -227,10 +232,10 @@ async def process_all_documents(self) -> ProcessingStats:
                 # Step 4: Update processed files tracking (even if no new documents processed)
                 if diff_detector:
                     try:
-                        # Update metadata for newly processed files
-                        if documents:
+                        # Update metadata for newly processed files (successful only)
+                        if processed_documents:
                             processed_paths = [
-                                doc.cleaned_txt_path for doc in documents
+                                doc.cleaned_txt_path for doc in processed_documents
                             ]
                             if processed_paths:
                                 logger.debug(
@@ -290,7 +295,7 @@ async def _process_single_document(
         doc_info: DocumentInfo,
         qdrant_manager: QdrantManager,
         semaphore: asyncio.Semaphore,
-    ) -> tuple[int, str]:
+    ) -> tuple[int, str, int]:
         """
         Process a single document with contextual retrieval.
 
@@ -300,7 +305,9 @@ async def _process_single_document(
             semaphore: Concurrency control semaphore
 
         Returns:
-            tuple: (chunk_count: int, content_hash: str) or Exception on error
+            tuple: (chunk_count: int, content_hash: str, failed_chunks: int).
+            Raises on any failure (including load failure or zero usable chunks),
+            so the document is counted as failed rather than as success.
         """
         async with semaphore:
             logger.info(f"Processing document: {doc_info.document_hash}")
@@ -310,29 +317,31 @@ async def _process_single_document(
                 document = self.document_loader.load_document(doc_info)
 
                 if not document:
-                    logger.warning(f"Could not load document: {doc_info.document_hash}")
-                    return (0, doc_info.document_hash)
+                    raise DocumentLoadError(
+                        f"Could not load document: {doc_info.document_hash}"
+                    )
 
                 # Process document with contextual retrieval
-                contextual_chunks = await self.contextual_processor.process_document(
-                    document
-                )
+                (
+                    contextual_chunks,
+                    failed_chunks,
+                ) = await self.contextual_processor.process_document(document)
 
                 if not contextual_chunks:
-                    logger.warning(
-                        f"No chunks created for document: {doc_info.document_hash}"
+                    raise RuntimeError(
+                        f"No chunks created for document: {doc_info.document_hash} "
+                        f"({failed_chunks} chunks failed context generation)"
                     )
-                    return (0, document.document_hash)
 
                 # Store chunks in Qdrant
                 await qdrant_manager.store_chunks(contextual_chunks)
 
                 logger.info(
                     f"Successfully processed document {doc_info.document_hash}: "
-                    f"{len(contextual_chunks)} chunks"
+                    f"{len(contextual_chunks)} chunks ({failed_chunks} dropped)"
                 )
 
-                return (len(contextual_chunks), document.document_hash)
+                return (len(contextual_chunks), document.document_hash, failed_chunks)
 
             except Exception as e:
                 logger.error(f"Error processing document {doc_info.document_hash}: {e}")
@@ -352,10 +361,12 @@ def _log_final_summary(self) -> None:
         logger.info(f"   • Failed Chunks: {self.stats.total_chunks_failed}")
 
         if self.stats.total_documents > 0:
-            success_rate = (
-                self.stats.documents_processed / self.stats.total_documents
-            ) * 100
-            logger.info(f"Success Rate: {success_rate:.1f}%")
+            logger.info(f"Success Rate: {self.stats.success_rate * 100:.1f}%")
+
+        if self.stats.total_chunks_processed + self.stats.total_chunks_failed > 0:
+            logger.info(
+                f"Chunk Success Rate: {self.stats.chunk_success_rate * 100:.1f}%"
+            )
 
         logger.info(f"Processing Duration: {self.stats.duration}")
 
@@ -365,6 +376,11 @@ def _log_final_summary(self) -> None:
             )
             logger.info("Check failure logs for details")
 
+        if self.stats.total_chunks_failed > 0:
+            logger.warning(
+                f"  {self.stats.total_chunks_failed} chunks failed processing"
+            )
+
     async def run_health_check(self) -> bool:
         """
         Run health check on all components.
@@ -617,12 +633,20 @@ async def _execute_cleanup_operations(
         return total_deleted
 
     def _cleanup_datasets(self) -> None:
-        """Remove datasets folder after processing."""
+        """Remove datasets folder contents after processing.
+
+        Only the contents go; the folder itself is a mounted volume in the
+        container. Symlinked entries are unlinked, since rmtree rejects them.
+        """
         try:
             datasets_path = Path(self.config.dataset_base_path)
             if datasets_path.exists():
-                shutil.rmtree(str(datasets_path))
-                logger.info(f"Datasets folder cleaned up: {datasets_path}")
+                for child in datasets_path.iterdir():
+                    if child.is_dir() and not child.is_symlink():
+                        shutil.rmtree(str(child))
+                    else:
+                        child.unlink()
+                logger.info(f"Datasets folder contents cleaned up: {datasets_path}")
             else:
                 logger.debug(f"Datasets folder does not exist: {datasets_path}")
         except Exception as e:
diff --git a/src/vector_indexer/models.py b/src/vector_indexer/models.py
index 752ea02a..41ae1ce1 100644
--- a/src/vector_indexer/models.py
+++ b/src/vector_indexer/models.py
@@ -96,6 +96,14 @@ def success_rate(self) -> float:
             return self.documents_processed / self.total_documents
         return 0.0
 
+    @property
+    def chunk_success_rate(self) -> float:
+        """Fraction of attempted chunks (processed + failed) that were processed; 0.0 if none."""
+        attempted = self.total_chunks_processed + self.total_chunks_failed
+        if attempted <= 0:
+            return 0.0
+        return self.total_chunks_processed / attempted
+
 
 class ProcessingError(BaseModel):
     """Error information for failed processing."""

From 8b3d232f9ff080df6c4db88e57f8518cad05ce72 Mon Sep 17 00:00:00 2001
From: nuwangeek <charith.bimsara@rootcode.io>
Date: Thu, 18 Jun 2026 09:55:25 +0530
Subject: [PATCH 6/8] fixed vault packages issue

---
 Dockerfile.vault-init  | 10 ++++++++++
 docker-compose-ec2.yml |  6 ++++--
 docker-compose.yml     |  6 ++++--
 3 files changed, 18 insertions(+), 4 deletions(-)
 create mode 100644 Dockerfile.vault-init

diff --git a/Dockerfile.vault-init b/Dockerfile.vault-init
new file mode 100644
index 00000000..7743fa6e
--- /dev/null
+++ b/Dockerfile.vault-init
@@ -0,0 +1,10 @@
+FROM hashicorp/vault:1.20.3
+
+# Bake the only CLI tools vault-init.sh actually needs (jq + openssl) so container
+# startup never depends on the Alpine CDN; `apk add` on every boot failed
+# intermittently on EC2. Retry up to 3 times against transient mirror hiccups, and
+# fail the build if every attempt fails (the bare loop would end 0 via `sleep`).
+RUN for i in 1 2 3; do \
+        apk add --no-cache jq openssl && exit 0; \
+        echo "apk add failed (attempt $i), retrying..."; sleep 3; \
+    done; echo "apk add failed after 3 attempts" >&2; exit 1
diff --git a/docker-compose-ec2.yml b/docker-compose-ec2.yml
index 136c097c..c20acca8 100644
--- a/docker-compose-ec2.yml
+++ b/docker-compose-ec2.yml
@@ -516,7 +516,10 @@ services:
       start_period: 10s
 
   vault-init:
-    image: hashicorp/vault:1.20.3
+    build:
+      context: .
+      dockerfile: Dockerfile.vault-init
+    image: rag-vault-init:1.20.3
     container_name: vault-init
     user: "0"
     depends_on:
@@ -538,7 +541,6 @@ services:
     command:
       - -c
       - |
-        apk add --no-cache curl jq uuidgen openssl
         # Create and set permissions for all agent directories
         mkdir -p /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out
         chown -R vault:vault /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out
diff --git a/docker-compose.yml b/docker-compose.yml
index 3e6cfba2..29f8139d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -463,7 +463,10 @@ services:
       start_period: 10s
 
   vault-init:
-    image: hashicorp/vault:1.20.3
+    build:
+      context: .
+      dockerfile: Dockerfile.vault-init
+    image: rag-vault-init:1.20.3
     container_name: vault-init
     user: "0"
     depends_on:
@@ -485,7 +488,6 @@ services:
     command:
       - -c
       - |
-        apk add --no-cache curl jq uuidgen openssl
         # Create and set permissions for all agent directories
         mkdir -p /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out
         chown -R vault:vault /agent/credentials /agent/gui-token /agent/cron-token /agent/llm-token /agent/out

From 5cc9c82a2fe1e5e708ab4343cbe92fd4c679e5d9 Mon Sep 17 00:00:00 2001
From: ruwinirathnamalala <ruwini.rathnamalala@rootcodelabs.com>
Date: Thu, 18 Jun 2026 12:32:57 +0530
Subject: [PATCH 7/8] Issue fix for test llm connections displaying production
 llm

---
 .../get-all-llm-connections-paginated.sql     | 45 ++++++++++
 .../POST/get-llm-connections-paginated.sql    |  2 +-
 .../rag-search/GET/llm-connections/all.yml    | 84 +++++++++++++++++++
 GUI/src/pages/TestProductionLLM/index.tsx     |  4 +-
 GUI/src/services/llmConnections.ts            | 16 ++++
 GUI/src/utils/endpoints.ts                    |  1 +
 6 files changed, 149 insertions(+), 3 deletions(-)
 create mode 100644 DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql
 create mode 100644 DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml

diff --git a/DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql
new file mode 100644
index 00000000..cb1e2394
--- /dev/null
+++ b/DSL/Resql/rag-search/POST/get-all-llm-connections-paginated.sql
@@ -0,0 +1,45 @@
+SELECT -- Paginated, filterable list of non-deleted LLM connections with a derived budget_status
+    id,
+    vault_uuid,
+    connection_name,
+    llm_platform,
+    llm_model,
+    embedding_platform,
+    embedding_model,
+    monthly_budget,
+    warn_budget_threshold,
+    stop_budget_threshold,
+    disconnect_on_budget_exceed,
+    used_budget,
+    environment,
+    connection_status,
+    created_at,
+    CEIL(COUNT(*) OVER() / :page_size::DECIMAL) AS totalPages, -- window count spans all filtered rows, before OFFSET/LIMIT
+    CASE -- first matching branch wins; thresholds are percentages of monthly_budget
+        WHEN used_budget IS NULL OR used_budget = 0 OR (used_budget::DECIMAL / monthly_budget::DECIMAL) < (warn_budget_threshold::DECIMAL / 100.0) THEN 'within_budget'
+        WHEN stop_budget_threshold != 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (stop_budget_threshold::DECIMAL / 100.0) THEN 'over_budget'
+        WHEN stop_budget_threshold = 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1 THEN 'over_budget'
+        WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (warn_budget_threshold::DECIMAL / 100.0) THEN 'close_to_exceed'
+        ELSE 'within_budget'
+    END AS budget_status
+FROM rag_search.llm_connections
+WHERE connection_status <> 'deleted'
+    -- No fixed environment restriction here; only the optional environment filter below narrows by environment
+    AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform)
+    AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model)
+    AND (:environment IS NULL OR :environment = '' OR environment = :environment)
+ORDER BY
+    CASE WHEN :sorting = 'connection_name asc' THEN connection_name END ASC,
+    CASE WHEN :sorting = 'connection_name desc' THEN connection_name END DESC,
+    CASE WHEN :sorting = 'llm_platform asc' THEN llm_platform END ASC,
+    CASE WHEN :sorting = 'llm_platform desc' THEN llm_platform END DESC,
+    CASE WHEN :sorting = 'llm_model asc' THEN llm_model END ASC,
+    CASE WHEN :sorting = 'llm_model desc' THEN llm_model END DESC,
+    CASE WHEN :sorting = 'monthly_budget asc' THEN monthly_budget END ASC,
+    CASE WHEN :sorting = 'monthly_budget desc' THEN monthly_budget END DESC,
+    CASE WHEN :sorting = 'environment asc' THEN environment END ASC,
+    CASE WHEN :sorting = 'environment desc' THEN environment END DESC,
+    CASE WHEN :sorting = 'created_at asc' THEN created_at END ASC,
+    CASE WHEN :sorting = 'created_at desc' THEN created_at END DESC,
+    created_at DESC  -- Default fallback sorting
+OFFSET ((GREATEST(:page, 1) - 1) * :page_size) LIMIT :page_size;
\ No newline at end of file
diff --git a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
index d4c15efb..922c16ec 100644
--- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
+++ b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
@@ -24,7 +24,7 @@ SELECT
     END AS budget_status
 FROM rag_search.llm_connections
 WHERE connection_status <> 'deleted'
-    -- AND environment = 'testing'
+    AND environment = 'testing'
     AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform)
     AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model)
     AND (:environment IS NULL OR :environment = '' OR environment = :environment)
diff --git a/DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml b/DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml
new file mode 100644
index 00000000..3b69aebd
--- /dev/null
+++ b/DSL/Ruuter.private/rag-search/GET/llm-connections/all.yml
@@ -0,0 +1,84 @@
+declaration:
+  call: declare
+  version: 0.1
+  description: "Get paginated list of LLM connections"
+  method: get
+  accepts: json
+  returns: json
+  namespace: rag-search
+  allowlist:
+    params:
+      - field: pageNumber
+        type: number
+        description: "Page number (1-based)"
+      - field: pageSize
+        type: number
+        description: "Number of items per page"
+      - field: sortBy
+        type: string
+        description: "Field to sort by (e.g. 'llm_platform', 'created_at')"
+      - field: sortOrder
+        type: string
+        description: "Sort order: 'asc' or 'desc'"
+      - field: llmPlatform
+        type: string
+        description: "Filter by LLM platform"
+      - field: llmModel
+        type: string
+        description: "Filter by LLM model"
+      - field: environment
+        type: string
+        description: "Filter by deployment environment"
+
+extract_request_data:  # Read query params; Number() never yields null/undefined, so ?? must wrap the raw param
+  assign:
+    pageNumber: ${Number(incoming.params.pageNumber ?? 1)}
+    pageSize: ${Number(incoming.params.pageSize ?? 10)}
+    sortBy: ${incoming.params.sortBy ?? "created_at"}
+    sortOrder: ${incoming.params.sortOrder ?? "desc"}
+    sorting: ${sortBy + " " + sortOrder}
+    llmPlatform: ${incoming.params.llmPlatform ?? ""}
+    llmModel: ${incoming.params.llmModel ?? ""}
+    environment: ${incoming.params.environment ?? ""}
+  next: validate_page_params
+
+validate_page_params:  # Negated range checks so NaN (non-numeric input) is rejected too; NaN fails every comparison
+  switch:
+    - condition: ${!(pageNumber >= 1)}
+      next: return_invalid_page
+    - condition: ${!(pageSize >= 1 && pageSize <= 100)}
+      next: return_invalid_page_size
+  next: get_llm_connections
+
+get_llm_connections:
+  call: http.post
+  args:
+    url: "[#RAG_SEARCH_RESQL]/get-all-llm-connections-paginated"
+    body:
+      page: ${pageNumber}
+      page_size: ${pageSize}
+      sorting: ${sorting}
+      llm_platform: ${llmPlatform}
+      llm_model: ${llmModel}
+      environment: ${environment}
+  result: connections_result
+  next: transform_response
+
+transform_response:
+  assign:
+    response_data: ${connections_result.response.body}
+  next: return_success
+
+return_success:
+  return: ${response_data}
+  next: end
+
+return_invalid_page:
+  status: 400
+  return: "Page number must be greater than 0"
+  next: end
+
+return_invalid_page_size:
+  status: 400
+  return: "Page size must be between 1 and 100"
+  next: end
\ No newline at end of file
diff --git a/GUI/src/pages/TestProductionLLM/index.tsx b/GUI/src/pages/TestProductionLLM/index.tsx
index b9ba6be7..5f22d714 100644
--- a/GUI/src/pages/TestProductionLLM/index.tsx
+++ b/GUI/src/pages/TestProductionLLM/index.tsx
@@ -8,7 +8,7 @@ import { ChoiceButton } from 'services/inference';
 import './TestProductionLLM.scss';
 import MessageContent from 'components/MessageContent';
 import { llmConnectionsQueryKeys } from 'utils/queryKeys';
-import { fetchLLMConnectionsPaginated } from 'services/llmConnections';
+import { fetchAllLLMConnectionsPaginated } from 'services/llmConnections';
 
 
 interface Message {
@@ -45,7 +45,7 @@ const TestProductionLLM: FC = () => {
         pageSize: 100, // Get all connections for dropdown
         sorting: 'created_at desc',
       }),
-      queryFn: () => fetchLLMConnectionsPaginated({
+      queryFn: () => fetchAllLLMConnectionsPaginated({
         pageNumber: 1,
         pageSize: 100,
         sortBy: 'created_at desc',
diff --git a/GUI/src/services/llmConnections.ts b/GUI/src/services/llmConnections.ts
index 647addfc..cd07324e 100644
--- a/GUI/src/services/llmConnections.ts
+++ b/GUI/src/services/llmConnections.ts
@@ -378,3 +378,19 @@ export async function updateLLMConnectionStatus(
   });
   return data?.response;
 }
+
+/** Fetches a page of LLM connections from the `/llm-connections/all` endpoint, sending only the filters that are set. */
+export async function fetchAllLLMConnectionsPaginated(filters: LLMConnectionFilters): Promise<LLMConnection[]> {
+  const queryParams = new URLSearchParams();
+  const keys = ['pageNumber', 'pageSize', 'sortBy', 'sortOrder', 'llmPlatform', 'llmModel', 'environment'] as const;
+
+  // Falsy filter values (undefined, '', 0) are left out of the query string.
+  for (const key of keys) {
+    const value = filters[key];
+    if (value) queryParams.append(key, value.toString());
+  }
+
+  const url = `${llmConnectionsEndpoints.FETCH_ALL_LLM_CONNECTIONS_PAGINATED()}?${queryParams.toString()}`;
+  const { data } = await apiDev.get(url);
+  return data?.response;
+}
\ No newline at end of file
diff --git a/GUI/src/utils/endpoints.ts b/GUI/src/utils/endpoints.ts
index 386db296..30624914 100644
--- a/GUI/src/utils/endpoints.ts
+++ b/GUI/src/utils/endpoints.ts
@@ -15,6 +15,7 @@ export const authEndpoints = {
 
 export const llmConnectionsEndpoints = {
   FETCH_LLM_CONNECTIONS_PAGINATED: (): string => `/rag-search/llm-connections/list`,
+  FETCH_ALL_LLM_CONNECTIONS_PAGINATED: (): string => `/rag-search/llm-connections/all`,
   GET_LLM_CONNECTION: (): string => `/rag-search/llm-connections/get`,
   GET_PRODUCTION_CONNECTION: (): string => `/rag-search/llm-connections/production`,
   CREATE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/add`,

From 2ab4c5101c5b3713d876c21d41ed1e1632a267b0 Mon Sep 17 00:00:00 2001
From: ruwinirathnamalala <ruwini.rathnamalala@rootcodelabs.com>
Date: Thu, 18 Jun 2026 13:53:19 +0530
Subject: [PATCH 8/8] Issue fix for test llm connections displaying production
 llm

---
 DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
index d4c15efb..922c16ec 100644
--- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
+++ b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql
@@ -24,7 +24,7 @@ SELECT
     END AS budget_status
 FROM rag_search.llm_connections
 WHERE connection_status <> 'deleted'
-    -- AND environment = 'testing'
+    AND environment = 'testing'
     AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform)
     AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model)
     AND (:environment IS NULL OR :environment = '' OR environment = :environment)