diff --git a/content/develop/ai/agent-builder/_index.md b/content/develop/ai/agent-builder/_index.md
index 8b27ce90fa..45f87ce306 100644
--- a/content/develop/ai/agent-builder/_index.md
+++ b/content/develop/ai/agent-builder/_index.md
@@ -30,16 +30,17 @@ Redis powers these capabilities with fast, reliable data storage and retrieval t
 
 ## What you can build
 
-Choose from two types of intelligent agents:
+Choose from three types of intelligent agents:
 
 - **Recommendation engines**: Personalized product and content recommendations
 - **Conversational assistants**: Chatbots with memory and context awareness
+- **Knowledge assistants**: RAG agents that ingest documents, answer questions with citations, and use semantic caching
 
 The agent builder will generate complete, working code examples for your chosen agent type.
 
 ## Features
 
-- **Multiple programming languages**: Generate code in Python, with JavaScript (Node.js), Java, and C# coming soon
+- **Multiple programming languages**: Generate code in Python and JavaScript (Node.js), with Java and C# coming soon
 - **LLM integration**: Support for OpenAI, Anthropic Claude, and Llama 2
 - **Redis optimized**: Uses Redis data structures for optimal performance
 
diff --git a/layouts/shortcodes/agent-builder.html b/layouts/shortcodes/agent-builder.html
index 8adeffb96b..f95ec26073 100644
--- a/layouts/shortcodes/agent-builder.html
+++ b/layouts/shortcodes/agent-builder.html
@@ -24,6 +24,7 @@ <h3 class="text-2xl font-semibold text-redis-ink-900 mb-2">Build Your AI Agent</
           <div class="suggestion-chips">
             <button class="suggestion-chip" data-suggestion="recommendation">🛍️ Recommendation Engine</button>
             <button class="suggestion-chip" data-suggestion="conversational">💬 Conversational Assistant</button>
+            <button class="suggestion-chip" data-suggestion="rag">🔍 Knowledge Assistant</button>
           </div>
         </div>
       </div>
@@ -90,5 +91,7 @@ <h4 class="text-lg font-medium text-redis-ink-900 mb-4">Generated Agent Code</h4
 
 </div>
 
+<!-- Inject the Hugo-resolved base path for template file fetches -->
+<script>window.AGENT_TEMPLATE_BASE = "{{ "code/agent-templates" | relURL }}";</script>
 <!-- Load the agent builder JavaScript -->
 <script src="{{ "js/agent-builder.js" | relURL }}"></script>
diff --git a/static/code/agent-templates/javascript/rag_agent.js b/static/code/agent-templates/javascript/rag_agent.js
new file mode 100644
index 0000000000..3db0a8dc4e
--- /dev/null
+++ b/static/code/agent-templates/javascript/rag_agent.js
@@ -0,0 +1,496 @@
+/*
+ * Redis Knowledge Assistant (RAG Agent)
+ *
+ * Features:
+ *   - Ingest documents with automatic chunking and embedding
+ *   - Redis-native hybrid retrieval: text pre-filter + KNN vector search, with a fallback vector pass
+ *   - Semantic cache: skip LLM for similar queries (TTL-based expiry)
+ *   - Per-session conversation memory in a Redis List
+ *   - Citations: each answer references source documents with title, source URL, and chunk ID
+ *
+ * To run this code:
+ *   Install dependencies:
+ *     npm install redis openai
+ *
+ *   Set environment variables:
+ *     LLM_API_KEY=your_api_key_here
+ *     LLM_API_BASE_URL=your_base_url     (optional - default: ${CONFIG.models[formData.llmModel].baseUrl})
+ *     LLM_MODEL=your_model               (optional - default: ${CONFIG.models[formData.llmModel].defaultModel})
+ *     REDIS_URL=redis://host:port         (or REDIS_HOST / REDIS_PORT / REDIS_PASSWORD)
+ *
+ *   Note: this template uses the OpenAI SDK with a configurable base URL. It works with
+ *   OpenAI directly and with any provider that exposes an OpenAI-compatible API endpoint.
+ *
+ *   Run:
+ *     node rag_agent.js
+ *
+ *   Requires Redis Stack or Redis 8+ with Search module enabled.
+ */
+
+'use strict';
+
+const { createClient, SchemaFieldTypes, VectorAlgorithms } = require('redis');
+const OpenAI = require('openai');
+const readline = require('readline');
+const crypto = require('crypto');
+
+const CHUNK_SIZE = 500;
+const CHUNK_OVERLAP = 50;
+const MAX_SEARCH_RESULTS = 5;
+const MAX_HISTORY_TURNS = 6;
+const CACHE_TTL = 3600;
+// Cosine similarity threshold for cache hits. vector_distance for cosine is in [0, 2]:
+// 0 = identical, 1 = orthogonal, 2 = opposite. A hit fires when distance < (1 - threshold),
+// i.e. when cosine similarity > threshold. Verify this against your node-redis version.
+const CACHE_THRESHOLD = 0.92;
+const VECTOR_DIM = parseInt(process.env.VECTOR_DIM) || 1536;
+const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL || 'text-embedding-3-small';
+
+const DOC_INDEX = 'knowledge_docs';
+const CACHE_INDEX = 'knowledge_cache';
+
+// RediSearch special characters that must be backslash-escaped in query strings.
+// Escaping preserves token meaning — "C\+\+" still matches "C++" in documents —
+// whereas stripping would silently discard the + characters and change query intent.
+// Note: this is a best-effort heuristic. It handles the common cases well but is not
+// a full RediSearch query parser; phrase queries and advanced syntax may still produce
+// unexpected results and will need manual adjustment.
+const FT_SPECIAL_CHARS = /[,.<>{}\[\]"'`:;!@#$%^&*()\-+=~|\/\\?]/g;
+
+// Sample Redis documentation for demonstration.
+// Replace with your own documents or use loadDirectory() to load a folder of .txt / .md files.
+const SAMPLE_DOCS = [
+    {
+        title:   'Redis Data Types',
+        source:  'https://redis.io/docs/latest/develop/data-types/',
+        content: 'Redis supports several core data types suited to different use cases. Strings store ' +
+                 'sequences of bytes up to 512 MB and support atomic increment and decrement operations. ' +
+                 'Lists are linked lists of strings with O(1) push and pop from both ends, useful for ' +
+                 'queues and stacks. Sets are unordered collections of unique strings with O(1) add, ' +
+                 'remove, and membership tests, plus union, intersection, and difference operations. ' +
+                 'Sorted sets add a floating-point score to each member, enabling range queries by score ' +
+                 'or rank in O(log N) time. Hashes store field-value pairs in a single key, ideal for ' +
+                 'representing objects without serialization. Redis also supports Streams for append-only ' +
+                 'logs with consumer groups, HyperLogLog for approximate cardinality estimation, Bitmaps ' +
+                 'for efficient bit-level operations, and Geospatial indexes for location-based queries.'
+    },
+    {
+        title:   'Redis Vector Search',
+        source:  'https://redis.io/docs/latest/develop/ai/search-and-query/vectors/',
+        content: 'Redis Vector Search lets you index and search vector embeddings stored in HASH or JSON ' +
+                 'documents. Two index algorithms are available: FLAT (brute-force, exact results, best for ' +
+                 'smaller datasets) and HNSW (Hierarchical Navigable Small World, approximate results, ' +
+                 'much faster at scale using a multi-layer graph structure). Supported distance metrics are ' +
+                 'cosine similarity, L2 Euclidean distance, and inner product. Hybrid queries combine a ' +
+                 'vector KNN clause with a RediSearch filter expression in a single FT.SEARCH call, ' +
+                 'pre-filtering documents by metadata before ranking by vector distance. This avoids ' +
+                 'post-filtering and keeps result quality high. Vector fields are declared with DIM ' +
+                 '(dimension count), TYPE (FLOAT32 or FLOAT64), and DISTANCE_METRIC parameters.'
+    },
+    {
+        title:   'Redis Cloud',
+        source:  'https://redis.io/docs/latest/operate/rc/',
+        content: 'Redis Cloud is the fully managed cloud service for Redis, available on AWS, Google Cloud, ' +
+                 'and Microsoft Azure. It provides automatic clustering, replication, and failover for high ' +
+                 'availability and data durability without operational overhead. Deployment options include ' +
+                 'Redis Stack for development, Redis Enterprise for mission-critical workloads, and active-' +
+                 'active geo-distribution for multi-region deployments with conflict-free replication. ' +
+                 'Built-in monitoring, automated backups, and vertical and horizontal scaling are included. ' +
+                 'A free tier is available for development and testing. Supported modules include RediSearch ' +
+                 'for full-text and vector search, RedisJSON for native JSON documents, RedisTimeSeries for ' +
+                 'time-series data, and RedisBloom for probabilistic structures such as Bloom filters and ' +
+                 'Count-Min sketches.'
+    },
+    {
+        title:   'Redis Context Engine',
+        source:  'https://redis.io/docs/latest/develop/ai/context-engine/',
+        content: 'The Redis Context Engine is a suite of managed services on Redis Cloud that gives AI ' +
+                 'agents the context they need. LangCache provides semantic response caching: incoming ' +
+                 'queries are embedded and compared against cached query-response pairs, returning a cached ' +
+                 'answer when cosine similarity exceeds a configurable threshold to reduce LLM API costs. ' +
+                 'Agent Memory offers two-tier persistent memory with a session layer for recent turns and ' +
+                 'a long-term layer backed by vector search, available as a REST API and Python SDK. ' +
+                 'Context Retriever exposes structured business data as governed tools that agents can ' +
+                 'query reliably without writing custom retrieval logic. Data Integration keeps a Redis ' +
+                 'Cloud database in sync with relational databases in near real time using Change Data ' +
+                 'Capture, so agents always query fresh data.'
+    },
+];
+
+
+class KnowledgeAssistant {
+    constructor(sessionId) {
+        this.sessionId = sessionId || crypto.randomUUID();
+        this.sessionKey = `session:${this.sessionId}:history`;
+        this.client = null;
+        this.llm = null;
+        this.llmModel = null;
+    }
+
+    async init() {
+        if (!process.env.LLM_API_KEY) {
+            throw new Error('LLM_API_KEY environment variable is required');
+        }
+
+        const redisUrl = process.env.REDIS_URL ||
+            `redis://${process.env.REDIS_HOST || 'localhost'}:${process.env.REDIS_PORT || 6379}`;
+
+        this.client = createClient({
+            url: redisUrl,
+            password: process.env.REDIS_PASSWORD || undefined,
+            socket: {
+                reconnectStrategy: (retries) =>
+                    retries < 3 ? Math.min(retries * 100, 1000) : new Error('Max retries exceeded')
+            }
+        });
+        this.client.on('error', (err) => console.error('Redis error:', err.message));
+        await this.client.connect();
+        console.log(`Connected to Redis. Session: ${this.sessionId}`);
+
+        this.llm = new OpenAI({
+            apiKey: process.env.LLM_API_KEY,
+            baseURL: process.env.LLM_API_BASE_URL || '${CONFIG.models[formData.llmModel].baseUrl}'
+        });
+        this.llmModel = process.env.LLM_MODEL || '${CONFIG.models[formData.llmModel].defaultModel}';
+        console.log(`LLM configured: ${this.llmModel}`);
+
+        await this._createIndexes();
+    }
+
+    async _createIndexes() {
+        const indexes = [
+            {
+                name: DOC_INDEX,
+                prefix: 'doc:',
+                schema: {
+                    doc_id:    { type: SchemaFieldTypes.TAG },
+                    chunk_id:  { type: SchemaFieldTypes.TAG },
+                    title:     { type: SchemaFieldTypes.TEXT },
+                    source:    { type: SchemaFieldTypes.TAG },
+                    content:   { type: SchemaFieldTypes.TEXT },
+                    embedding: {
+                        type: SchemaFieldTypes.VECTOR,
+                        ALGORITHM: VectorAlgorithms.FLAT,
+                        TYPE: 'FLOAT32',
+                        DIM: VECTOR_DIM,
+                        DISTANCE_METRIC: 'COSINE'
+                    }
+                }
+            },
+            {
+                name: CACHE_INDEX,
+                prefix: 'ragcache:',
+                schema: {
+                    response:        { type: SchemaFieldTypes.TEXT },
+                    citations:       { type: SchemaFieldTypes.TEXT },
+                    query_embedding: {
+                        type: SchemaFieldTypes.VECTOR,
+                        ALGORITHM: VectorAlgorithms.FLAT,
+                        TYPE: 'FLOAT32',
+                        DIM: VECTOR_DIM,
+                        DISTANCE_METRIC: 'COSINE'
+                    }
+                }
+            }
+        ];
+
+        for (const { name, prefix, schema } of indexes) {
+            try {
+                await this.client.ft.create(name, schema, { ON: 'HASH', PREFIX: [prefix] });
+            } catch (err) {
+                if (!err.message.includes('Index already exists')) throw err;
+            }
+        }
+    }
+
+    // ── Document ingestion ────────────────────────────────────────────────────
+
+    async loadDirectory(dirPath, extensions = ['.txt', '.md']) {
+        const fs = require('fs').promises;
+        const path = require('path');
+
+        const walk = async (dir) => {
+            const entries = await fs.readdir(dir, { withFileTypes: true });
+            const files = [];
+            for (const entry of entries) {
+                const full = path.join(dir, entry.name);
+                if (entry.isDirectory()) files.push(...await walk(full));
+                else if (extensions.includes(path.extname(entry.name).toLowerCase())) files.push(full);
+            }
+            return files;
+        };
+
+        const files = (await walk(dirPath)).sort();
+        let loaded = 0;
+        for (const file of files) {
+            try {
+                const content = (await fs.readFile(file, 'utf-8')).trim();
+                if (content) {
+                    await this.ingestDocument(content, path.basename(file, path.extname(file)), file);
+                    loaded++;
+                }
+            } catch (err) {
+                console.warn(`Skipping ${file}: ${err.message}`);
+            }
+        }
+        console.log(`Loaded ${loaded} document(s) from ${dirPath}`);
+        return loaded;
+    }
+
+    _chunkText(text) {
+        // Character-based chunking is simple but not token-aware. For production,
+        // consider a token-counting library (e.g. js-tiktoken).
+        const chunks = [];
+        let start = 0;
+        while (start < text.length) {
+            chunks.push(text.slice(start, start + CHUNK_SIZE));
+            start += CHUNK_SIZE - CHUNK_OVERLAP;
+        }
+        return chunks;
+    }
+
+    async _embed(text) {
+        const resp = await this.llm.embeddings.create({
+            model: EMBEDDING_MODEL,
+            input: text.slice(0, 8000)
+        });
+        return resp.data[0].embedding;
+    }
+
+    _toBuffer(embedding) {
+        const buf = Buffer.allocUnsafe(embedding.length * 4);
+        embedding.forEach((v, i) => buf.writeFloatLE(v, i * 4));
+        return buf;
+    }
+
+    async ingestDocument(content, title, source = '') {
+        const docId = crypto.randomUUID();
+        const chunks = this._chunkText(content);
+        for (let i = 0; i < chunks.length; i++) {
+            const chunkId = `${docId}:${i}`;
+            const embedding = await this._embed(chunks[i]);
+            await this.client.hSet(`doc:${chunkId}`, {
+                doc_id:    docId,
+                chunk_id:  chunkId,
+                title,
+                source,
+                content:   chunks[i],
+                embedding: this._toBuffer(embedding)
+            });
+        }
+        console.log(`Ingested '${title}': ${chunks.length} chunk(s) (doc_id: ${docId})`);
+        return docId;
+    }
+
+    // ── Hybrid search ─────────────────────────────────────────────────────────
+    // First pass: FT.SEARCH with a text pre-filter and an inline KNN clause —
+    // "(text_terms)=>[KNN k @embedding $BLOB AS distance]" — so Redis applies
+    // both filters in a single round trip. This is more Redis-native than running
+    // two separate queries and fusing the results in JavaScript.
+    // Second pass (fallback): if the text filter is too selective and returns nothing,
+    // a pure vector search is issued so queries always return results when documents exist.
+
+    _sanitizeFtQuery(text) {
+        // Escape RediSearch special characters rather than strip them, so tokens like
+        // "C++", "redis.io", and non-English text survive into the query intact.
+        // We OR-join per-word terms for recall; the KNN step handles ranking.
+        // This is a best-effort heuristic — see FT_SPECIAL_CHARS comment above.
+        const terms = text.split(/\s+/).filter(Boolean);
+        if (terms.length === 0) return '*';
+        const escaped = terms.map(t => t.replace(FT_SPECIAL_CHARS, '\\$&'));
+        return escaped.slice(0, 10).join(' | '); // cap at 10 terms
+    }
+
+    async _runKnnQuery(queryStr, queryEmbedding, topK) {
+        const results = await this.client.ft.search(DOC_INDEX, queryStr, {
+            PARAMS:  { K: topK, BLOB: this._toBuffer(queryEmbedding) },
+            SORTBY:  { BY: 'distance', DIRECTION: 'ASC' },
+            DIALECT: 2,
+            RETURN:  ['chunk_id', 'doc_id', 'title', 'source', 'content', 'distance']
+        });
+        return (results?.documents ?? []).map(d => ({ id: d.id, ...d.value }));
+    }
+
+    async _hybridSearch(queryText, queryEmbedding, topK = MAX_SEARCH_RESULTS) {
+        const safeText = this._sanitizeFtQuery(queryText);
+        if (safeText !== '*') {
+            try {
+                const results = await this._runKnnQuery(
+                    `(${safeText})=>[KNN $K @embedding $BLOB AS distance]`,
+                    queryEmbedding, topK
+                );
+                if (results.length > 0) return results;
+            } catch (err) {
+                console.error('Hybrid search error:', err.message);
+            }
+        }
+        // Fall back to pure vector search if the text filter returned nothing
+        return this._runKnnQuery(
+            `*=>[KNN $K @embedding $BLOB AS distance]`,
+            queryEmbedding, topK
+        );
+    }
+
+    // ── Semantic cache ────────────────────────────────────────────────────────
+
+    async _checkCache(queryEmbedding) {
+        const results = await this.client.ft.search(
+            CACHE_INDEX,
+            `*=>[KNN 1 @query_embedding $BLOB AS distance]`,
+            {
+                PARAMS:  { BLOB: this._toBuffer(queryEmbedding) },
+                SORTBY:  { BY: 'distance', DIRECTION: 'ASC' },
+                DIALECT: 2,
+                RETURN:  ['response', 'citations', 'distance']
+            }
+        );
+        const top = results?.documents?.[0];
+        if (!top) return null;
+        // vector_distance for cosine: 0=identical, 1=orthogonal. Hit when similarity > CACHE_THRESHOLD.
+        const dist = parseFloat(top.value.distance ?? '1');
+        if (dist < (1 - CACHE_THRESHOLD)) {
+            return {
+                response:  top.value.response,
+                citations: JSON.parse(top.value.citations ?? '[]')
+            };
+        }
+        return null;
+    }
+
+    async _storeCache(queryEmbedding, response, citations) {
+        const key = `ragcache:${crypto.randomUUID()}`;
+        await this.client.hSet(key, {
+            response,
+            citations:       JSON.stringify(citations),
+            query_embedding: this._toBuffer(queryEmbedding)
+        });
+        await this.client.expire(key, CACHE_TTL);
+    }
+
+    // ── Session memory ────────────────────────────────────────────────────────
+
+    async _getHistory() {
+        const raw = await this.client.lRange(this.sessionKey, 0, MAX_HISTORY_TURNS * 2 - 1);
+        return raw.reverse().map(s => {
+            try { return JSON.parse(s); } catch { return null; }
+        }).filter(Boolean);
+    }
+
+    async _saveHistory(role, content) {
+        await this.client.lPush(this.sessionKey, JSON.stringify({ role, content }));
+        await this.client.lTrim(this.sessionKey, 0, MAX_HISTORY_TURNS * 2 - 1);
+    }
+
+    // ── Query ─────────────────────────────────────────────────────────────────
+
+    async query(userQuery) {
+        const queryEmbedding = await this._embed(userQuery);
+
+        const cached = await this._checkCache(queryEmbedding);
+        if (cached) {
+            console.log('[cache hit]');
+            await this._saveHistory('user', userQuery);
+            await this._saveHistory('assistant', cached.response);
+            return cached;
+        }
+
+        const topChunks = await this._hybridSearch(userQuery, queryEmbedding);
+        if (topChunks.length === 0) {
+            return { response: 'No documents found. Please ingest documents before querying.', citations: [] };
+        }
+
+        const contextParts = [];
+        const citations = [];
+        topChunks.forEach((chunk, i) => {
+            contextParts.push(`[${i + 1}] ${chunk.title ?? 'Unknown'}\n${chunk.content ?? ''}`);
+            citations.push({
+                index:    i + 1,
+                title:    chunk.title ?? 'Unknown',
+                source:   chunk.source ?? '',
+                chunk_id: chunk.chunk_id ?? '',
+                doc_id:   chunk.doc_id ?? ''
+            });
+        });
+
+        const history = await this._getHistory();
+        const messages = [
+            {
+                role:    'system',
+                content: 'You are a helpful knowledge assistant. Answer using only the provided context. ' +
+                         'Reference sources as [1], [2], etc. If the context lacks the answer, say so clearly.'
+            },
+            ...history,
+            { role: 'user', content: `Context:\n${contextParts.join('\n\n')}\n\nQuestion: ${userQuery}` }
+        ];
+
+        const completion = await this.llm.chat.completions.create({
+            model: this.llmModel,
+            messages
+        });
+        const answer = completion.choices[0].message.content;
+
+        await this._storeCache(queryEmbedding, answer, citations);
+        await this._saveHistory('user', userQuery);
+        await this._saveHistory('assistant', answer);
+        return { response: answer, citations };
+    }
+}
+
+async function main() {
+    const agent = new KnowledgeAssistant();
+    await agent.init();
+
+    // Only ingest sample documents when the index is empty so re-running the agent
+    // does not re-embed the same content on every startup.
+    // To load your own documents instead: await agent.loadDirectory('path/to/docs');
+    const indexInfo = await agent.client.ft.info(DOC_INDEX);
+    if (parseInt(indexInfo.numDocs ?? '0') === 0) {
+        console.log('Empty index — ingesting sample documents...');
+        for (const doc of SAMPLE_DOCS) {
+            await agent.ingestDocument(doc.content, doc.title, doc.source);
+        }
+    } else {
+        console.log(`Index already contains ${indexInfo.numDocs} document(s). Skipping ingestion.`);
+    }
+
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    console.log('\nKnowledge Assistant ready. Type your questions or "quit" to exit.\n');
+
+    const ask = () => {
+        rl.question('Question: ', async (input) => {
+            const trimmed = input.trim();
+            if (['quit', 'exit', 'bye'].includes(trimmed.toLowerCase())) {
+                console.log('Goodbye!');
+                rl.close();
+                await agent.client.quit();
+                return;
+            }
+            if (!trimmed) {
+                ask();
+                return;
+            }
+            try {
+                const { response, citations } = await agent.query(trimmed);
+                console.log(`\n${response}`);
+                if (citations.length > 0) {
+                    console.log('\nSources:');
+                    citations.forEach(c => {
+                        const src = c.source ? ` — ${c.source}` : '';
+                        console.log(`  [${c.index}] ${c.title}${src}`);
+                        console.log(`         chunk_id: ${c.chunk_id}`);
+                    });
+                }
+                console.log();
+            } catch (err) {
+                console.error('Error:', err.message);
+            }
+            ask();
+        });
+    };
+    ask();
+}
+
+main().catch(err => {
+    console.error('Failed to initialize:', err.message);
+    process.exit(1);
+});
diff --git a/static/code/agent-templates/python/rag_agent.py b/static/code/agent-templates/python/rag_agent.py
new file mode 100644
index 0000000000..2c522a8330
--- /dev/null
+++ b/static/code/agent-templates/python/rag_agent.py
@@ -0,0 +1,442 @@
+'''
+Redis Knowledge Assistant (RAG Agent)
+
+Features:
+- Ingest documents with automatic chunking and embedding
+- Redis-native hybrid retrieval: text pre-filter + KNN vector search, with a fallback vector pass
+- Semantic cache: skip LLM for similar queries using cached responses (TTL-based expiry)
+- Per-session conversation memory in a Redis List
+- Citations: each answer references source documents with title, source URL, and chunk ID
+
+To run this code:
+    Install dependencies:
+        pip install "redisvl[all]" redis openai
+
+    Set environment variables:
+        export LLM_API_KEY=your_api_key_here
+        export LLM_API_BASE_URL=your_${formData.llmModel.toLowerCase()}_api_base_url
+            (optional - default: ${CONFIG.models[formData.llmModel].baseUrl})
+        export LLM_MODEL=your_${formData.llmModel.toLowerCase()}_model
+            (optional - default: ${CONFIG.models[formData.llmModel].defaultModel})
+        export REDIS_HOST=your_redis_host
+        export REDIS_PORT=your_redis_port
+        export REDIS_PASSWORD=your_redis_password
+
+    Note: this template uses the OpenAI SDK with a configurable base URL. It works with
+    OpenAI directly and with any provider that exposes an OpenAI-compatible API endpoint.
+
+    Requires Redis Stack or Redis 8+ with Search module enabled.
+'''
+
+import json
+import os
+import re
+import struct
+import uuid
+
+import openai
+import redis
+from redis.commands.search.query import Query as FTQuery
+from redisvl.index import SearchIndex
+from redisvl.schema import IndexSchema
+
+CHUNK_SIZE = 500
+CHUNK_OVERLAP = 50
+MAX_SEARCH_RESULTS = 5
+MAX_HISTORY_TURNS = 6
+CACHE_TTL = 3600
+# Cosine similarity threshold for cache hits. vector_distance for cosine is in [0, 2]:
+# 0 = identical, 1 = orthogonal, 2 = opposite. A hit fires when distance < (1 - threshold),
+# i.e. when cosine similarity > threshold. Treat this as a value to test, not assume.
+CACHE_THRESHOLD = 0.92
+VECTOR_DIM = int(os.getenv('VECTOR_DIM', '1536'))
+EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
+
+# RediSearch special characters that must be backslash-escaped in query strings.
+# Escaping preserves token meaning — "C\+\+" still matches "C++" in documents —
+# whereas stripping would silently discard the + characters and change query intent.
+# Note: this is a best-effort heuristic. It handles the common cases well but is
+# not a full RediSearch query parser; phrase queries and advanced syntax may still
+# need manual adjustment.
+_FT_SPECIAL = re.compile(r'([,.<>{}\[\]"\':;!@#$%^&*()\-+=~|/\\?])')
+
+_DOC_SCHEMA = {
+    'index': {'name': 'knowledge_docs', 'prefix': 'doc', 'storage_type': 'hash'},
+    'fields': [
+        {'name': 'doc_id', 'type': 'tag'},
+        {'name': 'chunk_id', 'type': 'tag'},
+        {'name': 'title', 'type': 'text'},
+        {'name': 'source', 'type': 'tag'},
+        {'name': 'content', 'type': 'text'},
+        {'name': 'embedding', 'type': 'vector',
+         'attrs': {'dims': VECTOR_DIM, 'algorithm': 'flat',
+                   'distance_metric': 'cosine', 'datatype': 'float32'}}
+    ]
+}
+
+_CACHE_SCHEMA = {
+    'index': {'name': 'knowledge_cache', 'prefix': 'ragcache', 'storage_type': 'hash'},
+    'fields': [
+        {'name': 'response', 'type': 'text'},
+        {'name': 'citations', 'type': 'text'},
+        {'name': 'query_embedding', 'type': 'vector',
+         'attrs': {'dims': VECTOR_DIM, 'algorithm': 'flat',
+                   'distance_metric': 'cosine', 'datatype': 'float32'}}
+    ]
+}
+
+
+# Sample Redis documentation for demonstration.
+# Replace with your own documents or use load_directory() to load a folder of .txt / .md files.
+SAMPLE_DOCS = [
+    {
+        'title': 'Redis Data Types',
+        'source': 'https://redis.io/docs/latest/develop/data-types/',
+        'content': (
+            'Redis supports several core data types suited to different use cases. Strings store '
+            'sequences of bytes up to 512 MB and support atomic increment and decrement operations. '
+            'Lists are linked lists of strings with O(1) push and pop from both ends, useful for '
+            'queues and stacks. Sets are unordered collections of unique strings with O(1) add, '
+            'remove, and membership tests, plus union, intersection, and difference operations. '
+            'Sorted sets add a floating-point score to each member, enabling range queries by score '
+            'or rank in O(log N) time. Hashes store field-value pairs in a single key, ideal for '
+            'representing objects without serialization. Redis also supports Streams for append-only '
+            'logs with consumer groups, HyperLogLog for approximate cardinality estimation, Bitmaps '
+            'for efficient bit-level operations, and Geospatial indexes for location-based queries.'
+        )
+    },
+    {
+        'title': 'Redis Vector Search',
+        'source': 'https://redis.io/docs/latest/develop/ai/search-and-query/vectors/',
+        'content': (
+            'Redis Vector Search lets you index and search vector embeddings stored in HASH or JSON '
+            'documents. Two index algorithms are available: FLAT (brute-force, exact results, best for '
+            'smaller datasets) and HNSW (Hierarchical Navigable Small World, approximate results, '
+            'much faster at scale using a multi-layer graph structure). Supported distance metrics are '
+            'cosine similarity, L2 Euclidean distance, and inner product. Hybrid queries combine a '
+            'vector KNN clause with a RediSearch filter expression in a single FT.SEARCH call, '
+            'pre-filtering documents by metadata before ranking by vector distance. This avoids '
+            'post-filtering and keeps result quality high. Vector fields are declared with DIM '
+            '(dimension count), TYPE (FLOAT32 or FLOAT64), and DISTANCE_METRIC parameters.'
+        )
+    },
+    {
+        'title': 'Redis Cloud',
+        'source': 'https://redis.io/docs/latest/operate/rc/',
+        'content': (
+            'Redis Cloud is the fully managed cloud service for Redis, available on AWS, Google Cloud, '
+            'and Microsoft Azure. It provides automatic clustering, replication, and failover for high '
+            'availability and data durability without operational overhead. Deployment options include '
+            'Redis Stack for development, Redis Enterprise for mission-critical workloads, and active-'
+            'active geo-distribution for multi-region deployments with conflict-free replication. '
+            'Built-in monitoring, automated backups, and vertical and horizontal scaling are included. '
+            'A free tier is available for development and testing. Supported modules include RediSearch '
+            'for full-text and vector search, RedisJSON for native JSON documents, RedisTimeSeries for '
+            'time-series data, and RedisBloom for probabilistic structures such as Bloom filters and '
+            'Count-Min sketches.'
+        )
+    },
+    {
+        'title': 'Redis Context Engine',
+        'source': 'https://redis.io/docs/latest/develop/ai/context-engine/',
+        'content': (
+            'The Redis Context Engine is a suite of managed services on Redis Cloud that gives AI '
+            'agents the context they need. LangCache provides semantic response caching: incoming '
+            'queries are embedded and compared against cached query-response pairs, returning a cached '
+            'answer when cosine similarity exceeds a configurable threshold to reduce LLM API costs. '
+            'Agent Memory offers two-tier persistent memory with a session layer for recent turns and '
+            'a long-term layer backed by vector search, available as a REST API and Python SDK. '
+            'Context Retriever exposes structured business data as governed tools that agents can '
+            'query reliably without writing custom retrieval logic. Data Integration keeps a Redis '
+            'Cloud database in sync with relational databases in near real time using Change Data '
+            'Capture, so agents always query fresh data.'
+        )
+    },
+]
+
+
+class KnowledgeAssistant:
+    def __init__(self, session_id=None):  # session_id: optional; a random UUID4 string is used when it is falsy
+        self.session_id = session_id or str(uuid.uuid4())
+        self.session_key = f'session:{self.session_id}:history'  # Redis key of this session's message list
+
+        self.llm_api_key = os.getenv('LLM_API_KEY')
+        if not self.llm_api_key:
+            raise ValueError('LLM_API_KEY environment variable is required')
+        self.llm_base_url = os.getenv('LLM_API_BASE_URL', '${CONFIG.models[formData.llmModel].baseUrl}')  # NOTE(review): the ${...} defaults look like builder-side placeholders — confirm they are substituted before this file runs
+        self.llm_model = os.getenv('LLM_MODEL', '${CONFIG.models[formData.llmModel].defaultModel}')
+
+        try:
+            # Single client with decode_responses=False handles both text and binary (embedding) fields.
+            self.client = redis.Redis(
+                host=os.getenv('REDIS_HOST', 'localhost'),
+                port=int(os.getenv('REDIS_PORT', 6379)),
+                username=os.getenv('REDIS_USERNAME', 'default'),
+                password=os.getenv('REDIS_PASSWORD', ''),
+                decode_responses=False,
+                socket_connect_timeout=5
+            )
+            self.client.ping()  # fail fast: surface connection problems here instead of on the first query
+            print(f'Connected to Redis. Session: {self.session_id}')
+        except redis.ConnectionError as e:
+            print(f'Failed to connect to Redis: {e}')
+            raise  # re-raised after logging so the caller still sees the failure
+
+        self.llm = openai.OpenAI(api_key=self.llm_api_key, base_url=self.llm_base_url)  # also used for embeddings in _embed
+        print(f'LLM configured: {self.llm_model}')
+
+        # redisvl is used only for index creation; all queries use redis-py directly.
+        doc_index = SearchIndex(IndexSchema.from_dict(_DOC_SCHEMA), redis_client=self.client)
+        cache_index = SearchIndex(IndexSchema.from_dict(_CACHE_SCHEMA), redis_client=self.client)
+        doc_index.create(overwrite=False)  # NOTE(review): overwrite=False presumably leaves an existing index and its data untouched — confirm against redisvl
+        cache_index.create(overwrite=False)
+
+    # ── Document ingestion ────────────────────────────────────────────────────
+
+    def load_directory(self, path, extensions=('.txt', '.md')):
+        """Recursively ingest every non-empty file under `path` whose suffix is in `extensions`; return how many were loaded."""
+        import pathlib
+        count = 0
+        for entry in sorted(pathlib.Path(path).rglob('*')):
+            if entry.suffix.lower() in extensions and entry.is_file():
+                try:
+                    body = entry.read_text(encoding='utf-8', errors='ignore').strip()
+                    if body:
+                        self.ingest_document(body, title=entry.stem, source=str(entry))
+                        count += 1
+                except Exception as exc:  # best effort: report the file and carry on with the rest
+                    print(f'Skipping {entry}: {exc}')
+        print(f'Loaded {count} document(s) from {path}')
+        return count
+
+    def _chunk_text(self, text):
+        """Split `text` into CHUNK_SIZE-character windows that overlap by CHUNK_OVERLAP; returns [] for empty text."""
+        # Character-based, not token-aware: for production consider tiktoken or langchain's RecursiveCharacterTextSplitter.
+        # A window is only started if it adds characters beyond the previous window's end; otherwise the
+        # tail chunk would consist purely of overlap that has already been embedded and indexed.
+        step = max(1, CHUNK_SIZE - CHUNK_OVERLAP)  # never let a misconfigured overlap produce a non-positive step
+        starts = range(0, max(len(text) - CHUNK_OVERLAP, min(len(text), 1)), step)
+        return [text[start:start + CHUNK_SIZE] for start in starts]
+
+    def _embed(self, text):
+        """Return the embedding vector for `text`; only its first 8000 characters are sent to the embeddings API."""
+        return self.llm.embeddings.create(model=EMBEDDING_MODEL, input=text[:8000]).data[0].embedding
+
+    def _to_bytes(self, embedding):  # pack a sequence of floats into native-order float32 bytes
+        return struct.pack('%df' % len(embedding), *embedding)
+
+    def _decode_doc(self, doc):
+        """Flatten a search-result document into a dict of text fields, decoding bytes values as UTF-8."""
+        def as_text(raw):
+            if isinstance(raw, bytes):
+                return raw.decode('utf-8', errors='replace')
+            return raw or ''
+        fields = {
+            name: as_text(getattr(doc, name, ''))
+            for name in ('chunk_id', 'doc_id', 'title', 'source', 'content')
+        }
+        fields['distance'] = as_text(getattr(doc, 'distance', '1.0'))  # a missing distance defaults to '1.0'
+        return {'id': as_text(doc.id), **fields}
+
+    def ingest_document(self, content, title, source=''):
+        """Chunk `content`, embed each chunk and store it as a hash at doc:<doc_id>:<n>; return the new doc_id."""
+        doc_id, pieces = str(uuid.uuid4()), self._chunk_text(content)
+        for position, piece in enumerate(pieces):
+            chunk_id = f'{doc_id}:{position}'
+            vector = self._embed(piece)
+            self.client.hset(f'doc:{chunk_id}', mapping={
+                b'doc_id':    doc_id.encode(),
+                b'chunk_id':  chunk_id.encode(),
+                b'title':     title.encode(),
+                b'source':    source.encode(),
+                b'content':   piece.encode(),
+                b'embedding': self._to_bytes(vector)
+            })
+        print(f"Ingested '{title}': {len(pieces)} chunk(s) (doc_id: {doc_id})")
+        return doc_id
+
+    # ── Hybrid search ─────────────────────────────────────────────────────────
+    # First pass: FT.SEARCH with a text pre-filter and an inline KNN clause —
+    # "(text_terms) => [KNN k @embedding $BLOB AS distance]" — so Redis applies
+    # both filters in a single round trip. This is more Redis-native than running
+    # two separate queries and fusing the results in Python.
+    # Second pass (fallback): if the text filter is too selective and returns nothing,
+    # a pure vector search is issued so queries always return results when documents exist.
+
+    def _sanitize_ft_query(self, text):
+        """Build an OR-joined RediSearch text filter from the first 10 whitespace-separated words of `text`."""
+        # Special characters are backslash-escaped rather than stripped, so tokens such as
+        # "C++" or "redis.io" keep their meaning. OR-joining favours recall; the KNN step
+        # does the ranking. Best effort only — this is not a full RediSearch query parser.
+        words = text.split()[:10]
+        if len(words) == 0:
+            # Nothing usable in the input: fall back to the match-all query.
+            return '*'
+        return ' | '.join(_FT_SPECIAL.sub(r'\\\1', word) for word in words)
+
+    def _run_knn_query(self, query_str, query_embedding, top_k):
+        """Run `query_str` on the knowledge_docs index; return at most `top_k` results sorted by ascending distance."""
+        params = {'K': top_k, 'BLOB': self._to_bytes(query_embedding)}
+        fields = ('chunk_id', 'doc_id', 'title', 'source', 'content', 'distance')
+        ft_query = FTQuery(query_str).sort_by('distance', asc=True).paging(0, top_k)
+        # Dialect 2 is what enables the "=>[KNN ...]" clause and its $K / $BLOB parameters.
+        ft_query = ft_query.return_fields(*fields).dialect(2)
+        index = self.client.ft('knowledge_docs')
+        return index.search(ft_query, query_params=params)
+
+    def _hybrid_search(self, query_text, query_embedding, top_k=MAX_SEARCH_RESULTS):
+        """Return up to `top_k` decoded chunks: text-filtered KNN first, unfiltered KNN as the fallback."""
+        knn_clause = '=>[KNN $K @embedding $BLOB AS distance]'
+        text_filter = self._sanitize_ft_query(query_text)
+        if text_filter != '*':
+            try:
+                filtered = self._run_knn_query(
+                    f'({text_filter}){knn_clause}', query_embedding, top_k
+                )
+                if filtered.docs:
+                    return [self._decode_doc(hit) for hit in filtered.docs]
+            except Exception as e:
+                # Best effort: report the failure and let the unfiltered pass below run instead.
+                print(f'Hybrid search error: {e}')
+        # Reached when the text filter is '*', matched nothing, or raised:
+        # rank every indexed chunk by vector distance alone.
+        unfiltered = self._run_knn_query('*' + knn_clause, query_embedding, top_k)
+        return [self._decode_doc(hit) for hit in unfiltered.docs]
+
+    # ── Semantic cache ────────────────────────────────────────────────────────
+
+    def _check_cache(self, query_embedding):
+        """Return (response, citations) of the nearest cached query when it is within CACHE_THRESHOLD, else (None, None)."""
+        try:
+            result = self.client.ft('knowledge_cache').search(
+                FTQuery('*=>[KNN 1 @query_embedding $BLOB AS distance]').sort_by('distance', asc=True)
+                    .paging(0, 1).return_fields('response', 'citations', 'distance').dialect(2),
+                query_params={'BLOB': self._to_bytes(query_embedding)}
+            )
+            if result.docs:
+                # Read fields off the hit directly: _decode_doc() keeps only chunk fields and drops response/citations.
+                def field(name, default):
+                    raw = getattr(result.docs[0], name, default)
+                    return raw.decode('utf-8', errors='replace') if isinstance(raw, bytes) else (raw or default)
+                if float(field('distance', '1.0')) < (1.0 - CACHE_THRESHOLD) and field('response', ''):  # distance = 1 - cosine similarity
+                    return field('response', ''), json.loads(field('citations', '[]'))
+        except Exception as e:
+            print(f'Cache lookup error: {e}')  # a broken cache is reported but must never block answering
+        return None, None
+
+    def _store_cache(self, query_embedding, response, citations):
+        """Cache an answer and its citations in a fresh ragcache:<uuid> hash that expires after CACHE_TTL seconds."""
+        key = f'ragcache:{uuid.uuid4()}'
+        entry = {b'response': response.encode(), b'citations': json.dumps(citations).encode(),
+                 b'query_embedding': self._to_bytes(query_embedding)}
+        # One MULTI/EXEC round trip, so an entry can never be written without its TTL and linger forever.
+        with self.client.pipeline(transaction=True) as pipe:
+            pipe.hset(key, mapping=entry).expire(key, CACHE_TTL).execute()
+
+    # ── Session memory ────────────────────────────────────────────────────────
+
+    def _get_history(self):
+        """Return the stored session messages oldest-first, silently skipping entries that cannot be parsed."""
+        history, newest_first = [], self.client.lrange(self.session_key, 0, MAX_HISTORY_TURNS * 2 - 1)
+        for raw in newest_first[::-1]:  # messages are LPUSHed, so the list head holds the newest one
+            try:
+                history.append(json.loads(raw.decode() if isinstance(raw, bytes) else raw))
+            except Exception:
+                continue
+        return history
+
+    def _save_history(self, role, content):  # append one {'role', 'content'} message to this session's list
+        self.client.lpush(self.session_key, json.dumps({'role': role, 'content': content}).encode())  # newest entry goes to the head
+        self.client.ltrim(self.session_key, 0, MAX_HISTORY_TURNS * 2 - 1)  # keep only the MAX_HISTORY_TURNS * 2 most recent entries
+
+    # ── Query ─────────────────────────────────────────────────────────────────
+
+    def query(self, user_query):  # returns (answer_text, citations_list) for one user question
+        query_embedding = self._embed(user_query)  # embedded once; reused for cache lookup, retrieval and cache store
+
+        cached_response, cached_citations = self._check_cache(query_embedding)
+        if cached_response:  # None or an empty string both count as a miss
+            print('[cache hit]')
+            self._save_history('user', user_query)  # cached answers are still recorded in the session history
+            self._save_history('assistant', cached_response)
+            return cached_response, cached_citations
+
+        top_chunks = self._hybrid_search(user_query, query_embedding)
+        if not top_chunks:
+            return 'No documents found. Please ingest documents before querying.', []  # note: this path records no history
+
+        context_parts, citations = [], []
+        for i, chunk in enumerate(top_chunks):
+            title = chunk.get('title', 'Unknown')
+            source = chunk.get('source', '')
+            context_parts.append(f'[{i + 1}] {title}\n{chunk.get("content", "")}')  # 1-based label matches citation 'index'
+            citations.append({
+                'index':    i + 1,
+                'title':    title,
+                'source':   source,
+                'chunk_id': chunk.get('chunk_id', ''),
+                'doc_id':   chunk.get('doc_id', '')
+            })
+
+        messages = [
+            {'role': 'system', 'content': (
+                'You are a helpful knowledge assistant. Answer using only the provided context. '
+                'Reference sources as [1], [2], etc. If the context lacks the answer, say so clearly.'
+            )},
+            *self._get_history(),  # earlier turns of this session, oldest first
+            {'role': 'user', 'content': f'Context:\n{chr(10).join(context_parts)}\n\nQuestion: {user_query}'}  # chr(10) is a newline
+        ]
+
+        response = self.llm.chat.completions.create(model=self.llm_model, messages=messages)
+        answer = response.choices[0].message.content
+
+        self._store_cache(query_embedding, answer, citations)  # keyed by the query embedding only, not by session
+        self._save_history('user', user_query)  # history stores the bare question, without the retrieved context
+        self._save_history('assistant', answer)
+        return answer, citations
+
+
+if __name__ == '__main__':  # interactive entry point: build the agent, seed an empty index, then run a question loop
+    try:
+        agent = KnowledgeAssistant()
+
+        # Only ingest sample documents when the index is empty so re-running the agent
+        # does not re-embed the same content on every startup.
+        # To load your own documents instead: agent.load_directory('path/to/docs')
+        index_info = agent.client.ft('knowledge_docs').info()
+        if int(index_info.get('num_docs', 0)) == 0:
+            print('Empty index — ingesting sample documents...')
+            for doc in SAMPLE_DOCS:
+                agent.ingest_document(doc['content'], doc['title'], doc['source'])
+        else:
+            print(f"Index already contains {index_info.get('num_docs')} document(s). Skipping ingestion.")
+
+        print('\nKnowledge Assistant ready. Type your questions or "quit" to exit.\n')
+        while True:
+            try:
+                user_input = input('Question: ').strip()
+                if user_input.lower() in ['quit', 'exit', 'bye']:
+                    print('Goodbye!')
+                    break
+                if not user_input:
+                    continue
+                answer, citations = agent.query(user_input)
+                print(f'\n{answer}')
+                if citations:
+                    print('\nSources:')
+                    for c in citations:
+                        src = f' — {c["source"]}' if c['source'] else ''
+                        print(f'  [{c["index"]}] {c["title"]}{src}')
+                        print(f'         chunk_id: {c["chunk_id"]}')
+                print()
+            except (KeyboardInterrupt, EOFError):  # EOFError: stdin closed or piped input exhausted; without this the loop spins forever
+                print('\nGoodbye!')
+                break
+            except Exception as e:  # per-question failures are reported and the loop continues
+                print(f'Error: {e}')
+    except ValueError as e:  # raised by KnowledgeAssistant() when LLM_API_KEY is missing
+        print(f'Configuration error: {e}')
+        exit(1)
+    except Exception as e:
+        print(f'Failed to initialize: {e}')
+        exit(1)
diff --git a/static/js/agent-builder.js b/static/js/agent-builder.js
index 91f37ebee3..ff21b0ff36 100644
--- a/static/js/agent-builder.js
+++ b/static/js/agent-builder.js
@@ -20,6 +20,12 @@
                 description: "A chatbot that maintains conversation history using semantic message history and provides contextual responses.",
                 features: ["Conversation memory", "Context awareness", "Multi-turn dialogue"],
                 keywords: ["chat", "conversation", "assistant", "bot", "chatbot", "talk", "dialogue"]
+            },
+            rag: {
+                name: "Knowledge Assistant",
+                description: "A RAG agent that ingests documents, uses Redis-native hybrid retrieval (text pre-filter + vector search), semantic caching, and session memory to answer questions with citations.",
+                features: ["Document ingestion with chunking", "Hybrid vector + full-text search", "Semantic caching", "Citations"],
+                keywords: ["rag", "knowledge", "documents", "search", "retrieval", "qa", "question answering", "citations", "hybrid"]
             }
         },
         languages: {
@@ -286,16 +292,18 @@
         let suggestions = [];
 
         switch (conversationState.step) {
-            case 'agent-type':
+            case 'agent-type': {
+                const agentIcons = { recommendation: '🛍️', conversational: '💬', rag: '🔍' };
                 suggestions = Object.entries(CONFIG.agentTypes).map(([key, config]) => ({
                     value: key,
                     label: config.name,
-                    icon: key === 'recommendation' ? '🛍️' : '💬'
+                    icon: agentIcons[key] || '🤖'
                 })).filter(s =>
                     s.label.toLowerCase().includes(lowerInput) ||
                     CONFIG.agentTypes[s.value].keywords.some(k => k.includes(lowerInput))
                 );
                 break;
+            }
 
             case 'language':
                 suggestions = Object.entries(CONFIG.languages).map(([key, config]) => ({
@@ -381,7 +389,8 @@
             // Generate a default agent name based on the type
             const defaultNames = {
                 recommendation: 'RecommendationEngine',
-                conversational: 'ConversationalAgent'
+                conversational: 'ConversationalAgent',
+                rag: 'KnowledgeAssistant'
             };
             conversationState.selections.agentName = defaultNames[selectedType] || 'RedisAgent';
 
@@ -398,7 +407,8 @@
         } else {
             addMessage("I didn't understand that. Please choose one of the agent types:", 'bot', [
                 { value: 'recommendation', label: '🛍️ Recommendation Engine' },
-                { value: 'conversational', label: '💬 Conversational Assistant' }
+                { value: 'conversational', label: '💬 Conversational Assistant' },
+                { value: 'rag', label: '🔍 Knowledge Assistant' }
             ]);
         }
     }
@@ -426,8 +436,8 @@
         }
 
         if (selectedLang) {
-            // Check if it's Python (fully supported)
-            if (selectedLang === 'python') {
+            // Check if it's a supported language
+            if (selectedLang === 'python' || selectedLang === 'javascript') {
                 conversationState.selections.programmingLanguage = selectedLang;
                 const config = CONFIG.languages[selectedLang];
 
@@ -445,9 +455,10 @@
                 const config = CONFIG.languages[selectedLang];
                 const languageName = config.name;
 
-                addMessage(`${languageName} support is coming soon. Currently, only Python is fully supported.`, 'bot');
-                addMessage(`Would you like to build a Python agent instead?`, 'bot', [
-                    { value: 'python', label: 'Yes, use Python' },
+                addMessage(`${languageName} support is coming soon. Currently, Python and JavaScript are supported.`, 'bot');
+                addMessage(`Would you like to build a Python or JavaScript agent instead?`, 'bot', [
+                    { value: 'python', label: '🐍 Yes, use Python' },
+                    { value: 'javascript', label: '🟨 Yes, use JavaScript' },
                     { value: 'wait', label: 'I\'ll wait for ' + languageName }
                 ]);
             }
@@ -520,8 +531,8 @@
             java: '.java',
             csharp: '.cs'
         };
-        const base = window.HUGO_BASEURL || '';
-        const filename = `${base}code/agent-templates/${formData.programmingLanguage}/${formData.agentType}_agent${fileExtensions[formData.programmingLanguage]}`;
+        const templateBase = (window.AGENT_TEMPLATE_BASE || '/code/agent-templates').replace(/\/$/, '');
+        const filename = `${templateBase}/${formData.programmingLanguage}/${formData.agentType}_agent${fileExtensions[formData.programmingLanguage]}`;
 
         return loadTemplateFile(filename, formData) || genericTemplates[formData.programmingLanguage](formData);
     }