diff --git a/content/develop/ai/agent-builder/_index.md b/content/develop/ai/agent-builder/_index.md
index 8b27ce90fa..45f87ce306 100644
--- a/content/develop/ai/agent-builder/_index.md
+++ b/content/develop/ai/agent-builder/_index.md
@@ -30,16 +30,17 @@ Redis powers these capabilities with fast, reliable data storage and retrieval t
## What you can build
-Choose from two types of intelligent agents:
+Choose from three types of intelligent agents:
- **Recommendation engines**: Personalized product and content recommendations
- **Conversational assistants**: Chatbots with memory and context awareness
+- **Knowledge assistants**: Retrieval-augmented generation (RAG) agents that ingest documents, answer questions with citations, and use semantic caching to skip the LLM for similar queries
The agent builder will generate complete, working code examples for your chosen agent type.
## Features
-- **Multiple programming languages**: Generate code in Python, with JavaScript (Node.js), Java, and C# coming soon
+- **Multiple programming languages**: Generate code in Python and JavaScript (Node.js), with Java and C# coming soon
- **LLM integration**: Support for OpenAI, Anthropic Claude, and Llama 2
- **Redis optimized**: Uses Redis data structures for optimal performance
diff --git a/layouts/shortcodes/agent-builder.html b/layouts/shortcodes/agent-builder.html
index 8adeffb96b..f95ec26073 100644
--- a/layouts/shortcodes/agent-builder.html
+++ b/layouts/shortcodes/agent-builder.html
@@ -24,6 +24,7 @@
Build Your AI Agent
+
@@ -90,5 +91,7 @@
Generated Agent Code
+
+
diff --git a/static/code/agent-templates/javascript/rag_agent.js b/static/code/agent-templates/javascript/rag_agent.js
new file mode 100644
index 0000000000..3db0a8dc4e
--- /dev/null
+++ b/static/code/agent-templates/javascript/rag_agent.js
@@ -0,0 +1,496 @@
+/*
+ * Redis Knowledge Assistant (RAG Agent)
+ *
+ * Features:
+ * - Ingest documents with automatic chunking and embedding
+ * - Redis-native hybrid retrieval: text pre-filter + KNN vector search, with a fallback vector pass
+ * - Semantic cache: skip LLM for similar queries (TTL-based expiry)
+ * - Per-session conversation memory in a Redis List
+ * - Citations: each answer references source documents with title, source URL, and chunk ID
+ *
+ * To run this code:
+ * Install dependencies:
+ * npm install redis openai
+ *
+ * Set environment variables:
+ * LLM_API_KEY=your_api_key_here
+ * LLM_API_BASE_URL=your_base_url (optional - default: ${CONFIG.models[formData.llmModel].baseUrl})
+ * LLM_MODEL=your_model (optional - default: ${CONFIG.models[formData.llmModel].defaultModel})
+ * REDIS_URL=redis://host:port (or REDIS_HOST / REDIS_PORT / REDIS_PASSWORD)
+ *
+ * Note: this template uses the OpenAI SDK with a configurable base URL. It works with
+ * OpenAI directly and with any provider that exposes an OpenAI-compatible API endpoint.
+ *
+ * Run:
+ * node rag_agent.js
+ *
+ * Requires Redis Stack or Redis 8+ with Search module enabled.
+ */
+
+'use strict';
+
+const { createClient, SchemaFieldTypes, VectorAlgorithms } = require('redis');
+const OpenAI = require('openai');
+const readline = require('readline');
+const crypto = require('crypto');
+
+const CHUNK_SIZE = 500;
+const CHUNK_OVERLAP = 50;
+const MAX_SEARCH_RESULTS = 5;
+const MAX_HISTORY_TURNS = 6;
+const CACHE_TTL = 3600;
+// Cosine similarity threshold for cache hits. vector_distance for cosine is in [0, 2]:
+// 0 = identical, 1 = orthogonal, 2 = opposite. A hit fires when distance < (1 - threshold),
+// i.e. when cosine similarity > threshold. Verify this against your node-redis version.
+const CACHE_THRESHOLD = 0.92;
+const VECTOR_DIM = parseInt(process.env.VECTOR_DIM) || 1536;
+const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL || 'text-embedding-3-small';
+
+const DOC_INDEX = 'knowledge_docs';
+const CACHE_INDEX = 'knowledge_cache';
+
+// RediSearch special characters that must be backslash-escaped in query strings.
+// Escaping preserves token meaning — "C\+\+" still matches "C++" in documents —
+// whereas stripping would silently discard the + characters and change query intent.
+// Note: this is a best-effort heuristic. It handles the common cases well but is not
+// a full RediSearch query parser; phrase queries and advanced syntax may still produce
+// unexpected results and will need manual adjustment.
+const FT_SPECIAL_CHARS = /[,.<>{}\[\]"'`:;!@#$%^&*()\-+=~|\/\\?]/g;
+
+// Sample Redis documentation for demonstration.
+// Replace with your own documents or use loadDirectory() to load a folder of .txt / .md files.
+//
+// Each entry has three string fields:
+//   title   - passed to ingestDocument() as the document title
+//   source  - passed to ingestDocument() as the source (here, a documentation URL)
+//   content - the text that is chunked and embedded
+// main() ingests these entries only when the document index reports zero documents.
+const SAMPLE_DOCS = [
+ {
+ title: 'Redis Data Types',
+ source: 'https://redis.io/docs/latest/develop/data-types/',
+ content: 'Redis supports several core data types suited to different use cases. Strings store ' +
+ 'sequences of bytes up to 512 MB and support atomic increment and decrement operations. ' +
+ 'Lists are linked lists of strings with O(1) push and pop from both ends, useful for ' +
+ 'queues and stacks. Sets are unordered collections of unique strings with O(1) add, ' +
+ 'remove, and membership tests, plus union, intersection, and difference operations. ' +
+ 'Sorted sets add a floating-point score to each member, enabling range queries by score ' +
+ 'or rank in O(log N) time. Hashes store field-value pairs in a single key, ideal for ' +
+ 'representing objects without serialization. Redis also supports Streams for append-only ' +
+ 'logs with consumer groups, HyperLogLog for approximate cardinality estimation, Bitmaps ' +
+ 'for efficient bit-level operations, and Geospatial indexes for location-based queries.'
+ },
+ {
+ title: 'Redis Vector Search',
+ source: 'https://redis.io/docs/latest/develop/ai/search-and-query/vectors/',
+ content: 'Redis Vector Search lets you index and search vector embeddings stored in HASH or JSON ' +
+ 'documents. Two index algorithms are available: FLAT (brute-force, exact results, best for ' +
+ 'smaller datasets) and HNSW (Hierarchical Navigable Small World, approximate results, ' +
+ 'much faster at scale using a multi-layer graph structure). Supported distance metrics are ' +
+ 'cosine similarity, L2 Euclidean distance, and inner product. Hybrid queries combine a ' +
+ 'vector KNN clause with a RediSearch filter expression in a single FT.SEARCH call, ' +
+ 'pre-filtering documents by metadata before ranking by vector distance. This avoids ' +
+ 'post-filtering and keeps result quality high. Vector fields are declared with DIM ' +
+ '(dimension count), TYPE (FLOAT32 or FLOAT64), and DISTANCE_METRIC parameters.'
+ },
+ {
+ title: 'Redis Cloud',
+ source: 'https://redis.io/docs/latest/operate/rc/',
+ content: 'Redis Cloud is the fully managed cloud service for Redis, available on AWS, Google Cloud, ' +
+ 'and Microsoft Azure. It provides automatic clustering, replication, and failover for high ' +
+ 'availability and data durability without operational overhead. Deployment options include ' +
+ 'Redis Stack for development, Redis Enterprise for mission-critical workloads, and active-' +
+ 'active geo-distribution for multi-region deployments with conflict-free replication. ' +
+ 'Built-in monitoring, automated backups, and vertical and horizontal scaling are included. ' +
+ 'A free tier is available for development and testing. Supported modules include RediSearch ' +
+ 'for full-text and vector search, RedisJSON for native JSON documents, RedisTimeSeries for ' +
+ 'time-series data, and RedisBloom for probabilistic structures such as Bloom filters and ' +
+ 'Count-Min sketches.'
+ },
+ {
+ title: 'Redis Context Engine',
+ source: 'https://redis.io/docs/latest/develop/ai/context-engine/',
+ content: 'The Redis Context Engine is a suite of managed services on Redis Cloud that gives AI ' +
+ 'agents the context they need. LangCache provides semantic response caching: incoming ' +
+ 'queries are embedded and compared against cached query-response pairs, returning a cached ' +
+ 'answer when cosine similarity exceeds a configurable threshold to reduce LLM API costs. ' +
+ 'Agent Memory offers two-tier persistent memory with a session layer for recent turns and ' +
+ 'a long-term layer backed by vector search, available as a REST API and Python SDK. ' +
+ 'Context Retriever exposes structured business data as governed tools that agents can ' +
+ 'query reliably without writing custom retrieval logic. Data Integration keeps a Redis ' +
+ 'Cloud database in sync with relational databases in near real time using Change Data ' +
+ 'Capture, so agents always query fresh data.'
+ },
+];
+
+
+class KnowledgeAssistant {
+ constructor(sessionId) {
+ this.sessionId = sessionId || crypto.randomUUID();
+ this.sessionKey = `session:${this.sessionId}:history`;
+ this.client = null;
+ this.llm = null;
+ this.llmModel = null;
+ }
+
+ async init() {
+ if (!process.env.LLM_API_KEY) {
+ throw new Error('LLM_API_KEY environment variable is required');
+ }
+
+ const redisUrl = process.env.REDIS_URL ||
+ `redis://${process.env.REDIS_HOST || 'localhost'}:${process.env.REDIS_PORT || 6379}`;
+
+ this.client = createClient({
+ url: redisUrl,
+ password: process.env.REDIS_PASSWORD || undefined,
+ socket: {
+ reconnectStrategy: (retries) =>
+ retries < 3 ? Math.min(retries * 100, 1000) : new Error('Max retries exceeded')
+ }
+ });
+ this.client.on('error', (err) => console.error('Redis error:', err.message));
+ await this.client.connect();
+ console.log(`Connected to Redis. Session: ${this.sessionId}`);
+
+ this.llm = new OpenAI({
+ apiKey: process.env.LLM_API_KEY,
+ baseURL: process.env.LLM_API_BASE_URL || '${CONFIG.models[formData.llmModel].baseUrl}'
+ });
+ this.llmModel = process.env.LLM_MODEL || '${CONFIG.models[formData.llmModel].defaultModel}';
+ console.log(`LLM configured: ${this.llmModel}`);
+
+ await this._createIndexes();
+ }
+
+ async _createIndexes() {
+ const indexes = [
+ {
+ name: DOC_INDEX,
+ prefix: 'doc:',
+ schema: {
+ doc_id: { type: SchemaFieldTypes.TAG },
+ chunk_id: { type: SchemaFieldTypes.TAG },
+ title: { type: SchemaFieldTypes.TEXT },
+ source: { type: SchemaFieldTypes.TAG },
+ content: { type: SchemaFieldTypes.TEXT },
+ embedding: {
+ type: SchemaFieldTypes.VECTOR,
+ ALGORITHM: VectorAlgorithms.FLAT,
+ TYPE: 'FLOAT32',
+ DIM: VECTOR_DIM,
+ DISTANCE_METRIC: 'COSINE'
+ }
+ }
+ },
+ {
+ name: CACHE_INDEX,
+ prefix: 'ragcache:',
+ schema: {
+ response: { type: SchemaFieldTypes.TEXT },
+ citations: { type: SchemaFieldTypes.TEXT },
+ query_embedding: {
+ type: SchemaFieldTypes.VECTOR,
+ ALGORITHM: VectorAlgorithms.FLAT,
+ TYPE: 'FLOAT32',
+ DIM: VECTOR_DIM,
+ DISTANCE_METRIC: 'COSINE'
+ }
+ }
+ }
+ ];
+
+ for (const { name, prefix, schema } of indexes) {
+ try {
+ await this.client.ft.create(name, schema, { ON: 'HASH', PREFIX: [prefix] });
+ } catch (err) {
+ if (!err.message.includes('Index already exists')) throw err;
+ }
+ }
+ }
+
+ // ── Document ingestion ────────────────────────────────────────────────────
+
+ async loadDirectory(dirPath, extensions = ['.txt', '.md']) {
+ const fs = require('fs').promises;
+ const path = require('path');
+
+ const walk = async (dir) => {
+ const entries = await fs.readdir(dir, { withFileTypes: true });
+ const files = [];
+ for (const entry of entries) {
+ const full = path.join(dir, entry.name);
+ if (entry.isDirectory()) files.push(...await walk(full));
+ else if (extensions.includes(path.extname(entry.name).toLowerCase())) files.push(full);
+ }
+ return files;
+ };
+
+ const files = (await walk(dirPath)).sort();
+ let loaded = 0;
+ for (const file of files) {
+ try {
+ const content = (await fs.readFile(file, 'utf-8')).trim();
+ if (content) {
+ await this.ingestDocument(content, path.basename(file, path.extname(file)), file);
+ loaded++;
+ }
+ } catch (err) {
+ console.warn(`Skipping ${file}: ${err.message}`);
+ }
+ }
+ console.log(`Loaded ${loaded} document(s) from ${dirPath}`);
+ return loaded;
+ }
+
+ _chunkText(text) {
+ // Character-based chunking is simple but not token-aware. For production,
+ // consider a token-counting library (e.g. js-tiktoken).
+ const chunks = [];
+ let start = 0;
+ while (start < text.length) {
+ chunks.push(text.slice(start, start + CHUNK_SIZE));
+ start += CHUNK_SIZE - CHUNK_OVERLAP;
+ }
+ return chunks;
+ }
+
+ async _embed(text) {
+ const resp = await this.llm.embeddings.create({
+ model: EMBEDDING_MODEL,
+ input: text.slice(0, 8000)
+ });
+ return resp.data[0].embedding;
+ }
+
+ _toBuffer(embedding) {
+ const buf = Buffer.allocUnsafe(embedding.length * 4);
+ embedding.forEach((v, i) => buf.writeFloatLE(v, i * 4));
+ return buf;
+ }
+
+ async ingestDocument(content, title, source = '') {
+ const docId = crypto.randomUUID();
+ const chunks = this._chunkText(content);
+ for (let i = 0; i < chunks.length; i++) {
+ const chunkId = `${docId}:${i}`;
+ const embedding = await this._embed(chunks[i]);
+ await this.client.hSet(`doc:${chunkId}`, {
+ doc_id: docId,
+ chunk_id: chunkId,
+ title,
+ source,
+ content: chunks[i],
+ embedding: this._toBuffer(embedding)
+ });
+ }
+ console.log(`Ingested '${title}': ${chunks.length} chunk(s) (doc_id: ${docId})`);
+ return docId;
+ }
+
+ // ── Hybrid search ─────────────────────────────────────────────────────────
+ // First pass: FT.SEARCH with a text pre-filter and an inline KNN clause —
+ // "(text_terms)=>[KNN k @embedding $BLOB AS distance]" — so Redis applies
+ // both filters in a single round trip. This is more Redis-native than running
+ // two separate queries and fusing the results in JavaScript.
+ // Second pass (fallback): if the text filter is too selective and returns nothing,
+ // a pure vector search is issued so queries always return results when documents exist.
+
+ _sanitizeFtQuery(text) {
+ // Escape RediSearch special characters rather than strip them, so tokens like
+ // "C++", "redis.io", and non-English text survive into the query intact.
+ // We OR-join per-word terms for recall; the KNN step handles ranking.
+ // This is a best-effort heuristic — see FT_SPECIAL_CHARS comment above.
+ const terms = text.split(/\s+/).filter(Boolean);
+ if (terms.length === 0) return '*';
+ const escaped = terms.map(t => t.replace(FT_SPECIAL_CHARS, '\\$&'));
+ return escaped.slice(0, 10).join(' | '); // cap at 10 terms
+ }
+
+ async _runKnnQuery(queryStr, queryEmbedding, topK) {
+ const results = await this.client.ft.search(DOC_INDEX, queryStr, {
+ PARAMS: { K: topK, BLOB: this._toBuffer(queryEmbedding) },
+ SORTBY: { BY: 'distance', DIRECTION: 'ASC' },
+ DIALECT: 2,
+ RETURN: ['chunk_id', 'doc_id', 'title', 'source', 'content', 'distance']
+ });
+ return (results?.documents ?? []).map(d => ({ id: d.id, ...d.value }));
+ }
+
+ async _hybridSearch(queryText, queryEmbedding, topK = MAX_SEARCH_RESULTS) {
+ const safeText = this._sanitizeFtQuery(queryText);
+ if (safeText !== '*') {
+ try {
+ const results = await this._runKnnQuery(
+ `(${safeText})=>[KNN $K @embedding $BLOB AS distance]`,
+ queryEmbedding, topK
+ );
+ if (results.length > 0) return results;
+ } catch (err) {
+ console.error('Hybrid search error:', err.message);
+ }
+ }
+ // Fall back to pure vector search if the text filter returned nothing
+ return this._runKnnQuery(
+ `*=>[KNN $K @embedding $BLOB AS distance]`,
+ queryEmbedding, topK
+ );
+ }
+
+ // ── Semantic cache ────────────────────────────────────────────────────────
+
+ async _checkCache(queryEmbedding) {
+ const results = await this.client.ft.search(
+ CACHE_INDEX,
+ `*=>[KNN 1 @query_embedding $BLOB AS distance]`,
+ {
+ PARAMS: { BLOB: this._toBuffer(queryEmbedding) },
+ SORTBY: { BY: 'distance', DIRECTION: 'ASC' },
+ DIALECT: 2,
+ RETURN: ['response', 'citations', 'distance']
+ }
+ );
+ const top = results?.documents?.[0];
+ if (!top) return null;
+ // vector_distance for cosine: 0=identical, 1=orthogonal. Hit when similarity > CACHE_THRESHOLD.
+ const dist = parseFloat(top.value.distance ?? '1');
+ if (dist < (1 - CACHE_THRESHOLD)) {
+ return {
+ response: top.value.response,
+ citations: JSON.parse(top.value.citations ?? '[]')
+ };
+ }
+ return null;
+ }
+
+ async _storeCache(queryEmbedding, response, citations) {
+ const key = `ragcache:${crypto.randomUUID()}`;
+ await this.client.hSet(key, {
+ response,
+ citations: JSON.stringify(citations),
+ query_embedding: this._toBuffer(queryEmbedding)
+ });
+ await this.client.expire(key, CACHE_TTL);
+ }
+
+ // ── Session memory ────────────────────────────────────────────────────────
+
+ async _getHistory() {
+ const raw = await this.client.lRange(this.sessionKey, 0, MAX_HISTORY_TURNS * 2 - 1);
+ return raw.reverse().map(s => {
+ try { return JSON.parse(s); } catch { return null; }
+ }).filter(Boolean);
+ }
+
+ async _saveHistory(role, content) {
+ await this.client.lPush(this.sessionKey, JSON.stringify({ role, content }));
+ await this.client.lTrim(this.sessionKey, 0, MAX_HISTORY_TURNS * 2 - 1);
+ }
+
+ // ── Query ─────────────────────────────────────────────────────────────────
+
+ async query(userQuery) {
+ const queryEmbedding = await this._embed(userQuery);
+
+ const cached = await this._checkCache(queryEmbedding);
+ if (cached) {
+ console.log('[cache hit]');
+ await this._saveHistory('user', userQuery);
+ await this._saveHistory('assistant', cached.response);
+ return cached;
+ }
+
+ const topChunks = await this._hybridSearch(userQuery, queryEmbedding);
+ if (topChunks.length === 0) {
+ return { response: 'No documents found. Please ingest documents before querying.', citations: [] };
+ }
+
+ const contextParts = [];
+ const citations = [];
+ topChunks.forEach((chunk, i) => {
+ contextParts.push(`[${i + 1}] ${chunk.title ?? 'Unknown'}\n${chunk.content ?? ''}`);
+ citations.push({
+ index: i + 1,
+ title: chunk.title ?? 'Unknown',
+ source: chunk.source ?? '',
+ chunk_id: chunk.chunk_id ?? '',
+ doc_id: chunk.doc_id ?? ''
+ });
+ });
+
+ const history = await this._getHistory();
+ const messages = [
+ {
+ role: 'system',
+ content: 'You are a helpful knowledge assistant. Answer using only the provided context. ' +
+ 'Reference sources as [1], [2], etc. If the context lacks the answer, say so clearly.'
+ },
+ ...history,
+ { role: 'user', content: `Context:\n${contextParts.join('\n\n')}\n\nQuestion: ${userQuery}` }
+ ];
+
+ const completion = await this.llm.chat.completions.create({
+ model: this.llmModel,
+ messages
+ });
+ const answer = completion.choices[0].message.content;
+
+ await this._storeCache(queryEmbedding, answer, citations);
+ await this._saveHistory('user', userQuery);
+ await this._saveHistory('assistant', answer);
+ return { response: answer, citations };
+ }
+}
+
+async function main() {
+ const agent = new KnowledgeAssistant();
+ await agent.init();
+
+ // Only ingest sample documents when the index is empty so re-running the agent
+ // does not re-embed the same content on every startup.
+ // To load your own documents instead: await agent.loadDirectory('path/to/docs');
+ const indexInfo = await agent.client.ft.info(DOC_INDEX);
+ if (parseInt(indexInfo.numDocs ?? '0') === 0) {
+ console.log('Empty index — ingesting sample documents...');
+ for (const doc of SAMPLE_DOCS) {
+ await agent.ingestDocument(doc.content, doc.title, doc.source);
+ }
+ } else {
+ console.log(`Index already contains ${indexInfo.numDocs} document(s). Skipping ingestion.`);
+ }
+
+ const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+ console.log('\nKnowledge Assistant ready. Type your questions or "quit" to exit.\n');
+
+ const ask = () => {
+ rl.question('Question: ', async (input) => {
+ const trimmed = input.trim();
+ if (['quit', 'exit', 'bye'].includes(trimmed.toLowerCase())) {
+ console.log('Goodbye!');
+ rl.close();
+ await agent.client.quit();
+ return;
+ }
+ if (!trimmed) {
+ ask();
+ return;
+ }
+ try {
+ const { response, citations } = await agent.query(trimmed);
+ console.log(`\n${response}`);
+ if (citations.length > 0) {
+ console.log('\nSources:');
+ citations.forEach(c => {
+ const src = c.source ? ` — ${c.source}` : '';
+ console.log(` [${c.index}] ${c.title}${src}`);
+ console.log(` chunk_id: ${c.chunk_id}`);
+ });
+ }
+ console.log();
+ } catch (err) {
+ console.error('Error:', err.message);
+ }
+ ask();
+ });
+ };
+ ask();
+}
+
+main().catch(err => {
+ console.error('Failed to initialize:', err.message);
+ process.exit(1);
+});
diff --git a/static/code/agent-templates/python/rag_agent.py b/static/code/agent-templates/python/rag_agent.py
new file mode 100644
index 0000000000..2c522a8330
--- /dev/null
+++ b/static/code/agent-templates/python/rag_agent.py
@@ -0,0 +1,442 @@
+'''
+Redis Knowledge Assistant (RAG Agent)
+
+Features:
+- Ingest documents with automatic chunking and embedding
+- Redis-native hybrid retrieval: text pre-filter + KNN vector search, with a fallback vector pass
+- Semantic cache: skip LLM for similar queries using cached responses (TTL-based expiry)
+- Per-session conversation memory in a Redis List
+- Citations: each answer references source documents with title, source URL, and chunk ID
+
+To run this code:
+ Install dependencies:
+ pip install redisvl[all] redis openai
+
+ Set environment variables:
+ export LLM_API_KEY=your_api_key_here
+ export LLM_API_BASE_URL=your_${formData.llmModel.toLowerCase()}_api_base_url
+ (optional - default: ${CONFIG.models[formData.llmModel].baseUrl})
+ export LLM_MODEL=your_${formData.llmModel.toLowerCase()}_model
+ (optional - default: ${CONFIG.models[formData.llmModel].defaultModel})
+ export REDIS_HOST=your_redis_host
+ export REDIS_PORT=your_redis_port
+ export REDIS_PASSWORD=your_redis_password
+
+ Note: this template uses the OpenAI SDK with a configurable base URL. It works with
+ OpenAI directly and with any provider that exposes an OpenAI-compatible API endpoint.
+
+ Requires Redis Stack or Redis 8+ with Search module enabled.
+'''
+
+import json
+import os
+import re
+import struct
+import uuid
+
+import openai
+import redis
+from redis.commands.search.query import Query as FTQuery
+from redisvl.index import SearchIndex
+from redisvl.schema import IndexSchema
+
+CHUNK_SIZE = 500
+CHUNK_OVERLAP = 50
+MAX_SEARCH_RESULTS = 5
+MAX_HISTORY_TURNS = 6
+CACHE_TTL = 3600
+# Cosine similarity threshold for cache hits. vector_distance for cosine is in [0, 2]:
+# 0 = identical, 1 = orthogonal, 2 = opposite. A hit fires when distance < (1 - threshold),
+# i.e. when cosine similarity > threshold. Treat this as a value to test, not assume.
+CACHE_THRESHOLD = 0.92
+VECTOR_DIM = int(os.getenv('VECTOR_DIM', '1536'))
+EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
+
+# RediSearch special characters that must be backslash-escaped in query strings.
+# Escaping preserves token meaning — "C\+\+" still matches "C++" in documents —
+# whereas stripping would silently discard the + characters and change query intent.
+# Note: this is a best-effort heuristic. It handles the common cases well but is
+# not a full RediSearch query parser; phrase queries and advanced syntax may still
+# need manual adjustment.
+_FT_SPECIAL = re.compile(r'([,.<>{}\[\]"\':;!@#$%^&*()\-+=~|/\\?])')
+
+_DOC_SCHEMA = {
+ 'index': {'name': 'knowledge_docs', 'prefix': 'doc', 'storage_type': 'hash'},
+ 'fields': [
+ {'name': 'doc_id', 'type': 'tag'},
+ {'name': 'chunk_id', 'type': 'tag'},
+ {'name': 'title', 'type': 'text'},
+ {'name': 'source', 'type': 'tag'},
+ {'name': 'content', 'type': 'text'},
+ {'name': 'embedding', 'type': 'vector',
+ 'attrs': {'dims': VECTOR_DIM, 'algorithm': 'flat',
+ 'distance_metric': 'cosine', 'datatype': 'float32'}}
+ ]
+}
+
+_CACHE_SCHEMA = {
+ 'index': {'name': 'knowledge_cache', 'prefix': 'ragcache', 'storage_type': 'hash'},
+ 'fields': [
+ {'name': 'response', 'type': 'text'},
+ {'name': 'citations', 'type': 'text'},
+ {'name': 'query_embedding', 'type': 'vector',
+ 'attrs': {'dims': VECTOR_DIM, 'algorithm': 'flat',
+ 'distance_metric': 'cosine', 'datatype': 'float32'}}
+ ]
+}
+
+
+# Sample Redis documentation for demonstration.
+# Replace with your own documents or use load_directory() to load a folder of .txt / .md files.
+#
+# Each entry is a dict with three string values:
+#   'title'   - document title
+#   'source'  - where the text came from (here, a documentation URL)
+#   'content' - the document text, written as adjacent string literals that Python
+#               joins into a single string
+SAMPLE_DOCS = [
+ {
+ 'title': 'Redis Data Types',
+ 'source': 'https://redis.io/docs/latest/develop/data-types/',
+ 'content': (
+ 'Redis supports several core data types suited to different use cases. Strings store '
+ 'sequences of bytes up to 512 MB and support atomic increment and decrement operations. '
+ 'Lists are linked lists of strings with O(1) push and pop from both ends, useful for '
+ 'queues and stacks. Sets are unordered collections of unique strings with O(1) add, '
+ 'remove, and membership tests, plus union, intersection, and difference operations. '
+ 'Sorted sets add a floating-point score to each member, enabling range queries by score '
+ 'or rank in O(log N) time. Hashes store field-value pairs in a single key, ideal for '
+ 'representing objects without serialization. Redis also supports Streams for append-only '
+ 'logs with consumer groups, HyperLogLog for approximate cardinality estimation, Bitmaps '
+ 'for efficient bit-level operations, and Geospatial indexes for location-based queries.'
+ )
+ },
+ {
+ 'title': 'Redis Vector Search',
+ 'source': 'https://redis.io/docs/latest/develop/ai/search-and-query/vectors/',
+ 'content': (
+ 'Redis Vector Search lets you index and search vector embeddings stored in HASH or JSON '
+ 'documents. Two index algorithms are available: FLAT (brute-force, exact results, best for '
+ 'smaller datasets) and HNSW (Hierarchical Navigable Small World, approximate results, '
+ 'much faster at scale using a multi-layer graph structure). Supported distance metrics are '
+ 'cosine similarity, L2 Euclidean distance, and inner product. Hybrid queries combine a '
+ 'vector KNN clause with a RediSearch filter expression in a single FT.SEARCH call, '
+ 'pre-filtering documents by metadata before ranking by vector distance. This avoids '
+ 'post-filtering and keeps result quality high. Vector fields are declared with DIM '
+ '(dimension count), TYPE (FLOAT32 or FLOAT64), and DISTANCE_METRIC parameters.'
+ )
+ },
+ {
+ 'title': 'Redis Cloud',
+ 'source': 'https://redis.io/docs/latest/operate/rc/',
+ 'content': (
+ 'Redis Cloud is the fully managed cloud service for Redis, available on AWS, Google Cloud, '
+ 'and Microsoft Azure. It provides automatic clustering, replication, and failover for high '
+ 'availability and data durability without operational overhead. Deployment options include '
+ 'Redis Stack for development, Redis Enterprise for mission-critical workloads, and active-'
+ 'active geo-distribution for multi-region deployments with conflict-free replication. '
+ 'Built-in monitoring, automated backups, and vertical and horizontal scaling are included. '
+ 'A free tier is available for development and testing. Supported modules include RediSearch '
+ 'for full-text and vector search, RedisJSON for native JSON documents, RedisTimeSeries for '
+ 'time-series data, and RedisBloom for probabilistic structures such as Bloom filters and '
+ 'Count-Min sketches.'
+ )
+ },
+ {
+ 'title': 'Redis Context Engine',
+ 'source': 'https://redis.io/docs/latest/develop/ai/context-engine/',
+ 'content': (
+ 'The Redis Context Engine is a suite of managed services on Redis Cloud that gives AI '
+ 'agents the context they need. LangCache provides semantic response caching: incoming '
+ 'queries are embedded and compared against cached query-response pairs, returning a cached '
+ 'answer when cosine similarity exceeds a configurable threshold to reduce LLM API costs. '
+ 'Agent Memory offers two-tier persistent memory with a session layer for recent turns and '
+ 'a long-term layer backed by vector search, available as a REST API and Python SDK. '
+ 'Context Retriever exposes structured business data as governed tools that agents can '
+ 'query reliably without writing custom retrieval logic. Data Integration keeps a Redis '
+ 'Cloud database in sync with relational databases in near real time using Change Data '
+ 'Capture, so agents always query fresh data.'
+ )
+ },
+]
+
+
+class KnowledgeAssistant:
+ def __init__(self, session_id=None):
+ self.session_id = session_id or str(uuid.uuid4())
+ self.session_key = f'session:{self.session_id}:history'
+
+ self.llm_api_key = os.getenv('LLM_API_KEY')
+ if not self.llm_api_key:
+ raise ValueError('LLM_API_KEY environment variable is required')
+ self.llm_base_url = os.getenv('LLM_API_BASE_URL', '${CONFIG.models[formData.llmModel].baseUrl}')
+ self.llm_model = os.getenv('LLM_MODEL', '${CONFIG.models[formData.llmModel].defaultModel}')
+
+ try:
+ # Single client with decode_responses=False handles both text and binary (embedding) fields.
+ self.client = redis.Redis(
+ host=os.getenv('REDIS_HOST', 'localhost'),
+ port=int(os.getenv('REDIS_PORT', 6379)),
+ username=os.getenv('REDIS_USERNAME', 'default'),
+ password=os.getenv('REDIS_PASSWORD', ''),
+ decode_responses=False,
+ socket_connect_timeout=5
+ )
+ self.client.ping()
+ print(f'Connected to Redis. Session: {self.session_id}')
+ except redis.ConnectionError as e:
+ print(f'Failed to connect to Redis: {e}')
+ raise
+
+ self.llm = openai.OpenAI(api_key=self.llm_api_key, base_url=self.llm_base_url)
+ print(f'LLM configured: {self.llm_model}')
+
+ # redisvl is used only for index creation; all queries use redis-py directly.
+ doc_index = SearchIndex(IndexSchema.from_dict(_DOC_SCHEMA), redis_client=self.client)
+ cache_index = SearchIndex(IndexSchema.from_dict(_CACHE_SCHEMA), redis_client=self.client)
+ doc_index.create(overwrite=False)
+ cache_index.create(overwrite=False)
+
+ # ── Document ingestion ────────────────────────────────────────────────────
+
+ def load_directory(self, path, extensions=('.txt', '.md')):
+ """Ingest all matching files from a directory tree. Each file becomes one document."""
+ import pathlib
+ loaded = 0
+ for filepath in sorted(pathlib.Path(path).rglob('*')):
+ if filepath.suffix.lower() in extensions and filepath.is_file():
+ try:
+ content = filepath.read_text(encoding='utf-8', errors='ignore').strip()
+ if content:
+ self.ingest_document(content, title=filepath.stem, source=str(filepath))
+ loaded += 1
+ except Exception as e:
+ print(f'Skipping {filepath}: {e}')
+ print(f'Loaded {loaded} document(s) from {path}')
+ return loaded
+
+ def _chunk_text(self, text):
+ # Character-based chunking is simple but not token-aware. For production,
+ # consider tiktoken or RecursiveCharacterTextSplitter from langchain.
+ chunks, start = [], 0
+ while start < len(text):
+ chunks.append(text[start:start + CHUNK_SIZE])
+ start += CHUNK_SIZE - CHUNK_OVERLAP
+ return chunks
+
+ def _embed(self, text):
+ resp = self.llm.embeddings.create(model=EMBEDDING_MODEL, input=text[:8000])
+ return resp.data[0].embedding
+
+ def _to_bytes(self, embedding):
+ return struct.pack(f'{len(embedding)}f', *embedding)
+
+ def _decode_doc(self, doc):
+ def d(val):
+ return val.decode('utf-8', errors='replace') if isinstance(val, bytes) else (val or '')
+ return {
+ 'id': d(doc.id),
+ 'chunk_id': d(getattr(doc, 'chunk_id', '')),
+ 'doc_id': d(getattr(doc, 'doc_id', '')),
+ 'title': d(getattr(doc, 'title', '')),
+ 'source': d(getattr(doc, 'source', '')),
+ 'content': d(getattr(doc, 'content', '')),
+ 'distance': d(getattr(doc, 'distance', '1.0'))
+ }
+
+ def ingest_document(self, content, title, source=''):
+ doc_id = str(uuid.uuid4())
+ chunks = self._chunk_text(content)
+ for i, chunk in enumerate(chunks):
+ chunk_id = f'{doc_id}:{i}'
+ embedding = self._embed(chunk)
+ self.client.hset(f'doc:{chunk_id}', mapping={
+ b'doc_id': doc_id.encode(),
+ b'chunk_id': chunk_id.encode(),
+ b'title': title.encode(),
+ b'source': source.encode(),
+ b'content': chunk.encode(),
+ b'embedding': self._to_bytes(embedding)
+ })
+ print(f"Ingested '{title}': {len(chunks)} chunk(s) (doc_id: {doc_id})")
+ return doc_id
+
+ # ── Hybrid search ─────────────────────────────────────────────────────────
+ # First pass: FT.SEARCH with a text pre-filter and an inline KNN clause —
+ # "(text_terms) => [KNN k @embedding $BLOB AS distance]" — so Redis applies
+ # both filters in a single round trip. This is more Redis-native than running
+ # two separate queries and fusing the results in Python.
+ # Second pass (fallback): if the text filter is too selective and returns nothing,
+ # a pure vector search is issued so queries always return results when documents exist.
+
+ def _sanitize_ft_query(self, text):
+ # Escape RediSearch special characters rather than strip them, so tokens like
+ # "C++", "redis.io", and non-English text survive into the query intact.
+ # We OR-join per-word terms for recall; the KNN step handles ranking.
+ # This is a best-effort heuristic — see _FT_SPECIAL comment above.
+ terms = text.split()
+ if not terms:
+ return '*'
+ escaped = [_FT_SPECIAL.sub(r'\\\1', t) for t in terms]
+ return ' | '.join(escaped[:10]) # cap at 10 terms
+
+ def _run_knn_query(self, query_str, query_embedding, top_k):
+ return self.client.ft('knowledge_docs').search(
+ FTQuery(query_str)
+ .sort_by('distance', asc=True)
+ .paging(0, top_k)
+ .return_fields('chunk_id', 'doc_id', 'title', 'source', 'content', 'distance')
+ .dialect(2),
+ query_params={'K': top_k, 'BLOB': self._to_bytes(query_embedding)}
+ )
+
+ def _hybrid_search(self, query_text, query_embedding, top_k=MAX_SEARCH_RESULTS):
+ safe_text = self._sanitize_ft_query(query_text)
+ if safe_text != '*':
+ try:
+ result = self._run_knn_query(
+ f'({safe_text})=>[KNN $K @embedding $BLOB AS distance]',
+ query_embedding, top_k
+ )
+ if result.docs:
+ return [self._decode_doc(d) for d in result.docs]
+ except Exception as e:
+ print(f'Hybrid search error: {e}')
+ # Fall back to pure vector search if the text filter returned nothing
+ result = self._run_knn_query(
+ '*=>[KNN $K @embedding $BLOB AS distance]',
+ query_embedding, top_k
+ )
+ return [self._decode_doc(d) for d in result.docs]
+
+ # ── Semantic cache ────────────────────────────────────────────────────────
+
+ def _check_cache(self, query_embedding):
+ try:
+ result = self.client.ft('knowledge_cache').search(
+ FTQuery('*=>[KNN 1 @query_embedding $BLOB AS distance]')
+ .sort_by('distance', asc=True)
+ .paging(0, 1)
+ .return_fields('response', 'citations', 'distance')
+ .dialect(2),
+ query_params={'BLOB': self._to_bytes(query_embedding)}
+ )
+ if result.docs:
+ doc = self._decode_doc(result.docs[0])
+ # vector_distance for cosine: 0=identical, 1=orthogonal. Hit when similarity > CACHE_THRESHOLD.
+ if float(doc.get('distance', '1.0')) < (1.0 - CACHE_THRESHOLD):
+ return doc.get('response', ''), json.loads(doc.get('citations', '[]'))
+ except Exception:
+ pass
+ return None, None
+
+ def _store_cache(self, query_embedding, response, citations):
+ key = f'ragcache:{uuid.uuid4()}'
+ self.client.hset(key, mapping={
+ b'response': response.encode(),
+ b'citations': json.dumps(citations).encode(),
+ b'query_embedding': self._to_bytes(query_embedding)
+ })
+ self.client.expire(key, CACHE_TTL)
+
+ # ── Session memory ────────────────────────────────────────────────────────
+
+ def _get_history(self):
+ raw = self.client.lrange(self.session_key, 0, MAX_HISTORY_TURNS * 2 - 1)
+ messages = []
+ for item in reversed(raw):
+ try:
+ messages.append(json.loads(item.decode() if isinstance(item, bytes) else item))
+ except Exception:
+ pass
+ return messages
+
+ def _save_history(self, role, content):
+ self.client.lpush(self.session_key, json.dumps({'role': role, 'content': content}).encode())
+ self.client.ltrim(self.session_key, 0, MAX_HISTORY_TURNS * 2 - 1)
+
+ # ── Query ─────────────────────────────────────────────────────────────────
+
+ def query(self, user_query):
+ query_embedding = self._embed(user_query)
+
+ cached_response, cached_citations = self._check_cache(query_embedding)
+ if cached_response:
+ print('[cache hit]')
+ self._save_history('user', user_query)
+ self._save_history('assistant', cached_response)
+ return cached_response, cached_citations
+
+ top_chunks = self._hybrid_search(user_query, query_embedding)
+ if not top_chunks:
+ return 'No documents found. Please ingest documents before querying.', []
+
+ context_parts, citations = [], []
+ for i, chunk in enumerate(top_chunks):
+ title = chunk.get('title', 'Unknown')
+ source = chunk.get('source', '')
+ context_parts.append(f'[{i + 1}] {title}\n{chunk.get("content", "")}')
+ citations.append({
+ 'index': i + 1,
+ 'title': title,
+ 'source': source,
+ 'chunk_id': chunk.get('chunk_id', ''),
+ 'doc_id': chunk.get('doc_id', '')
+ })
+
+ messages = [
+ {'role': 'system', 'content': (
+ 'You are a helpful knowledge assistant. Answer using only the provided context. '
+ 'Reference sources as [1], [2], etc. If the context lacks the answer, say so clearly.'
+ )},
+ *self._get_history(),
+ {'role': 'user', 'content': f'Context:\n{chr(10).join(context_parts)}\n\nQuestion: {user_query}'}
+ ]
+
+ response = self.llm.chat.completions.create(model=self.llm_model, messages=messages)
+ answer = response.choices[0].message.content
+
+ self._store_cache(query_embedding, answer, citations)
+ self._save_history('user', user_query)
+ self._save_history('assistant', answer)
+ return answer, citations
+
+
+if __name__ == '__main__':
+ try:
+ agent = KnowledgeAssistant()
+
+ # Only ingest sample documents when the index is empty so re-running the agent
+ # does not re-embed the same content on every startup.
+ # To load your own documents instead: agent.load_directory('path/to/docs')
+ index_info = agent.client.ft('knowledge_docs').info()
+ if int(index_info.get('num_docs', 0)) == 0:
+ print('Empty index — ingesting sample documents...')
+ for doc in SAMPLE_DOCS:
+ agent.ingest_document(doc['content'], doc['title'], doc['source'])
+ else:
+ print(f"Index already contains {index_info.get('num_docs')} document(s). Skipping ingestion.")
+
+ print('\nKnowledge Assistant ready. Type your questions or "quit" to exit.\n')
+ while True:
+ try:
+ user_input = input('Question: ').strip()
+ if user_input.lower() in ['quit', 'exit', 'bye']:
+ print('Goodbye!')
+ break
+ if not user_input:
+ continue
+ answer, citations = agent.query(user_input)
+ print(f'\n{answer}')
+ if citations:
+ print('\nSources:')
+ for c in citations:
+ src = f' — {c["source"]}' if c['source'] else ''
+ print(f' [{c["index"]}] {c["title"]}{src}')
+ print(f' chunk_id: {c["chunk_id"]}')
+ print()
+ except KeyboardInterrupt:
+ print('\nGoodbye!')
+ break
+ except Exception as e:
+ print(f'Error: {e}')
+ except ValueError as e:
+ print(f'Configuration error: {e}')
+ exit(1)
+ except Exception as e:
+ print(f'Failed to initialize: {e}')
+ exit(1)
diff --git a/static/js/agent-builder.js b/static/js/agent-builder.js
index 91f37ebee3..ff21b0ff36 100644
--- a/static/js/agent-builder.js
+++ b/static/js/agent-builder.js
@@ -20,6 +20,12 @@
description: "A chatbot that maintains conversation history using semantic message history and provides contextual responses.",
features: ["Conversation memory", "Context awareness", "Multi-turn dialogue"],
keywords: ["chat", "conversation", "assistant", "bot", "chatbot", "talk", "dialogue"]
+ },
+ rag: {
+ name: "Knowledge Assistant",
+ description: "A RAG agent that ingests documents, uses Redis-native hybrid retrieval (text pre-filter + vector search), semantic caching, and session memory to answer questions with citations.",
+ features: ["Document ingestion with chunking", "Hybrid vector + full-text search", "Semantic caching", "Citations"],
+ keywords: ["rag", "knowledge", "documents", "search", "retrieval", "qa", "question answering", "citations", "hybrid"]
}
},
languages: {
@@ -286,16 +292,18 @@
let suggestions = [];
switch (conversationState.step) {
- case 'agent-type':
+ case 'agent-type': {
+ const agentIcons = { recommendation: '🛍️', conversational: '💬', rag: '🔍' };
suggestions = Object.entries(CONFIG.agentTypes).map(([key, config]) => ({
value: key,
label: config.name,
- icon: key === 'recommendation' ? '🛍️' : '💬'
+ icon: agentIcons[key] || '🤖'
})).filter(s =>
s.label.toLowerCase().includes(lowerInput) ||
CONFIG.agentTypes[s.value].keywords.some(k => k.includes(lowerInput))
);
break;
+ }
case 'language':
suggestions = Object.entries(CONFIG.languages).map(([key, config]) => ({
@@ -381,7 +389,8 @@
// Generate a default agent name based on the type
const defaultNames = {
recommendation: 'RecommendationEngine',
- conversational: 'ConversationalAgent'
+ conversational: 'ConversationalAgent',
+ rag: 'KnowledgeAssistant'
};
conversationState.selections.agentName = defaultNames[selectedType] || 'RedisAgent';
@@ -398,7 +407,8 @@
} else {
addMessage("I didn't understand that. Please choose one of the agent types:", 'bot', [
{ value: 'recommendation', label: '🛍️ Recommendation Engine' },
- { value: 'conversational', label: '💬 Conversational Assistant' }
+ { value: 'conversational', label: '💬 Conversational Assistant' },
+ { value: 'rag', label: '🔍 Knowledge Assistant' }
]);
}
}
@@ -426,8 +436,8 @@
}
if (selectedLang) {
- // Check if it's Python (fully supported)
- if (selectedLang === 'python') {
+ // Check if it's a supported language
+ if (selectedLang === 'python' || selectedLang === 'javascript') {
conversationState.selections.programmingLanguage = selectedLang;
const config = CONFIG.languages[selectedLang];
@@ -445,9 +455,10 @@
const config = CONFIG.languages[selectedLang];
const languageName = config.name;
- addMessage(`${languageName} support is coming soon. Currently, only Python is fully supported.`, 'bot');
- addMessage(`Would you like to build a Python agent instead?`, 'bot', [
- { value: 'python', label: 'Yes, use Python' },
+ addMessage(`${languageName} support is coming soon. Currently, Python and JavaScript are supported.`, 'bot');
+ addMessage(`Would you like to build a Python or JavaScript agent instead?`, 'bot', [
+ { value: 'python', label: '🐍 Yes, use Python' },
+ { value: 'javascript', label: '🟨 Yes, use JavaScript' },
{ value: 'wait', label: 'I\'ll wait for ' + languageName }
]);
}
@@ -520,8 +531,8 @@
java: '.java',
csharp: '.cs'
};
- const base = window.HUGO_BASEURL || '';
- const filename = `${base}code/agent-templates/${formData.programmingLanguage}/${formData.agentType}_agent${fileExtensions[formData.programmingLanguage]}`;
+ const templateBase = (window.AGENT_TEMPLATE_BASE || '/code/agent-templates').replace(/\/$/, '');
+ const filename = `${templateBase}/${formData.programmingLanguage}/${formData.agentType}_agent${fileExtensions[formData.programmingLanguage]}`;
return loadTemplateFile(filename, formData) || genericTemplates[formData.programmingLanguage](formData);
}