diff --git a/content/develop/ai/agent-builder/_index.md b/content/develop/ai/agent-builder/_index.md index 8b27ce90fa..45f87ce306 100644 --- a/content/develop/ai/agent-builder/_index.md +++ b/content/develop/ai/agent-builder/_index.md @@ -30,16 +30,17 @@ Redis powers these capabilities with fast, reliable data storage and retrieval t ## What you can build -Choose from two types of intelligent agents: +Choose from three types of intelligent agents: - **Recommendation engines**: Personalized product and content recommendations - **Conversational assistants**: Chatbots with memory and context awareness +- **Knowledge assistants**: RAG agents that ingest documents, answer questions with citations, and use semantic caching The agent builder will generate complete, working code examples for your chosen agent type. ## Features -- **Multiple programming languages**: Generate code in Python, with JavaScript (Node.js), Java, and C# coming soon +- **Multiple programming languages**: Generate code in Python and JavaScript (Node.js), with Java and C# coming soon - **LLM integration**: Support for OpenAI, Anthropic Claude, and Llama 2 - **Redis optimized**: Uses Redis data structures for optimal performance diff --git a/layouts/shortcodes/agent-builder.html b/layouts/shortcodes/agent-builder.html index 8adeffb96b..f95ec26073 100644 --- a/layouts/shortcodes/agent-builder.html +++ b/layouts/shortcodes/agent-builder.html @@ -24,6 +24,7 @@

Build Your AI Agent + @@ -90,5 +91,7 @@

Generated Agent Code

+ + diff --git a/static/code/agent-templates/javascript/rag_agent.js b/static/code/agent-templates/javascript/rag_agent.js new file mode 100644 index 0000000000..3db0a8dc4e --- /dev/null +++ b/static/code/agent-templates/javascript/rag_agent.js @@ -0,0 +1,496 @@ +/* + * Redis Knowledge Assistant (RAG Agent) + * + * Features: + * - Ingest documents with automatic chunking and embedding + * - Redis-native hybrid retrieval: text pre-filter + KNN vector search, with a fallback vector pass + * - Semantic cache: skip LLM for similar queries (TTL-based expiry) + * - Per-session conversation memory in a Redis List + * - Citations: each answer references source documents with title, source URL, and chunk ID + * + * To run this code: + * Install dependencies: + * npm install redis openai + * + * Set environment variables: + * LLM_API_KEY=your_api_key_here + * LLM_API_BASE_URL=your_base_url (optional - default: ${CONFIG.models[formData.llmModel].baseUrl}) + * LLM_MODEL=your_model (optional - default: ${CONFIG.models[formData.llmModel].defaultModel}) + * REDIS_URL=redis://host:port (or REDIS_HOST / REDIS_PORT / REDIS_PASSWORD) + * + * Note: this template uses the OpenAI SDK with a configurable base URL. It works with + * OpenAI directly and with any provider that exposes an OpenAI-compatible API endpoint. + * + * Run: + * node rag_agent.js + * + * Requires Redis Stack or Redis 8+ with Search module enabled. + */ + +'use strict'; + +const { createClient, SchemaFieldTypes, VectorAlgorithms } = require('redis'); +const OpenAI = require('openai'); +const readline = require('readline'); +const crypto = require('crypto'); + +const CHUNK_SIZE = 500; +const CHUNK_OVERLAP = 50; +const MAX_SEARCH_RESULTS = 5; +const MAX_HISTORY_TURNS = 6; +const CACHE_TTL = 3600; +// Cosine similarity threshold for cache hits. vector_distance for cosine is in [0, 2]: +// 0 = identical, 1 = orthogonal, 2 = opposite. A hit fires when distance < (1 - threshold), +// i.e. when cosine similarity > threshold. Verify this against your node-redis version. +const CACHE_THRESHOLD = 0.92; +const VECTOR_DIM = parseInt(process.env.VECTOR_DIM) || 1536; +const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL || 'text-embedding-3-small'; + +const DOC_INDEX = 'knowledge_docs'; +const CACHE_INDEX = 'knowledge_cache'; + +// RediSearch special characters that must be backslash-escaped in query strings. +// Escaping preserves token meaning — "C\+\+" still matches "C++" in documents — +// whereas stripping would silently discard the + characters and change query intent. +// Note: this is a best-effort heuristic. It handles the common cases well but is not +// a full RediSearch query parser; phrase queries and advanced syntax may still produce +// unexpected results and will need manual adjustment. +const FT_SPECIAL_CHARS = /[,.<>{}\[\]"'`:;!@#$%^&*()\-+=~|\/\\?]/g; + +// Sample Redis documentation for demonstration. +// Replace with your own documents or use loadDirectory() to load a folder of .txt / .md files. +const SAMPLE_DOCS = [ + { + title: 'Redis Data Types', + source: 'https://redis.io/docs/latest/develop/data-types/', + content: 'Redis supports several core data types suited to different use cases. Strings store ' + + 'sequences of bytes up to 512 MB and support atomic increment and decrement operations. ' + + 'Lists are linked lists of strings with O(1) push and pop from both ends, useful for ' + + 'queues and stacks. Sets are unordered collections of unique strings with O(1) add, ' + + 'remove, and membership tests, plus union, intersection, and difference operations. ' + + 'Sorted sets add a floating-point score to each member, enabling range queries by score ' + + 'or rank in O(log N) time. Hashes store field-value pairs in a single key, ideal for ' + + 'representing objects without serialization. Redis also supports Streams for append-only ' + + 'logs with consumer groups, HyperLogLog for approximate cardinality estimation, Bitmaps ' + + 'for efficient bit-level operations, and Geospatial indexes for location-based queries.' + }, + { + title: 'Redis Vector Search', + source: 'https://redis.io/docs/latest/develop/ai/search-and-query/vectors/', + content: 'Redis Vector Search lets you index and search vector embeddings stored in HASH or JSON ' + + 'documents. Two index algorithms are available: FLAT (brute-force, exact results, best for ' + + 'smaller datasets) and HNSW (Hierarchical Navigable Small World, approximate results, ' + + 'much faster at scale using a multi-layer graph structure). Supported distance metrics are ' + + 'cosine similarity, L2 Euclidean distance, and inner product. Hybrid queries combine a ' + + 'vector KNN clause with a RediSearch filter expression in a single FT.SEARCH call, ' + + 'pre-filtering documents by metadata before ranking by vector distance. This avoids ' + + 'post-filtering and keeps result quality high. Vector fields are declared with DIM ' + + '(dimension count), TYPE (FLOAT32 or FLOAT64), and DISTANCE_METRIC parameters.' + }, + { + title: 'Redis Cloud', + source: 'https://redis.io/docs/latest/operate/rc/', + content: 'Redis Cloud is the fully managed cloud service for Redis, available on AWS, Google Cloud, ' + + 'and Microsoft Azure. It provides automatic clustering, replication, and failover for high ' + + 'availability and data durability without operational overhead. Deployment options include ' + + 'Redis Stack for development, Redis Enterprise for mission-critical workloads, and active-' + + 'active geo-distribution for multi-region deployments with conflict-free replication. ' + + 'Built-in monitoring, automated backups, and vertical and horizontal scaling are included. ' + + 'A free tier is available for development and testing. Supported modules include RediSearch ' + + 'for full-text and vector search, RedisJSON for native JSON documents, RedisTimeSeries for ' + + 'time-series data, and RedisBloom for probabilistic structures such as Bloom filters and ' + + 'Count-Min sketches.' + }, + { + title: 'Redis Context Engine', + source: 'https://redis.io/docs/latest/develop/ai/context-engine/', + content: 'The Redis Context Engine is a suite of managed services on Redis Cloud that gives AI ' + + 'agents the context they need. LangCache provides semantic response caching: incoming ' + + 'queries are embedded and compared against cached query-response pairs, returning a cached ' + + 'answer when cosine similarity exceeds a configurable threshold to reduce LLM API costs. ' + + 'Agent Memory offers two-tier persistent memory with a session layer for recent turns and ' + + 'a long-term layer backed by vector search, available as a REST API and Python SDK. ' + + 'Context Retriever exposes structured business data as governed tools that agents can ' + + 'query reliably without writing custom retrieval logic. Data Integration keeps a Redis ' + + 'Cloud database in sync with relational databases in near real time using Change Data ' + + 'Capture, so agents always query fresh data.' + }, +]; + + +class KnowledgeAssistant { + constructor(sessionId) { + this.sessionId = sessionId || crypto.randomUUID(); + this.sessionKey = `session:${this.sessionId}:history`; + this.client = null; + this.llm = null; + this.llmModel = null; + } + + async init() { + if (!process.env.LLM_API_KEY) { + throw new Error('LLM_API_KEY environment variable is required'); + } + + const redisUrl = process.env.REDIS_URL || + `redis://${process.env.REDIS_HOST || 'localhost'}:${process.env.REDIS_PORT || 6379}`; + + this.client = createClient({ + url: redisUrl, + password: process.env.REDIS_PASSWORD || undefined, + socket: { + reconnectStrategy: (retries) => + retries < 3 ? Math.min(retries * 100, 1000) : new Error('Max retries exceeded') + } + }); + this.client.on('error', (err) => console.error('Redis error:', err.message)); + await this.client.connect(); + console.log(`Connected to Redis. Session: ${this.sessionId}`); + + this.llm = new OpenAI({ + apiKey: process.env.LLM_API_KEY, + baseURL: process.env.LLM_API_BASE_URL || '${CONFIG.models[formData.llmModel].baseUrl}' + }); + this.llmModel = process.env.LLM_MODEL || '${CONFIG.models[formData.llmModel].defaultModel}'; + console.log(`LLM configured: ${this.llmModel}`); + + await this._createIndexes(); + } + + async _createIndexes() { + const indexes = [ + { + name: DOC_INDEX, + prefix: 'doc:', + schema: { + doc_id: { type: SchemaFieldTypes.TAG }, + chunk_id: { type: SchemaFieldTypes.TAG }, + title: { type: SchemaFieldTypes.TEXT }, + source: { type: SchemaFieldTypes.TAG }, + content: { type: SchemaFieldTypes.TEXT }, + embedding: { + type: SchemaFieldTypes.VECTOR, + ALGORITHM: VectorAlgorithms.FLAT, + TYPE: 'FLOAT32', + DIM: VECTOR_DIM, + DISTANCE_METRIC: 'COSINE' + } + } + }, + { + name: CACHE_INDEX, + prefix: 'ragcache:', + schema: { + response: { type: SchemaFieldTypes.TEXT }, + citations: { type: SchemaFieldTypes.TEXT }, + query_embedding: { + type: SchemaFieldTypes.VECTOR, + ALGORITHM: VectorAlgorithms.FLAT, + TYPE: 'FLOAT32', + DIM: VECTOR_DIM, + DISTANCE_METRIC: 'COSINE' + } + } + } + ]; + + for (const { name, prefix, schema } of indexes) { + try { + await this.client.ft.create(name, schema, { ON: 'HASH', PREFIX: [prefix] }); + } catch (err) { + if (!err.message.includes('Index already exists')) throw err; + } + } + } + + // ── Document ingestion ──────────────────────────────────────────────────── + + async loadDirectory(dirPath, extensions = ['.txt', '.md']) { + const fs = require('fs').promises; + const path = require('path'); + + const walk = async (dir) => { + const entries = await fs.readdir(dir, { withFileTypes: true }); + const files = []; + for (const entry of entries) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) files.push(...await walk(full)); + else if (extensions.includes(path.extname(entry.name).toLowerCase())) files.push(full); + } + return files; + }; + + const files = (await walk(dirPath)).sort(); + let loaded = 0; + for (const file of files) { + try { + const content = (await fs.readFile(file, 'utf-8')).trim(); + if (content) { + await this.ingestDocument(content, path.basename(file, path.extname(file)), file); + loaded++; + } + } catch (err) { + console.warn(`Skipping ${file}: ${err.message}`); + } + } + console.log(`Loaded ${loaded} document(s) from ${dirPath}`); + return loaded; + } + + _chunkText(text) { + // Character-based chunking is simple but not token-aware. For production, + // consider a token-counting library (e.g. js-tiktoken). + const chunks = []; + let start = 0; + while (start < text.length) { + chunks.push(text.slice(start, start + CHUNK_SIZE)); + start += CHUNK_SIZE - CHUNK_OVERLAP; + } + return chunks; + } + + async _embed(text) { + const resp = await this.llm.embeddings.create({ + model: EMBEDDING_MODEL, + input: text.slice(0, 8000) + }); + return resp.data[0].embedding; + } + + _toBuffer(embedding) { + const buf = Buffer.allocUnsafe(embedding.length * 4); + embedding.forEach((v, i) => buf.writeFloatLE(v, i * 4)); + return buf; + } + + async ingestDocument(content, title, source = '') { + const docId = crypto.randomUUID(); + const chunks = this._chunkText(content); + for (let i = 0; i < chunks.length; i++) { + const chunkId = `${docId}:${i}`; + const embedding = await this._embed(chunks[i]); + await this.client.hSet(`doc:${chunkId}`, { + doc_id: docId, + chunk_id: chunkId, + title, + source, + content: chunks[i], + embedding: this._toBuffer(embedding) + }); + } + console.log(`Ingested '${title}': ${chunks.length} chunk(s) (doc_id: ${docId})`); + return docId; + } + + // ── Hybrid search ───────────────────────────────────────────────────────── + // First pass: FT.SEARCH with a text pre-filter and an inline KNN clause — + // "(text_terms)=>[KNN k @embedding $BLOB AS distance]" — so Redis applies + // both filters in a single round trip. This is more Redis-native than running + // two separate queries and fusing the results in JavaScript. + // Second pass (fallback): if the text filter is too selective and returns nothing, + // a pure vector search is issued so queries always return results when documents exist. + + _sanitizeFtQuery(text) { + // Escape RediSearch special characters rather than strip them, so tokens like + // "C++", "redis.io", and non-English text survive into the query intact. + // We OR-join per-word terms for recall; the KNN step handles ranking. + // This is a best-effort heuristic — see FT_SPECIAL_CHARS comment above. + const terms = text.split(/\s+/).filter(Boolean); + if (terms.length === 0) return '*'; + const escaped = terms.map(t => t.replace(FT_SPECIAL_CHARS, '\\$&')); + return escaped.slice(0, 10).join(' | '); // cap at 10 terms + } + + async _runKnnQuery(queryStr, queryEmbedding, topK) { + const results = await this.client.ft.search(DOC_INDEX, queryStr, { + PARAMS: { K: topK, BLOB: this._toBuffer(queryEmbedding) }, + SORTBY: { BY: 'distance', DIRECTION: 'ASC' }, + DIALECT: 2, + RETURN: ['chunk_id', 'doc_id', 'title', 'source', 'content', 'distance'] + }); + return (results?.documents ?? []).map(d => ({ id: d.id, ...d.value })); + } + + async _hybridSearch(queryText, queryEmbedding, topK = MAX_SEARCH_RESULTS) { + const safeText = this._sanitizeFtQuery(queryText); + if (safeText !== '*') { + try { + const results = await this._runKnnQuery( + `(${safeText})=>[KNN $K @embedding $BLOB AS distance]`, + queryEmbedding, topK + ); + if (results.length > 0) return results; + } catch (err) { + console.error('Hybrid search error:', err.message); + } + } + // Fall back to pure vector search if the text filter returned nothing + return this._runKnnQuery( + `*=>[KNN $K @embedding $BLOB AS distance]`, + queryEmbedding, topK + ); + } + + // ── Semantic cache ──────────────────────────────────────────────────────── + + async _checkCache(queryEmbedding) { + const results = await this.client.ft.search( + CACHE_INDEX, + `*=>[KNN 1 @query_embedding $BLOB AS distance]`, + { + PARAMS: { BLOB: this._toBuffer(queryEmbedding) }, + SORTBY: { BY: 'distance', DIRECTION: 'ASC' }, + DIALECT: 2, + RETURN: ['response', 'citations', 'distance'] + } + ); + const top = results?.documents?.[0]; + if (!top) return null; + // vector_distance for cosine: 0=identical, 1=orthogonal. Hit when similarity > CACHE_THRESHOLD. + const dist = parseFloat(top.value.distance ?? '1'); + if (dist < (1 - CACHE_THRESHOLD)) { + return { + response: top.value.response, + citations: JSON.parse(top.value.citations ?? '[]') + }; + } + return null; + } + + async _storeCache(queryEmbedding, response, citations) { + const key = `ragcache:${crypto.randomUUID()}`; + await this.client.hSet(key, { + response, + citations: JSON.stringify(citations), + query_embedding: this._toBuffer(queryEmbedding) + }); + await this.client.expire(key, CACHE_TTL); + } + + // ── Session memory ──────────────────────────────────────────────────────── + + async _getHistory() { + const raw = await this.client.lRange(this.sessionKey, 0, MAX_HISTORY_TURNS * 2 - 1); + return raw.reverse().map(s => { + try { return JSON.parse(s); } catch { return null; } + }).filter(Boolean); + } + + async _saveHistory(role, content) { + await this.client.lPush(this.sessionKey, JSON.stringify({ role, content })); + await this.client.lTrim(this.sessionKey, 0, MAX_HISTORY_TURNS * 2 - 1); + } + + // ── Query ───────────────────────────────────────────────────────────────── + + async query(userQuery) { + const queryEmbedding = await this._embed(userQuery); + + const cached = await this._checkCache(queryEmbedding); + if (cached) { + console.log('[cache hit]'); + await this._saveHistory('user', userQuery); + await this._saveHistory('assistant', cached.response); + return cached; + } + + const topChunks = await this._hybridSearch(userQuery, queryEmbedding); + if (topChunks.length === 0) { + return { response: 'No documents found. Please ingest documents before querying.', citations: [] }; + } + + const contextParts = []; + const citations = []; + topChunks.forEach((chunk, i) => { + contextParts.push(`[${i + 1}] ${chunk.title ?? 'Unknown'}\n${chunk.content ?? ''}`); + citations.push({ + index: i + 1, + title: chunk.title ?? 'Unknown', + source: chunk.source ?? '', + chunk_id: chunk.chunk_id ?? '', + doc_id: chunk.doc_id ?? '' + }); + }); + + const history = await this._getHistory(); + const messages = [ + { + role: 'system', + content: 'You are a helpful knowledge assistant. Answer using only the provided context. ' + + 'Reference sources as [1], [2], etc. If the context lacks the answer, say so clearly.' + }, + ...history, + { role: 'user', content: `Context:\n${contextParts.join('\n\n')}\n\nQuestion: ${userQuery}` } + ]; + + const completion = await this.llm.chat.completions.create({ + model: this.llmModel, + messages + }); + const answer = completion.choices[0].message.content; + + await this._storeCache(queryEmbedding, answer, citations); + await this._saveHistory('user', userQuery); + await this._saveHistory('assistant', answer); + return { response: answer, citations }; + } +} + +async function main() { + const agent = new KnowledgeAssistant(); + await agent.init(); + + // Only ingest sample documents when the index is empty so re-running the agent + // does not re-embed the same content on every startup. + // To load your own documents instead: await agent.loadDirectory('path/to/docs'); + const indexInfo = await agent.client.ft.info(DOC_INDEX); + if (parseInt(indexInfo.numDocs ?? '0') === 0) { + console.log('Empty index — ingesting sample documents...'); + for (const doc of SAMPLE_DOCS) { + await agent.ingestDocument(doc.content, doc.title, doc.source); + } + } else { + console.log(`Index already contains ${indexInfo.numDocs} document(s). Skipping ingestion.`); + } + + const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); + console.log('\nKnowledge Assistant ready. Type your questions or "quit" to exit.\n'); + + const ask = () => { + rl.question('Question: ', async (input) => { + const trimmed = input.trim(); + if (['quit', 'exit', 'bye'].includes(trimmed.toLowerCase())) { + console.log('Goodbye!'); + rl.close(); + await agent.client.quit(); + return; + } + if (!trimmed) { + ask(); + return; + } + try { + const { response, citations } = await agent.query(trimmed); + console.log(`\n${response}`); + if (citations.length > 0) { + console.log('\nSources:'); + citations.forEach(c => { + const src = c.source ? ` — ${c.source}` : ''; + console.log(` [${c.index}] ${c.title}${src}`); + console.log(` chunk_id: ${c.chunk_id}`); + }); + } + console.log(); + } catch (err) { + console.error('Error:', err.message); + } + ask(); + }); + }; + ask(); +} + +main().catch(err => { + console.error('Failed to initialize:', err.message); + process.exit(1); +}); diff --git a/static/code/agent-templates/python/rag_agent.py b/static/code/agent-templates/python/rag_agent.py new file mode 100644 index 0000000000..2c522a8330 --- /dev/null +++ b/static/code/agent-templates/python/rag_agent.py @@ -0,0 +1,442 @@ +''' +Redis Knowledge Assistant (RAG Agent) + +Features: +- Ingest documents with automatic chunking and embedding +- Redis-native hybrid retrieval: text pre-filter + KNN vector search, with a fallback vector pass +- Semantic cache: skip LLM for similar queries using cached responses (TTL-based expiry) +- Per-session conversation memory in a Redis List +- Citations: each answer references source documents with title, source URL, and chunk ID + +To run this code: + Install dependencies: + pip install redisvl[all] redis openai + + Set environment variables: + export LLM_API_KEY=your_api_key_here + export LLM_API_BASE_URL=your_${formData.llmModel.toLowerCase()}_api_base_url + (optional - default: ${CONFIG.models[formData.llmModel].baseUrl}) + export LLM_MODEL=your_${formData.llmModel.toLowerCase()}_model + (optional - default: ${CONFIG.models[formData.llmModel].defaultModel}) + export REDIS_HOST=your_redis_host + export REDIS_PORT=your_redis_port + export REDIS_PASSWORD=your_redis_password + + Note: this template uses the OpenAI SDK with a configurable base URL. It works with + OpenAI directly and with any provider that exposes an OpenAI-compatible API endpoint. + + Requires Redis Stack or Redis 8+ with Search module enabled. +''' + +import json +import os +import re +import struct +import uuid + +import openai +import redis +from redis.commands.search.query import Query as FTQuery +from redisvl.index import SearchIndex +from redisvl.schema import IndexSchema + +CHUNK_SIZE = 500 +CHUNK_OVERLAP = 50 +MAX_SEARCH_RESULTS = 5 +MAX_HISTORY_TURNS = 6 +CACHE_TTL = 3600 +# Cosine similarity threshold for cache hits. vector_distance for cosine is in [0, 2]: +# 0 = identical, 1 = orthogonal, 2 = opposite. A hit fires when distance < (1 - threshold), +# i.e. when cosine similarity > threshold. Treat this as a value to test, not assume. +CACHE_THRESHOLD = 0.92 +VECTOR_DIM = int(os.getenv('VECTOR_DIM', '1536')) +EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') + +# RediSearch special characters that must be backslash-escaped in query strings. +# Escaping preserves token meaning — "C\+\+" still matches "C++" in documents — +# whereas stripping would silently discard the + characters and change query intent. +# Note: this is a best-effort heuristic. It handles the common cases well but is +# not a full RediSearch query parser; phrase queries and advanced syntax may still +# need manual adjustment. +_FT_SPECIAL = re.compile(r'([,.<>{}\[\]"\':;!@#$%^&*()\-+=~|/\\?])') + +_DOC_SCHEMA = { + 'index': {'name': 'knowledge_docs', 'prefix': 'doc', 'storage_type': 'hash'}, + 'fields': [ + {'name': 'doc_id', 'type': 'tag'}, + {'name': 'chunk_id', 'type': 'tag'}, + {'name': 'title', 'type': 'text'}, + {'name': 'source', 'type': 'tag'}, + {'name': 'content', 'type': 'text'}, + {'name': 'embedding', 'type': 'vector', + 'attrs': {'dims': VECTOR_DIM, 'algorithm': 'flat', + 'distance_metric': 'cosine', 'datatype': 'float32'}} + ] +} + +_CACHE_SCHEMA = { + 'index': {'name': 'knowledge_cache', 'prefix': 'ragcache', 'storage_type': 'hash'}, + 'fields': [ + {'name': 'response', 'type': 'text'}, + {'name': 'citations', 'type': 'text'}, + {'name': 'query_embedding', 'type': 'vector', + 'attrs': {'dims': VECTOR_DIM, 'algorithm': 'flat', + 'distance_metric': 'cosine', 'datatype': 'float32'}} + ] +} + + +# Sample Redis documentation for demonstration. +# Replace with your own documents or use load_directory() to load a folder of .txt / .md files. +SAMPLE_DOCS = [ + { + 'title': 'Redis Data Types', + 'source': 'https://redis.io/docs/latest/develop/data-types/', + 'content': ( + 'Redis supports several core data types suited to different use cases. Strings store ' + 'sequences of bytes up to 512 MB and support atomic increment and decrement operations. ' + 'Lists are linked lists of strings with O(1) push and pop from both ends, useful for ' + 'queues and stacks. Sets are unordered collections of unique strings with O(1) add, ' + 'remove, and membership tests, plus union, intersection, and difference operations. ' + 'Sorted sets add a floating-point score to each member, enabling range queries by score ' + 'or rank in O(log N) time. Hashes store field-value pairs in a single key, ideal for ' + 'representing objects without serialization. Redis also supports Streams for append-only ' + 'logs with consumer groups, HyperLogLog for approximate cardinality estimation, Bitmaps ' + 'for efficient bit-level operations, and Geospatial indexes for location-based queries.' + ) + }, + { + 'title': 'Redis Vector Search', + 'source': 'https://redis.io/docs/latest/develop/ai/search-and-query/vectors/', + 'content': ( + 'Redis Vector Search lets you index and search vector embeddings stored in HASH or JSON ' + 'documents. Two index algorithms are available: FLAT (brute-force, exact results, best for ' + 'smaller datasets) and HNSW (Hierarchical Navigable Small World, approximate results, ' + 'much faster at scale using a multi-layer graph structure). Supported distance metrics are ' + 'cosine similarity, L2 Euclidean distance, and inner product. Hybrid queries combine a ' + 'vector KNN clause with a RediSearch filter expression in a single FT.SEARCH call, ' + 'pre-filtering documents by metadata before ranking by vector distance. This avoids ' + 'post-filtering and keeps result quality high. Vector fields are declared with DIM ' + '(dimension count), TYPE (FLOAT32 or FLOAT64), and DISTANCE_METRIC parameters.' + ) + }, + { + 'title': 'Redis Cloud', + 'source': 'https://redis.io/docs/latest/operate/rc/', + 'content': ( + 'Redis Cloud is the fully managed cloud service for Redis, available on AWS, Google Cloud, ' + 'and Microsoft Azure. It provides automatic clustering, replication, and failover for high ' + 'availability and data durability without operational overhead. Deployment options include ' + 'Redis Stack for development, Redis Enterprise for mission-critical workloads, and active-' + 'active geo-distribution for multi-region deployments with conflict-free replication. ' + 'Built-in monitoring, automated backups, and vertical and horizontal scaling are included. ' + 'A free tier is available for development and testing. Supported modules include RediSearch ' + 'for full-text and vector search, RedisJSON for native JSON documents, RedisTimeSeries for ' + 'time-series data, and RedisBloom for probabilistic structures such as Bloom filters and ' + 'Count-Min sketches.' + ) + }, + { + 'title': 'Redis Context Engine', + 'source': 'https://redis.io/docs/latest/develop/ai/context-engine/', + 'content': ( + 'The Redis Context Engine is a suite of managed services on Redis Cloud that gives AI ' + 'agents the context they need. LangCache provides semantic response caching: incoming ' + 'queries are embedded and compared against cached query-response pairs, returning a cached ' + 'answer when cosine similarity exceeds a configurable threshold to reduce LLM API costs. ' + 'Agent Memory offers two-tier persistent memory with a session layer for recent turns and ' + 'a long-term layer backed by vector search, available as a REST API and Python SDK. ' + 'Context Retriever exposes structured business data as governed tools that agents can ' + 'query reliably without writing custom retrieval logic. Data Integration keeps a Redis ' + 'Cloud database in sync with relational databases in near real time using Change Data ' + 'Capture, so agents always query fresh data.' + ) + }, +] + + +class KnowledgeAssistant: + def __init__(self, session_id=None): + self.session_id = session_id or str(uuid.uuid4()) + self.session_key = f'session:{self.session_id}:history' + + self.llm_api_key = os.getenv('LLM_API_KEY') + if not self.llm_api_key: + raise ValueError('LLM_API_KEY environment variable is required') + self.llm_base_url = os.getenv('LLM_API_BASE_URL', '${CONFIG.models[formData.llmModel].baseUrl}') + self.llm_model = os.getenv('LLM_MODEL', '${CONFIG.models[formData.llmModel].defaultModel}') + + try: + # Single client with decode_responses=False handles both text and binary (embedding) fields. + self.client = redis.Redis( + host=os.getenv('REDIS_HOST', 'localhost'), + port=int(os.getenv('REDIS_PORT', 6379)), + username=os.getenv('REDIS_USERNAME', 'default'), + password=os.getenv('REDIS_PASSWORD', ''), + decode_responses=False, + socket_connect_timeout=5 + ) + self.client.ping() + print(f'Connected to Redis. Session: {self.session_id}') + except redis.ConnectionError as e: + print(f'Failed to connect to Redis: {e}') + raise + + self.llm = openai.OpenAI(api_key=self.llm_api_key, base_url=self.llm_base_url) + print(f'LLM configured: {self.llm_model}') + + # redisvl is used only for index creation; all queries use redis-py directly. + doc_index = SearchIndex(IndexSchema.from_dict(_DOC_SCHEMA), redis_client=self.client) + cache_index = SearchIndex(IndexSchema.from_dict(_CACHE_SCHEMA), redis_client=self.client) + doc_index.create(overwrite=False) + cache_index.create(overwrite=False) + + # ── Document ingestion ──────────────────────────────────────────────────── + + def load_directory(self, path, extensions=('.txt', '.md')): + """Ingest all matching files from a directory tree. Each file becomes one document.""" + import pathlib + loaded = 0 + for filepath in sorted(pathlib.Path(path).rglob('*')): + if filepath.suffix.lower() in extensions and filepath.is_file(): + try: + content = filepath.read_text(encoding='utf-8', errors='ignore').strip() + if content: + self.ingest_document(content, title=filepath.stem, source=str(filepath)) + loaded += 1 + except Exception as e: + print(f'Skipping {filepath}: {e}') + print(f'Loaded {loaded} document(s) from {path}') + return loaded + + def _chunk_text(self, text): + # Character-based chunking is simple but not token-aware. For production, + # consider tiktoken or RecursiveCharacterTextSplitter from langchain. + chunks, start = [], 0 + while start < len(text): + chunks.append(text[start:start + CHUNK_SIZE]) + start += CHUNK_SIZE - CHUNK_OVERLAP + return chunks + + def _embed(self, text): + resp = self.llm.embeddings.create(model=EMBEDDING_MODEL, input=text[:8000]) + return resp.data[0].embedding + + def _to_bytes(self, embedding): + return struct.pack(f'{len(embedding)}f', *embedding) + + def _decode_doc(self, doc): + def d(val): + return val.decode('utf-8', errors='replace') if isinstance(val, bytes) else (val or '') + return { + 'id': d(doc.id), + 'chunk_id': d(getattr(doc, 'chunk_id', '')), + 'doc_id': d(getattr(doc, 'doc_id', '')), + 'title': d(getattr(doc, 'title', '')), + 'source': d(getattr(doc, 'source', '')), + 'content': d(getattr(doc, 'content', '')), + 'distance': d(getattr(doc, 'distance', '1.0')) + } + + def ingest_document(self, content, title, source=''): + doc_id = str(uuid.uuid4()) + chunks = self._chunk_text(content) + for i, chunk in enumerate(chunks): + chunk_id = f'{doc_id}:{i}' + embedding = self._embed(chunk) + self.client.hset(f'doc:{chunk_id}', mapping={ + b'doc_id': doc_id.encode(), + b'chunk_id': chunk_id.encode(), + b'title': title.encode(), + b'source': source.encode(), + b'content': chunk.encode(), + b'embedding': self._to_bytes(embedding) + }) + print(f"Ingested '{title}': {len(chunks)} chunk(s) (doc_id: {doc_id})") + return doc_id + + # ── Hybrid search ───────────────────────────────────────────────────────── + # First pass: FT.SEARCH with a text pre-filter and an inline KNN clause — + # "(text_terms) => [KNN k @embedding $BLOB AS distance]" — so Redis applies + # both filters in a single round trip. This is more Redis-native than running + # two separate queries and fusing the results in Python. + # Second pass (fallback): if the text filter is too selective and returns nothing, + # a pure vector search is issued so queries always return results when documents exist. + + def _sanitize_ft_query(self, text): + # Escape RediSearch special characters rather than strip them, so tokens like + # "C++", "redis.io", and non-English text survive into the query intact. + # We OR-join per-word terms for recall; the KNN step handles ranking. + # This is a best-effort heuristic — see _FT_SPECIAL comment above. + terms = text.split() + if not terms: + return '*' + escaped = [_FT_SPECIAL.sub(r'\\\1', t) for t in terms] + return ' | '.join(escaped[:10]) # cap at 10 terms + + def _run_knn_query(self, query_str, query_embedding, top_k): + return self.client.ft('knowledge_docs').search( + FTQuery(query_str) + .sort_by('distance', asc=True) + .paging(0, top_k) + .return_fields('chunk_id', 'doc_id', 'title', 'source', 'content', 'distance') + .dialect(2), + query_params={'K': top_k, 'BLOB': self._to_bytes(query_embedding)} + ) + + def _hybrid_search(self, query_text, query_embedding, top_k=MAX_SEARCH_RESULTS): + safe_text = self._sanitize_ft_query(query_text) + if safe_text != '*': + try: + result = self._run_knn_query( + f'({safe_text})=>[KNN $K @embedding $BLOB AS distance]', + query_embedding, top_k + ) + if result.docs: + return [self._decode_doc(d) for d in result.docs] + except Exception as e: + print(f'Hybrid search error: {e}') + # Fall back to pure vector search if the text filter returned nothing + result = self._run_knn_query( + '*=>[KNN $K @embedding $BLOB AS distance]', + query_embedding, top_k + ) + return [self._decode_doc(d) for d in result.docs] + + # ── Semantic cache ──────────────────────────────────────────────────────── + + def _check_cache(self, query_embedding): + try: + result = self.client.ft('knowledge_cache').search( + FTQuery('*=>[KNN 1 @query_embedding $BLOB AS distance]') + .sort_by('distance', asc=True) + .paging(0, 1) + .return_fields('response', 'citations', 'distance') + .dialect(2), + query_params={'BLOB': self._to_bytes(query_embedding)} + ) + if result.docs: + doc = self._decode_doc(result.docs[0]) + # vector_distance for cosine: 0=identical, 1=orthogonal. Hit when similarity > CACHE_THRESHOLD. + if float(doc.get('distance', '1.0')) < (1.0 - CACHE_THRESHOLD): + return doc.get('response', ''), json.loads(doc.get('citations', '[]')) + except Exception: + pass + return None, None + + def _store_cache(self, query_embedding, response, citations): + key = f'ragcache:{uuid.uuid4()}' + self.client.hset(key, mapping={ + b'response': response.encode(), + b'citations': json.dumps(citations).encode(), + b'query_embedding': self._to_bytes(query_embedding) + }) + self.client.expire(key, CACHE_TTL) + + # ── Session memory ──────────────────────────────────────────────────────── + + def _get_history(self): + raw = self.client.lrange(self.session_key, 0, MAX_HISTORY_TURNS * 2 - 1) + messages = [] + for item in reversed(raw): + try: + messages.append(json.loads(item.decode() if isinstance(item, bytes) else item)) + except Exception: + pass + return messages + + def _save_history(self, role, content): + self.client.lpush(self.session_key, json.dumps({'role': role, 'content': content}).encode()) + self.client.ltrim(self.session_key, 0, MAX_HISTORY_TURNS * 2 - 1) + + # ── Query ───────────────────────────────────────────────────────────────── + + def query(self, user_query): + query_embedding = self._embed(user_query) + + cached_response, cached_citations = self._check_cache(query_embedding) + if cached_response: + print('[cache hit]') + self._save_history('user', user_query) + self._save_history('assistant', cached_response) + return cached_response, cached_citations + + top_chunks = self._hybrid_search(user_query, query_embedding) + if not top_chunks: + return 'No documents found. Please ingest documents before querying.', [] + + context_parts, citations = [], [] + for i, chunk in enumerate(top_chunks): + title = chunk.get('title', 'Unknown') + source = chunk.get('source', '') + context_parts.append(f'[{i + 1}] {title}\n{chunk.get("content", "")}') + citations.append({ + 'index': i + 1, + 'title': title, + 'source': source, + 'chunk_id': chunk.get('chunk_id', ''), + 'doc_id': chunk.get('doc_id', '') + }) + + messages = [ + {'role': 'system', 'content': ( + 'You are a helpful knowledge assistant. Answer using only the provided context. ' + 'Reference sources as [1], [2], etc. If the context lacks the answer, say so clearly.' + )}, + *self._get_history(), + {'role': 'user', 'content': f'Context:\n{chr(10).join(context_parts)}\n\nQuestion: {user_query}'} + ] + + response = self.llm.chat.completions.create(model=self.llm_model, messages=messages) + answer = response.choices[0].message.content + + self._store_cache(query_embedding, answer, citations) + self._save_history('user', user_query) + self._save_history('assistant', answer) + return answer, citations + + +if __name__ == '__main__': + try: + agent = KnowledgeAssistant() + + # Only ingest sample documents when the index is empty so re-running the agent + # does not re-embed the same content on every startup. + # To load your own documents instead: agent.load_directory('path/to/docs') + index_info = agent.client.ft('knowledge_docs').info() + if int(index_info.get('num_docs', 0)) == 0: + print('Empty index — ingesting sample documents...') + for doc in SAMPLE_DOCS: + agent.ingest_document(doc['content'], doc['title'], doc['source']) + else: + print(f"Index already contains {index_info.get('num_docs')} document(s). Skipping ingestion.") + + print('\nKnowledge Assistant ready. Type your questions or "quit" to exit.\n') + while True: + try: + user_input = input('Question: ').strip() + if user_input.lower() in ['quit', 'exit', 'bye']: + print('Goodbye!') + break + if not user_input: + continue + answer, citations = agent.query(user_input) + print(f'\n{answer}') + if citations: + print('\nSources:') + for c in citations: + src = f' — {c["source"]}' if c['source'] else '' + print(f' [{c["index"]}] {c["title"]}{src}') + print(f' chunk_id: {c["chunk_id"]}') + print() + except KeyboardInterrupt: + print('\nGoodbye!') + break + except Exception as e: + print(f'Error: {e}') + except ValueError as e: + print(f'Configuration error: {e}') + exit(1) + except Exception as e: + print(f'Failed to initialize: {e}') + exit(1) diff --git a/static/js/agent-builder.js b/static/js/agent-builder.js index 91f37ebee3..ff21b0ff36 100644 --- a/static/js/agent-builder.js +++ b/static/js/agent-builder.js @@ -20,6 +20,12 @@ description: "A chatbot that maintains conversation history using semantic message history and provides contextual responses.", features: ["Conversation memory", "Context awareness", "Multi-turn dialogue"], keywords: ["chat", "conversation", "assistant", "bot", "chatbot", "talk", "dialogue"] + }, + rag: { + name: "Knowledge Assistant", + description: "A RAG agent that ingests documents, uses Redis-native hybrid retrieval (text pre-filter + vector search), semantic caching, and session memory to answer questions with citations.", + features: ["Document ingestion with chunking", "Hybrid vector + full-text search", "Semantic caching", "Citations"], + keywords: ["rag", "knowledge", "documents", "search", "retrieval", "qa", "question answering", "citations", "hybrid"] } }, languages: { @@ -286,16 +292,18 @@ let suggestions = []; switch (conversationState.step) { - case 'agent-type': + case 'agent-type': { + const agentIcons = { recommendation: '🛍️', conversational: '💬', rag: '🔍' }; suggestions = Object.entries(CONFIG.agentTypes).map(([key, config]) => ({ value: key, label: config.name, - icon: key === 'recommendation' ? '🛍️' : '💬' + icon: agentIcons[key] || '🤖' })).filter(s => s.label.toLowerCase().includes(lowerInput) || CONFIG.agentTypes[s.value].keywords.some(k => k.includes(lowerInput)) ); break; + } case 'language': suggestions = Object.entries(CONFIG.languages).map(([key, config]) => ({ @@ -381,7 +389,8 @@ // Generate a default agent name based on the type const defaultNames = { recommendation: 'RecommendationEngine', - conversational: 'ConversationalAgent' + conversational: 'ConversationalAgent', + rag: 'KnowledgeAssistant' }; conversationState.selections.agentName = defaultNames[selectedType] || 'RedisAgent'; @@ -398,7 +407,8 @@ } else { addMessage("I didn't understand that. Please choose one of the agent types:", 'bot', [ { value: 'recommendation', label: '🛍️ Recommendation Engine' }, - { value: 'conversational', label: '💬 Conversational Assistant' } + { value: 'conversational', label: '💬 Conversational Assistant' }, + { value: 'rag', label: '🔍 Knowledge Assistant' } ]); } } @@ -426,8 +436,8 @@ } if (selectedLang) { - // Check if it's Python (fully supported) - if (selectedLang === 'python') { + // Check if it's a supported language + if (selectedLang === 'python' || selectedLang === 'javascript') { conversationState.selections.programmingLanguage = selectedLang; const config = CONFIG.languages[selectedLang]; @@ -445,9 +455,10 @@ const config = CONFIG.languages[selectedLang]; const languageName = config.name; - addMessage(`${languageName} support is coming soon. Currently, only Python is fully supported.`, 'bot'); - addMessage(`Would you like to build a Python agent instead?`, 'bot', [ - { value: 'python', label: 'Yes, use Python' }, + addMessage(`${languageName} support is coming soon. Currently, Python and JavaScript are supported.`, 'bot'); + addMessage(`Would you like to build a Python or JavaScript agent instead?`, 'bot', [ + { value: 'python', label: '🐍 Yes, use Python' }, + { value: 'javascript', label: '🟨 Yes, use JavaScript' }, { value: 'wait', label: 'I\'ll wait for ' + languageName } ]); } @@ -520,8 +531,8 @@ java: '.java', csharp: '.cs' }; - const base = window.HUGO_BASEURL || ''; - const filename = `${base}code/agent-templates/${formData.programmingLanguage}/${formData.agentType}_agent${fileExtensions[formData.programmingLanguage]}`; + const templateBase = (window.AGENT_TEMPLATE_BASE || '/code/agent-templates').replace(/\/$/, ''); + const filename = `${templateBase}/${formData.programmingLanguage}/${formData.agentType}_agent${fileExtensions[formData.programmingLanguage]}`; return loadTemplateFile(filename, formData) || genericTemplates[formData.programmingLanguage](formData); }