-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy path.env-example
More file actions
94 lines (73 loc) · 2.15 KB
/
.env-example
File metadata and controls
94 lines (73 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# DB2VEC ENVIRONMENT CONFIGURATION
# ================================
# This file contains the environment variables used by db2vec.
# Copy this file to ".env" and customize the values as needed.
# INPUT/OUTPUT CONFIGURATION
# --------------------------
# Path to the database dump file to process (.sql or .surql)
DUMP_FILE=./surreal.surql
# Target vector database type
# Options: redis|chroma|milvus|qdrant|surrealdb|pinecone
# NOTE(review): the VECTOR_HOST default in this file is a redis:// URL, matching
# the default here; adjust VECTOR_HOST when selecting a different target.
EXPORT_TYPE=redis
# DEBUG MODE
# ----------
# Set to true to print the parsed JSON records before embedding
DEBUG=false
# VECTOR DATABASE CONNECTION
# --------------------------
# Vector database URL/host endpoint (the default is a Redis URL on localhost)
VECTOR_HOST=redis://127.0.0.1:6379
# Database authentication (user/password or API key)
# Keep real credentials only in your local ".env"; do not commit them.
# NOTE(review): USER is usually already exported by the shell on Unix-like
# systems. If the .env loader does not override existing environment variables,
# the value below may be ignored in favor of the login name - confirm how
# db2vec loads this file.
USER=root
PASS=
SECRET=
# NOTE(review): presumably toggles whether the credentials above are sent - confirm.
AUTH=false
# Database organization
# NOTE(review): which of these keys apply presumably depends on EXPORT_TYPE - confirm per backend.
DATABASE=default_database
TENANT=default_tenant
NAMESPACE=default_namespace
# Pinecone-specific settings
INDEXES=default_indexes
CLOUD=aws
REGION=us-east-1
# VECTOR CONFIGURATION
# --------------------
# Vector dimension size (must match the output size of your embedding model)
# NOTE(review): 768 presumably corresponds to the default EMBEDDING_MODEL
# (nomic-embed-text) - update it if you switch models.
DIMENSION=768
# Distance metric: l2|ip|cosine|euclidean|dotproduct
# NOTE(review): the accepted metric names likely differ per vector database - confirm for your EXPORT_TYPE.
METRIC=cosine
# DATA HANDLING
# -------------
# Maximum payload size per request, in MB
PAYLOAD_SIZE_MB=12
# Batch size for DB inserts
# NOTE(review): presumably the number of records per insert batch - confirm.
CHUNK_SIZE=10
# Group Redis records by table name (otherwise use FT.CREATE/FT.SEARCH)
GROUP_REDIS=false
# Use exclusion rules from config/exclude.json
USE_EXCLUDE=false
# EMBEDDING CONFIGURATION
# -----------------------
# Embedding provider to use: ollama|tei|google
EMBEDDING_PROVIDER=ollama
# Embedding model name/id
# Examples: nomic-embed-text, text-embedding-004, nomic-embed-text-v2-moe
EMBEDDING_MODEL=nomic-embed-text
# API key for Google Gemini (required if EMBEDDING_PROVIDER=google)
# EMBEDDING_API_KEY=
# URL endpoint for Ollama or Google embeddings (optional)
# EMBEDDING_URL=
# Embedding performance tuning
# NOTE(review): units are not stated here; presumably concurrent requests,
# texts per batch, and maximum tokens per input respectively - confirm.
EMBEDDING_MAX_CONCURRENCY=4
EMBEDDING_BATCH_SIZE=16
EMBEDDING_MAX_TOKENS=8000
# NOTE(review): timeout unit is presumably seconds - confirm.
OLLAMA_TIMEOUT=60
# Task type for Google Gemini
EMBEDDING_TASK_TYPE=SEMANTIC_SIMILARITY
# TEI (Text Embeddings Inference) specific settings
# NOTE(review): the "tei-metal" name suggests a macOS/Metal build; point this at
# the TEI binary for your platform.
TEI_BINARY_PATH=tei/tei-metal
TEI_LOCAL_PORT=8080
# PERFORMANCE
# -----------
# Number of CPU threads for parallel tasks (0 = auto-detect)
NUM_THREADS=0