diff --git a/crates/basic-api/Cargo.lock b/crates/basic-api/Cargo.lock index e8cd33c..7d95ade 100644 --- a/crates/basic-api/Cargo.lock +++ b/crates/basic-api/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -60,6 +69,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + [[package]] name = "bitflags" version = "2.11.1" @@ -72,12 +101,38 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + [[package]] name = "errno" version = "0.3.14" @@ -130,6 +185,12 @@ dependencies = [ "slab", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "http" version = "1.4.0" @@ -210,6 +271,15 @@ dependencies = [ "tower-service", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" @@ -222,6 +292,16 @@ version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "lock_api" version = "0.4.14" @@ -255,6 +335,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "mio" version = "1.2.0" @@ -266,6 +352,16 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -307,6 +403,16 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -334,6 +440,41 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + [[package]] name = "ryu" version = "1.0.23" @@ -412,6 +553,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.8" @@ -543,6 +690,13 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "vdb-ffi" +version = "0.1.0" +dependencies = [ + "bindgen", +] + [[package]] name = "vectordb" version = "0.1.0" @@ -550,6 +704,7 @@ dependencies = [ "axum", "serde", "tokio", + "vdb-ffi", ] [[package]] diff --git a/crates/basic-api/Cargo.toml b/crates/basic-api/Cargo.toml index 6165d87..ad91bec 100644 --- a/crates/basic-api/Cargo.toml +++ b/crates/basic-api/Cargo.toml @@ -7,3 +7,4 @@ edition = "2021" axum = "0.8" serde = { version = "1", features = ["derive"] } tokio = { version = "1", features = ["full"] } +vdb-ffi = { path = "../vdb-ffi" } diff --git a/crates/basic-api/src/engine.rs b/crates/basic-api/src/engine.rs index 95fb00c..89200da 100644 --- a/crates/basic-api/src/engine.rs +++ b/crates/basic-api/src/engine.rs @@ -1,4 +1,7 @@ +use std::collections::HashMap; + use crate::models::SearchResult; +use vdb_ffi::FfiVectorEngine; pub trait VectorEngine { fn insert(&mut self, id: String, vector: Vec); @@ -7,61 +10,114 @@ pub trait VectorEngine { fn dimension(&self) -> Option; } -#[derive(Default)] -pub struct FlatIndex { - vectors: Vec<(String, Vec)>, +pub struct FfiEngineAdapter { + engine: FfiVectorEngine, + stored_dimensions: HashMap, // maps vector ID : dimension + dimension: Option, } -impl FlatIndex { +impl FfiEngineAdapter { pub fn new() -> Self { - Self::default() + Self { + engine: FfiVectorEngine::new(), + stored_dimensions: HashMap::new(), + dimension: None, + } } } -impl VectorEngine for FlatIndex { +impl VectorEngine for FfiEngineAdapter { fn insert(&mut self, id: String, vector: Vec) { - self.delete(&id); - self.vectors.push((id, vector)); + let vector_dimension = vector.len(); + + self.engine.insert(&id, &vector); + self.stored_dimensions.insert(id, vector_dimension); + self.dimension = Some(vector_dimension); } fn search(&self, query: Vec, k: usize) -> Vec { - let mut results: Vec = self - .vectors - .iter() - .map(|(id, vector)| SearchResult { - id: id.clone(), - score: cosine_similarity(&query, vector), - }) - .collect(); + let results = self.engine.search(&query, k); - results.sort_by(|left, right| right.score.total_cmp(&left.score)); - results.truncate(k); - results + (0..results.len()) + .map(|index| SearchResult { + id: results.id_at(index), + score: results.score_at(index), + }) + .collect() } fn delete(&mut self, id: &str) { - self.vectors.retain(|(stored_id, _)| stored_id != id); + if self.engine.delete(id) { + self.stored_dimensions.remove(id); + + if self.stored_dimensions.is_empty() { + self.dimension = None; + } + } } fn dimension(&self) -> Option { - self.vectors.first().map(|(_, vector)| vector.len()) + self.dimension } } -fn cosine_similarity(left: &[f32], right: &[f32]) -> f32 { - let mut dot = 0.0; - let mut left_norm = 0.0; - let mut right_norm = 0.0; - for (left_value, right_value) in left.iter().zip(right.iter()) { - dot += left_value * right_value; - left_norm += left_value * left_value; - right_norm += right_value * right_value; - } +// Old hardcoded implementation minus the FFI +// ------------------------------------------------ +// #[derive(Default)] +// pub struct FlatIndex { +// vectors: Vec<(String, Vec)>, +// } - if left_norm == 0.0 || right_norm == 0.0 { - return 0.0; - } +// impl FlatIndex { +// pub fn new() -> Self { +// Self::default() +// } +// } +// impl VectorEngine for FlatIndex { +// fn insert(&mut self, id: String, vector: Vec) { +// self.delete(&id); +// self.vectors.push((id, vector)); +// } - dot / (left_norm.sqrt() * right_norm.sqrt()) -} +// fn search(&self, query: Vec, k: usize) -> Vec { +// let mut results: Vec = self +// .vectors +// .iter() +// .map(|(id, vector)| SearchResult { +// id: id.clone(), +// score: cosine_similarity(&query, vector), +// }) +// .collect(); + +// results.sort_by(|left, right| right.score.total_cmp(&left.score)); +// results.truncate(k); +// results +// } + +// fn delete(&mut self, id: &str) { +// self.vectors.retain(|(stored_id, _)| stored_id != id); +// } + +// fn dimension(&self) -> Option { +// self.vectors.first().map(|(_, vector)| vector.len()) +// } +// } + +// fn cosine_similarity(left: &[f32], right: &[f32]) -> f32 { +// let mut dot = 0.0; +// let mut left_norm = 0.0; +// let mut right_norm = 0.0; + +// for (left_value, right_value) in left.iter().zip(right.iter()) { +// dot += left_value * right_value; +// left_norm += left_value * left_value; +// right_norm += right_value * right_value; +// } + +// if left_norm == 0.0 || right_norm == 0.0 { +// return 0.0; +// } + +// dot / (left_norm.sqrt() * right_norm.sqrt()) +// } \ No newline at end of file diff --git a/crates/basic-api/src/state.rs b/crates/basic-api/src/state.rs index 9dd4b03..5101ebe 100644 --- a/crates/basic-api/src/state.rs +++ b/crates/basic-api/src/state.rs @@ -1,6 +1,6 @@ use std::sync::{Arc, Mutex}; -use crate::engine::{FlatIndex, VectorEngine}; +use crate::engine::{FfiEngineAdapter, VectorEngine}; #[derive(Clone)] pub struct AppState { @@ -10,7 +10,7 @@ pub struct AppState { impl AppState { pub fn new() -> Self { Self { - engine: Arc::new(Mutex::new(Box::new(FlatIndex::new()))), + engine: Arc::new(Mutex::new(Box::new(FfiEngineAdapter::new()))), } } } diff --git a/crates/vdb-ffi/src/engine.rs b/crates/vdb-ffi/src/engine.rs index 4aead3d..759d4a4 100644 --- a/crates/vdb-ffi/src/engine.rs +++ b/crates/vdb-ffi/src/engine.rs @@ -21,6 +21,10 @@ pub struct FfiSearchResults { handle: *mut NativeSearchResults, } +// The wrapper owns the native handle and is always accessed behind higher-level +// synchronization in the API layer. +unsafe impl Send for FfiVectorEngine {} + impl FfiVectorEngine { pub fn new() -> Self { let handle = unsafe { native_vector_engine_new() };