Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
ce911f8
feat(classifiers): OnnxEmbedder + all-MiniLM-L6-v2 default (WOR-1220)
rickcrawford Jun 9, 2026
177136d
feat(classifier-client): add embed() RPC wrapper (WOR-1222)
rickcrawford Jun 9, 2026
dc37c72
feat(sidecar): implement Embed RPC backed by OnnxEmbedder (WOR-1221)
rickcrawford Jun 9, 2026
3bccb9c
feat(ai): local sidecar embedding source for the semantic cache (WOR-…
rickcrawford Jun 9, 2026
1450ca6
feat(modules): in-process ONNX prompt-injection detector (WOR-1224)
rickcrawford Jun 9, 2026
84ad9c1
feat(observe): metrics + SOTA usage tracking for local inference, mul…
rickcrawford Jun 9, 2026
dbc385e
ci: bump Node-20 deprecated actions to node24 versions
rickcrawford Jun 9, 2026
e06beb1
docs(WOR-1226): local-inference guide for sidecar + in-process embedd…
rickcrawford Jun 9, 2026
28da431
style: fmt embedder test assert (WOR-1220 follow-up)
rickcrawford Jun 9, 2026
c64ecf7
feat(core): wire in-process embedding source for the semantic cache (…
rickcrawford Jun 9, 2026
7213d33
feat(ai): emit derived USD cost on the OTel AI span (WOR-1229)
rickcrawford Jun 9, 2026
419760a
feat(ai): AI error span semantics (WOR-1231)
rickcrawford Jun 9, 2026
3bf86af
test(ai): pin GenAI semconv conformance for AI spans (WOR-1232)
rickcrawford Jun 9, 2026
e4943fc
docs(observe): AI-span gen_ai/OpenInference attributes + backend matr…
rickcrawford Jun 9, 2026
29fc716
chore(deny): ignore RUSTSEC-2026-0173 (proc-macro-error2 unmaintained)
rickcrawford Jun 9, 2026
3e3ba65
feat(observe): cost-aware tail-sampling decision (WOR-1230)
rickcrawford Jun 9, 2026
0cbaff7
feat(observe): Phoenix in the reference stack + cost-aware tail sampl…
rickcrawford Jun 9, 2026
86af6e7
test+docs+example: comprehensive coverage for the in-process classifi…
rickcrawford Jun 9, 2026
fd51c21
feat(ai): capture-gated, redacted prompt content on AI spans (WOR-1228)
rickcrawford Jun 9, 2026
a6028f4
test(e2e): span-arrival at a mock OTLP collector (WOR-1233)
rickcrawford Jun 9, 2026
41cf6d3
chore(config): regenerate schema for the HTTP/3 disabled doc comments
rickcrawford Jun 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/fixture-freshness.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:

# Cache only the standalone tool's target dir; the parent
# workspace's cache is owned by other workflows.
- uses: Swatinem/rust-cache@v2.7.3
- uses: Swatinem/rust-cache@v2
with:
shared-key: fixture-freshness
workspaces: test/fixtures/refresh-tool
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/licensing-conformance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ jobs:

- name: open PR with refreshed vendored copies
if: steps.diff.outputs.changed == '1'
uses: peter-evans/create-pull-request@v6
uses: peter-evans/create-pull-request@v7
with:
token: ${{ secrets.GITHUB_TOKEN }}
branch: chore/licensing-schemas-refresh
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/perf-regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@ jobs:
# would let the second build trample the first's lockfile state
# and skew the bench.
- name: cache (candidate)
uses: Swatinem/rust-cache@v2.7.3
uses: Swatinem/rust-cache@v2
with:
shared-key: perf-regression-candidate
workspaces: candidate-src

- name: cache (baseline)
uses: Swatinem/rust-cache@v2.7.3
uses: Swatinem/rust-cache@v2
with:
shared-key: perf-regression-baseline
workspaces: baseline-src
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/synthetic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
# `bench-synthetic` is its own workspace, so it gets a dedicated
# cache key. That keeps the proxy workspace cache from being
# invalidated by churn on the synthetic harness.
- uses: Swatinem/rust-cache@v2.7.3
- uses: Swatinem/rust-cache@v2
with:
shared-key: synthetic
workspaces: bench-synthetic
Expand Down Expand Up @@ -189,7 +189,7 @@ jobs:

- uses: dtolnay/rust-toolchain@stable

- uses: Swatinem/rust-cache@v2.7.3
- uses: Swatinem/rust-cache@v2
with:
shared-key: chaos-hot-reload

Expand Down
6 changes: 6 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions crates/sbproxy-ai/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ sbproxy-config.workspace = true
sbproxy-cache.workspace = true
sbproxy-security.workspace = true
sbproxy-platform.workspace = true
# WOR-1223: local embedding source for the semantic cache. Only the
# sidecar client is used here; the in-process embedder lives in sbproxy-core
# because sbproxy-classifiers already depends on sbproxy-ai, so depending on
# it from this crate would create a dependency cycle.
sbproxy-classifier-client.workspace = true

serde.workspace = true
serde_json.workspace = true
Expand Down
13 changes: 13 additions & 0 deletions crates/sbproxy-ai/src/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ pub struct AiHandlerConfig {
/// See `sbproxy_security::pii::PiiConfig` for the rule schema.
#[serde(default)]
pub pii: Option<sbproxy_security::pii::PiiConfig>,
/// WOR-1228: when `true`, emit the prompt text as the OpenInference
/// `input.value` span attribute so trace backends (Phoenix, Langfuse)
/// show the actual conversation, not just token counts. Off by default
/// because prompt content is sensitive: when on, the text is routed
/// through the configured `pii` redactor (if any) and the always-on
/// secret redactor before it lands on the span. Enable only with `pii`
/// configured and a trace backend inside your trust boundary.
#[serde(default)]
pub trace_content: bool,
/// Opaque semantic-cache configuration block. The OSS proxy
/// stores this verbatim and surfaces it through the stream cache
/// recorder hook so the enterprise implementation can read its
Expand Down Expand Up @@ -857,6 +866,7 @@ mod tests {
resilience: None,
shadow: None,
pii: None,
trace_content: false,
semantic_cache: None,
prompts: None,
usage_parser: "auto".to_string(),
Expand Down Expand Up @@ -885,6 +895,7 @@ mod tests {
resilience: None,
shadow: None,
pii: None,
trace_content: false,
semantic_cache: None,
prompts: None,
usage_parser: "auto".to_string(),
Expand Down Expand Up @@ -913,6 +924,7 @@ mod tests {
resilience: None,
shadow: None,
pii: None,
trace_content: false,
semantic_cache: None,
prompts: None,
usage_parser: "auto".to_string(),
Expand Down Expand Up @@ -942,6 +954,7 @@ mod tests {
resilience: None,
shadow: None,
pii: None,
trace_content: false,
semantic_cache: None,
prompts: None,
usage_parser: "auto".to_string(),
Expand Down
Loading
Loading