diff --git a/.github/workflows/preview.yml b/.github/workflows/preview.yml index acd9a3a..c52a8b7 100644 --- a/.github/workflows/preview.yml +++ b/.github/workflows/preview.yml @@ -17,12 +17,8 @@ jobs: with: worker_name: source-data-proxy-pr-${{ github.event.pull_request.number }} wrangler_config: wrangler.preview.toml - var_overrides: >- - LOG_LEVEL:DEBUG - SOURCE_API_URL:https://staging.source.coop - OIDC_PROVIDER_ISSUER:https://source-data-proxy-pr-${{ github.event.pull_request.number }}.source-coop.workers.dev - OIDC_PROVIDER_KID:source-proxy-1 - AUTH_ISSUER:https://auth.staging.source.coop + # All static preview vars live in wrangler.preview.toml [vars]; only the + # per-PR dynamic bindings are templated here. service_overrides: >- PUBLIC_LOG_STREAM:public-log-stream-pr-${{ github.event.pull_request.number }} environment: preview diff --git a/Cargo.lock b/Cargo.lock index 41714fa..a07d44d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -909,7 +909,7 @@ dependencies = [ [[package]] name = "multistore" version = "0.6.0" -source = "git+https://github.com/developmentseed/multistore?tag=v0.6.0#b0f5b7fed72d216d3474ebffacc6de1c074e616f" +source = "git+https://github.com/developmentseed/multistore?branch=fix%2Fdecode-aws-chunked-writes#71b1513a7eacb866328a72eb85818603e859424c" dependencies = [ "async-trait", "base64", @@ -936,7 +936,7 @@ dependencies = [ [[package]] name = "multistore-cf-workers" version = "0.6.0" -source = "git+https://github.com/developmentseed/multistore?tag=v0.6.0#b0f5b7fed72d216d3474ebffacc6de1c074e616f" +source = "git+https://github.com/developmentseed/multistore?branch=fix%2Fdecode-aws-chunked-writes#71b1513a7eacb866328a72eb85818603e859424c" dependencies = [ "async-trait", "bytes", @@ -960,7 +960,7 @@ dependencies = [ [[package]] name = "multistore-oidc-provider" version = "0.6.0" -source = "git+https://github.com/developmentseed/multistore?tag=v0.6.0#b0f5b7fed72d216d3474ebffacc6de1c074e616f" +source = "git+https://github.com/developmentseed/multistore?branch=fix%2Fdecode-aws-chunked-writes#71b1513a7eacb866328a72eb85818603e859424c" dependencies = [ "base64", "chrono", @@ -979,7 +979,7 @@ dependencies = [ [[package]] name = "multistore-path-mapping" version = "0.6.0" -source = "git+https://github.com/developmentseed/multistore?tag=v0.6.0#b0f5b7fed72d216d3474ebffacc6de1c074e616f" +source = "git+https://github.com/developmentseed/multistore?branch=fix%2Fdecode-aws-chunked-writes#71b1513a7eacb866328a72eb85818603e859424c" dependencies = [ "multistore", "percent-encoding", @@ -989,7 +989,7 @@ dependencies = [ [[package]] name = "multistore-sts" version = "0.6.0" -source = "git+https://github.com/developmentseed/multistore?tag=v0.6.0#b0f5b7fed72d216d3474ebffacc6de1c074e616f" +source = "git+https://github.com/developmentseed/multistore?branch=fix%2Fdecode-aws-chunked-writes#71b1513a7eacb866328a72eb85818603e859424c" dependencies = [ "aes-gcm", "base64", diff --git a/Cargo.toml b/Cargo.toml index 8a2edc8..df604ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,12 +22,16 @@ path = "tests/pagination.rs" name = "backend_auth" path = "tests/backend_auth.rs" +[[test]] +name = "authz" +path = "tests/authz.rs" + [dependencies] # Multistore multistore = { version = "0.6.0", features = ["azure"] } -multistore-oidc-provider = { version = "0.6.0" } -multistore-path-mapping = { version = "0.6.0" } -multistore-sts = { version = "0.6.0" } +multistore-oidc-provider = "0.6.0" +multistore-path-mapping = "0.6.0" +multistore-sts = "0.6.0" # Serialization serde = { version = "1", features = ["derive"] } @@ -65,14 +69,14 @@ web-sys = { version = "0.3", features = [ worker = { version = "=0.7.4", features = ["http"] } worker-macros = { version = "=0.7.4", features = ["http"] } -# multistore-cf-workers 0.6.0 is not on crates.io, so all multistore crates are -# pinned to the v0.6.0 git tag to keep `multistore` single-sourced: a git -# cf-workers alongside registry crates would pull two copies of `multistore` -# and fail to compile. Remove this block once multistore-cf-workers 0.6.0 -# publishes and the version deps above resolve from crates.io. +# TEMPORARY: build against the aws-chunked write-decode fix branch of multistore +# until it ships in a release. Without this, the modern aws-cli's default +# trailer-checksum uploads are stored as raw chunk framing (corrupted writes). +# Remove this block and bump the multistore* deps above once +# https://github.com/developmentseed/multistore/pull/92 is released. [patch.crates-io] -multistore = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" } -multistore-oidc-provider = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" } -multistore-path-mapping = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" } -multistore-sts = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" } -multistore-cf-workers = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" } +multistore = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" } +multistore-cf-workers = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" } +multistore-oidc-provider = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" } +multistore-path-mapping = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" } +multistore-sts = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" } diff --git a/src/authz.rs b/src/authz.rs new file mode 100644 index 0000000..bd3d770 --- /dev/null +++ b/src/authz.rs @@ -0,0 +1,20 @@ +//! Write-action classification. Kept wasm-free so it can be unit-tested +//! natively (see `tests/authz.rs`), despite the crate's `[lib] test = false`. +//! The rest of the write gate (read-only / signable / permission checks) is +//! trivial enough to live inline in the registry. + +use multistore::types::Action; + +/// Whether an S3 action mutates the backend. Reads (GET/HEAD/LIST) are served +/// without a write check; everything else is a write and must be authorized. +/// +/// A denylist over the closed [`Action`] set, so it is fail-safe by direction: +/// any action that is not explicitly a read is treated as a write. A future +/// read-only action added upstream would be (harmlessly) gated as a write until +/// added here, never the reverse. +pub(crate) fn is_write_action(action: Action) -> bool { + !matches!( + action, + Action::GetObject | Action::HeadObject | Action::ListBucket + ) +} diff --git a/src/cache.rs b/src/cache.rs index 2fa3cb6..45aafca 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -25,14 +25,25 @@ const PATH_SEGMENT: &AsciiSet = &NON_ALPHANUMERIC /// Product metadata (`/api/v1/products/{account}/{product}`). const PRODUCT_CACHE_SECS: u32 = 300; // 5 minutes -/// A single data connection (`/api/v1/data-connections/{id}`). Short to match -/// product metadata: a per-id response is subject-authorized, so the TTL is an +/// A single data connection (`/api/v1/data-connections/{id}`). Matches product +/// metadata: a per-id response is subject-authorized, so the TTL is an /// authorization-revocation lag, not just a freshness knob. +/// +/// Longer than `PERMISSIONS_CACHE_SECS` on purpose, even though `read_only` +/// gates writes from here. This is fetched on *every* request (read and write), +/// so a short TTL taxes the read path; flipping a connection read-only freezes +/// *all* writers and is a deliberate admin act where ~5 min lag is acceptable. +/// Revoking a single (e.g. compromised) account's write grant is the urgent +/// case, and that rides the 60s permission TTL, not this one. const DATA_CONNECTION_CACHE_SECS: u32 = 300; // 5 minutes /// Product list for an account (`/api/v1/products/{account}`). const PRODUCT_LIST_CACHE_SECS: u32 = 60; // 1 minute +/// Caller's permissions on a product (`.../permissions`). Short: it gates writes, +/// so a revoked grant should stop taking effect quickly. +const PERMISSIONS_CACHE_SECS: u32 = 60; // 1 minute + // ── Public cache functions ───────────────────────────────────────── /// Fetch a single product's metadata, cached for `PRODUCT_CACHE_SECS`. @@ -62,6 +73,35 @@ pub async fn get_or_fetch_product( .await } +/// Fetch the authenticated caller's permissions on a product, cached for +/// `PERMISSIONS_CACHE_SECS`. Returns the permission strings the API grants for +/// the caller (e.g. `["read", "write"]`); writes are gated on `"write"`. +pub async fn get_or_fetch_permissions( + api_base_url: &str, + account: &str, + product: &str, + api_auth: &crate::ApiAuth, + request_id: &str, + subject: &str, +) -> Result, ProxyError> { + let api_url = format!( + "{}/api/v1/products/{}/{}/permissions", + api_base_url, + utf8_percent_encode(account, PATH_SEGMENT), + utf8_percent_encode(product, PATH_SEGMENT), + ); + let cache_key = cache_key_with_subject(&api_url, Some(subject)); + cached_fetch( + &cache_key, + &api_url, + PERMISSIONS_CACHE_SECS, + api_auth, + request_id, + Some(subject), + ) + .await +} + /// Fetch a single data connection by id, cached for `DATA_CONNECTION_CACHE_SECS`. /// /// Resolving by id (rather than scanning a cached full list) lets the diff --git a/src/lib.rs b/src/lib.rs index 18a9b4a..59d71de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ mod analytics; mod auth; +mod authz; mod backend_auth; mod cache; mod config; @@ -115,36 +116,15 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result Result Result web_sys::Response { // ── CORS ──────────────────────────────────────────────────────────── -fn add_cors(resp: web_sys::Response, allow_post: bool) -> web_sys::Response { +fn add_cors(resp: web_sys::Response) -> web_sys::Response { let h = resp.headers(); - // The proxy is read-only; POST is advertised only on the STS endpoint. - let methods = if allow_post { - "GET, HEAD, POST, OPTIONS" - } else { - "GET, HEAD, OPTIONS" - }; for (name, value) in [ ("access-control-allow-origin", "*"), - ("access-control-allow-methods", methods), + ( + "access-control-allow-methods", + "GET, HEAD, PUT, POST, DELETE, OPTIONS", + ), ("access-control-allow-headers", "*"), ("access-control-expose-headers", "*"), ] { diff --git a/src/registry.rs b/src/registry.rs index aee6647..635d86a 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -3,10 +3,11 @@ use multistore::api::response::BucketEntry; use multistore::error::ProxyError; use multistore::registry::{BucketRegistry, ResolvedBucket}; -use multistore::types::{BucketConfig, ResolvedIdentity, S3Operation}; +use multistore::types::{Action, BucketConfig, ResolvedIdentity, S3Operation}; use serde::Deserialize; use std::collections::HashMap; +use crate::authz::is_write_action; use crate::backend_auth::{apply_backend_auth, BackendAuth}; /// Registry that resolves Source Cooperative products to multistore `BucketConfig`s @@ -50,7 +51,7 @@ impl BucketRegistry for SourceCoopRegistry { &self, name: &str, identity: &ResolvedIdentity, - _operation: &S3Operation, + operation: &S3Operation, ) -> Result { // Bucket names arrive pre-mapped as "account:product". let (account, product) = name @@ -69,6 +70,7 @@ impl BucketRegistry for SourceCoopRegistry { &self.api_auth, &self.request_id, subject, + is_write_action(operation.action()), ) .await?; @@ -85,6 +87,28 @@ impl BucketRegistry for SourceCoopRegistry { ) -> Result, ProxyError> { unimplemented!("Bucket listing is not supported") } + + async fn authorize_key( + &self, + _name: &str, + _identity: &ResolvedIdentity, + action: Action, + _key: &str, + ) -> bool { + // Per-key authorization for batch delete. Correctness depends on + // `get_bucket` having already authorized this caller for `name`: Source + // Cooperative authorizes writes at the product level there (caller holds + // product write permission, connection is writable), so every key in a + // batch delete that reached this point is permitted. The multistore + // default would deny every key, since callers' STS sessions carry no + // per-bucket scopes. If a future multistore ever invoked `authorize_key` + // without a prior successful `get_bucket` for the same `name`, this gate + // alone would be insufficient — it only confirms the op is a write, not + // that the caller is entitled. Gating on a write action is thus + // defense-in-depth: only reached for write batch ops, never blanket- + // allows a read. + is_write_action(action) + } } /// Resolve a product to a `BucketConfig` by querying the Source Cooperative API. @@ -95,6 +119,7 @@ async fn resolve_product( api_auth: &crate::ApiAuth, request_id: &str, subject: Option<&str>, + is_write: bool, ) -> Result { let span = tracing::info_span!( "resolve_product", @@ -139,6 +164,39 @@ async fn resolve_product( ) .await?; + // Authorize writes. The subject-scoped fetches above already cleared the + // caller to *see* this product; a write additionally requires an + // authenticated caller who holds the product's `write` permission, a + // connection that is not read-only, and a connection the proxy can sign as. + if is_write { + // Anonymous callers can never write (and there is no subject to query + // permissions with). + let subject = subject.ok_or(ProxyError::AccessDenied)?; + // Connection-level denials need no caller lookup — check them first so a + // write the connection can't accept skips the permissions API call. A + // connection can sign writes only via an S3 web-identity role. + let signable = matches!( + connection.authentication, + BackendAuth::S3WebIdentityRole { .. } + ); + if connection.read_only || !signable { + return Err(ProxyError::AccessDenied); + } + // The caller must hold the product's `write` permission. + let permissions = crate::cache::get_or_fetch_permissions( + api_base_url, + account, + product, + api_auth, + request_id, + subject, + ) + .await?; + if !permissions.iter().any(|p| p.eq_ignore_ascii_case("write")) { + return Err(ProxyError::AccessDenied); + } + } + // 4. Build BucketConfig let backend_type = match connection.details.provider.as_str() { "s3" => "s3", @@ -284,6 +342,9 @@ pub struct SourceProductMirror { #[derive(Debug, Clone, Deserialize)] pub struct DataConnection { pub data_connection_id: String, + /// Whether the connection forbids writes. Required (no serde default): an + /// absent flag fails the fetch rather than defaulting to writable. + pub read_only: bool, pub details: DataConnectionDetails, /// How the proxy authenticates to this connection's backend. A sibling of /// `details`, matching the Source API's `DataConnection` shape. Absent → diff --git a/tests/authz.rs b/tests/authz.rs new file mode 100644 index 0000000..4289428 --- /dev/null +++ b/tests/authz.rs @@ -0,0 +1,30 @@ +//! Native unit tests for the wasm-free `authz` module, included via `#[path]` +//! (the lib itself is `cdylib` with `test = false`). Mirrors the pattern in +//! `tests/backend_auth.rs`. + +#[path = "../src/authz.rs"] +mod authz; + +use authz::is_write_action; +use multistore::types::Action; + +#[test] +fn reads_are_not_writes() { + assert!(!is_write_action(Action::GetObject)); + assert!(!is_write_action(Action::HeadObject)); + assert!(!is_write_action(Action::ListBucket)); +} + +#[test] +fn mutations_are_writes() { + for action in [ + Action::PutObject, + Action::DeleteObject, + Action::CreateMultipartUpload, + Action::UploadPart, + Action::CompleteMultipartUpload, + Action::AbortMultipartUpload, + ] { + assert!(is_write_action(action), "{action:?} should be a write"); + } +} diff --git a/tests/test_integration.py b/tests/test_integration.py index 71c6858..e1395f8 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -22,9 +22,12 @@ def test_index(): assert "Source Cooperative Data Proxy" in resp.text -def test_write_rejected(): - resp = requests.put(f"{PROXY_URL}/test/test/file.txt") - assert resp.status_code == 405 +def test_anonymous_write_denied(): + # Writes route through the gateway and are authorized there. An + # unauthenticated write to a real product is denied at the gate (403), + # before any backend call. + resp = requests.put(f"{PROXY_URL}/{ACCOUNT}/{PRODUCT}/{OBJECT_KEY}") + assert resp.status_code == 403 def test_options_cors(): diff --git a/wrangler.preview.toml b/wrangler.preview.toml index 7c74542..0f13699 100644 --- a/wrangler.preview.toml +++ b/wrangler.preview.toml @@ -2,6 +2,26 @@ compatibility_date = "2026-03-17" main = "build/worker/shim.mjs" workers_dev = true +[vars] +LOG_LEVEL = "DEBUG" +SOURCE_API_URL = "https://staging.source.coop" + +# Sign the proxy's on-behalf-of API token (and AWS federation assertion) as the +# canonical staging proxy, not this ephemeral preview hostname: staging's API and +# AWS IAM trust only issuer https://data.staging.source.coop + kid data-proxy-1 +# and fetch its JWKS from that host (never the pr-*.workers.dev origin). Signing +# as anything else fails JWKS verification (ERR_JWKS_NO_MATCHING_KEY), so the call +# falls back to anonymous → 403 on restricted products. Requires the `preview` +# environment's OIDC_PROVIDER_KEY to be the staging signing key. +OIDC_PROVIDER_ISSUER = "https://data.staging.source.coop" +OIDC_PROVIDER_KID = "data-proxy-1" + +# Preview is staging-equivalent (shares staging analytics + log stream), so +# trust the staging Ory issuer and accept the staging frontend + CLI client_ids. +# AUTH_AUDIENCE must be set or /.sts fail-closes with 501. +AUTH_ISSUER = "https://auth.staging.source.coop" +AUTH_AUDIENCE = "1123dfa8-469f-44fe-b9f4-9b76f06fd325,a79c9537-be78-454a-9ea1-b96a1be811cc" # staging frontend + source-coop-cli client_ids + [build] command = "cargo install -q worker-build@0.7.5 && worker-build --release"