Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions .github/workflows/preview.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,8 @@ jobs:
with:
worker_name: source-data-proxy-pr-${{ github.event.pull_request.number }}
wrangler_config: wrangler.preview.toml
var_overrides: >-
LOG_LEVEL:DEBUG
SOURCE_API_URL:https://staging.source.coop
OIDC_PROVIDER_ISSUER:https://source-data-proxy-pr-${{ github.event.pull_request.number }}.source-coop.workers.dev
OIDC_PROVIDER_KID:source-proxy-1
AUTH_ISSUER:https://auth.staging.source.coop
# All static preview vars live in wrangler.preview.toml [vars]; only the
# per-PR dynamic bindings are templated here.
service_overrides: >-
PUBLIC_LOG_STREAM:public-log-stream-pr-${{ github.event.pull_request.number }}
environment: preview
Expand Down
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 17 additions & 13 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,16 @@ path = "tests/pagination.rs"
name = "backend_auth"
path = "tests/backend_auth.rs"

[[test]]
name = "authz"
path = "tests/authz.rs"

[dependencies]
# Multistore
multistore = { version = "0.6.0", features = ["azure"] }
multistore-oidc-provider = { version = "0.6.0" }
multistore-path-mapping = { version = "0.6.0" }
multistore-sts = { version = "0.6.0" }
multistore-oidc-provider = "0.6.0"
multistore-path-mapping = "0.6.0"
multistore-sts = "0.6.0"

# Serialization
serde = { version = "1", features = ["derive"] }
Expand Down Expand Up @@ -65,14 +69,14 @@ web-sys = { version = "0.3", features = [
worker = { version = "=0.7.4", features = ["http"] }
worker-macros = { version = "=0.7.4", features = ["http"] }

# multistore-cf-workers 0.6.0 is not on crates.io, so all multistore crates are
# pinned to the v0.6.0 git tag to keep `multistore` single-sourced: a git
# cf-workers alongside registry crates would pull two copies of `multistore`
# and fail to compile. Remove this block once multistore-cf-workers 0.6.0
# publishes and the version deps above resolve from crates.io.
# TEMPORARY: build against the aws-chunked write-decode fix branch of multistore
# until it ships in a release. Without this, the modern aws-cli's default
# trailer-checksum uploads are stored as raw chunk framing (corrupted writes).
# Remove this block and bump the multistore* deps above once
# https://github.com/developmentseed/multistore/pull/92 is released.
[patch.crates-io]
multistore = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" }
multistore-oidc-provider = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" }
multistore-path-mapping = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" }
multistore-sts = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" }
multistore-cf-workers = { git = "https://github.com/developmentseed/multistore", tag = "v0.6.0" }
multistore = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" }
multistore-cf-workers = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" }
multistore-oidc-provider = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" }
multistore-path-mapping = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" }
multistore-sts = { git = "https://github.com/developmentseed/multistore", branch = "fix/decode-aws-chunked-writes" }
20 changes: 20 additions & 0 deletions src/authz.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//! Write-action classification. Kept wasm-free so it can be unit-tested
//! natively (see `tests/authz.rs`), despite the crate's `[lib] test = false`.
//! The rest of the write gate (read-only / signable / permission checks) is
//! trivial enough to live inline in the registry.

use multistore::types::Action;

/// Whether an S3 action mutates the backend. Reads (GET/HEAD/LIST) are served
/// without a write check; everything else is a write and must be authorized.
///
/// A denylist over the closed [`Action`] set, so it is fail-safe by direction:
/// any action that is not explicitly a read is treated as a write. A future
/// read-only action added upstream would be (harmlessly) gated as a write until
/// added here, never the reverse.
pub(crate) fn is_write_action(action: Action) -> bool {
!matches!(
action,
Action::GetObject | Action::HeadObject | Action::ListBucket
)
}
44 changes: 42 additions & 2 deletions src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,25 @@ const PATH_SEGMENT: &AsciiSet = &NON_ALPHANUMERIC
/// Product metadata (`/api/v1/products/{account}/{product}`).
const PRODUCT_CACHE_SECS: u32 = 300; // 5 minutes

/// A single data connection (`/api/v1/data-connections/{id}`). Short to match
/// product metadata: a per-id response is subject-authorized, so the TTL is an
/// A single data connection (`/api/v1/data-connections/{id}`). Matches product
/// metadata: a per-id response is subject-authorized, so the TTL is an
/// authorization-revocation lag, not just a freshness knob.
///
/// Longer than `PERMISSIONS_CACHE_SECS` on purpose, even though `read_only`
/// gates writes from here. This is fetched on *every* request (read and write),
/// so a short TTL taxes the read path; flipping a connection read-only freezes
/// *all* writers and is a deliberate admin act where ~5 min lag is acceptable.
/// Revoking a single (e.g. compromised) account's write grant is the urgent
/// case, and that rides the 60s permission TTL, not this one.
const DATA_CONNECTION_CACHE_SECS: u32 = 300; // 5 minutes

/// Product list for an account (`/api/v1/products/{account}`).
const PRODUCT_LIST_CACHE_SECS: u32 = 60; // 1 minute

/// Caller's permissions on a product (`.../permissions`). Short: it gates writes,
/// so a revoked grant should stop taking effect quickly.
const PERMISSIONS_CACHE_SECS: u32 = 60; // 1 minute

// ── Public cache functions ─────────────────────────────────────────

/// Fetch a single product's metadata, cached for `PRODUCT_CACHE_SECS`.
Expand Down Expand Up @@ -62,6 +73,35 @@ pub async fn get_or_fetch_product(
.await
}

/// Fetch the authenticated caller's permissions on a product, cached for
/// `PERMISSIONS_CACHE_SECS`. Returns the permission strings the API grants for
/// the caller (e.g. `["read", "write"]`); writes are gated on `"write"`.
pub async fn get_or_fetch_permissions(
api_base_url: &str,
account: &str,
product: &str,
api_auth: &crate::ApiAuth,
request_id: &str,
subject: &str,
) -> Result<Vec<String>, ProxyError> {
let api_url = format!(
"{}/api/v1/products/{}/{}/permissions",
api_base_url,
utf8_percent_encode(account, PATH_SEGMENT),
utf8_percent_encode(product, PATH_SEGMENT),
);
let cache_key = cache_key_with_subject(&api_url, Some(subject));
cached_fetch(
&cache_key,
&api_url,
PERMISSIONS_CACHE_SECS,
api_auth,
request_id,
Some(subject),
)
.await
}

/// Fetch a single data connection by id, cached for `DATA_CONNECTION_CACHE_SECS`.
///
/// Resolving by id (rather than scanning a cached full list) lets the
Expand Down
48 changes: 12 additions & 36 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod analytics;
mod auth;
mod authz;
mod backend_auth;
mod cache;
mod config;
Expand Down Expand Up @@ -115,36 +116,15 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result<web_sys:
// methods and bypass the S3 object/bucket path mapping below.
let is_special_path = parts.path.starts_with("/.well-known/") || parts.path == "/.sts";

// POST is only meaningful for STS token exchange; everything else is
// read-only, so don't advertise write methods globally via CORS.
let cors_allow_post = parts.path == "/.sts";

// ── Short-circuit: OPTIONS preflight ────────────────────────────
if parts.method == http::Method::OPTIONS {
return Ok(add_cors(empty_response(204), cors_allow_post));
return Ok(add_cors(empty_response(204)));
}

// ── Short-circuit: write methods ────────────────────────────────
// The proxy is read-only for object/bucket paths, so writes get an
// S3-style 405 rather than falling through to bucket resolution (which
// would misleadingly 404). Special endpoints are exempt.
if !is_special_path
&& matches!(
parts.method,
http::Method::PUT | http::Method::POST | http::Method::DELETE | http::Method::PATCH
)
{
let resp = ErrorResponse {
code: "MethodNotAllowed".to_string(),
message: "Method Not Allowed".to_string(),
resource: String::new(),
request_id: request_id.clone(),
};
return Ok(add_cors(
GatewayResponse::Response(ProxyResult::xml(405, resp.to_xml())).into_web_sys(),
cors_allow_post,
));
}
// Writes (PUT/POST/DELETE) flow through the gateway: the registry authorizes
// them (caller must hold product write permission; the connection must be
// writable and signable) and the backend-auth middleware signs them. See
// `authz` and `backend_auth`.

// ── Short-circuit: STS disabled (fail closed) ───────────────────
// `/.sts` requires an audience restriction (AUTH_AUDIENCE) to be safe —
Expand All @@ -160,7 +140,6 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result<web_sys:
};
return Ok(add_cors(
GatewayResponse::Response(ProxyResult::xml(501, resp.to_xml())).into_web_sys(),
cors_allow_post,
));
}

Expand Down Expand Up @@ -314,7 +293,7 @@ async fn fetch(req: web_sys::Request, env: Env, ctx: Context) -> Result<web_sys:
}
}

let response = add_cors(response, cors_allow_post);
let response = add_cors(response);
if !request_id.is_empty() {
let _ = response.headers().set("x-request-id", &request_id);
}
Expand Down Expand Up @@ -494,17 +473,14 @@ fn empty_response(status: u16) -> web_sys::Response {

// ── CORS ────────────────────────────────────────────────────────────

fn add_cors(resp: web_sys::Response, allow_post: bool) -> web_sys::Response {
fn add_cors(resp: web_sys::Response) -> web_sys::Response {
let h = resp.headers();
// The proxy is read-only; POST is advertised only on the STS endpoint.
let methods = if allow_post {
"GET, HEAD, POST, OPTIONS"
} else {
"GET, HEAD, OPTIONS"
};
for (name, value) in [
("access-control-allow-origin", "*"),
("access-control-allow-methods", methods),
(
"access-control-allow-methods",
"GET, HEAD, PUT, POST, DELETE, OPTIONS",
),
("access-control-allow-headers", "*"),
("access-control-expose-headers", "*"),
] {
Expand Down
65 changes: 63 additions & 2 deletions src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
use multistore::api::response::BucketEntry;
use multistore::error::ProxyError;
use multistore::registry::{BucketRegistry, ResolvedBucket};
use multistore::types::{BucketConfig, ResolvedIdentity, S3Operation};
use multistore::types::{Action, BucketConfig, ResolvedIdentity, S3Operation};
use serde::Deserialize;
use std::collections::HashMap;

use crate::authz::is_write_action;
use crate::backend_auth::{apply_backend_auth, BackendAuth};

/// Registry that resolves Source Cooperative products to multistore `BucketConfig`s
Expand Down Expand Up @@ -50,7 +51,7 @@ impl BucketRegistry for SourceCoopRegistry {
&self,
name: &str,
identity: &ResolvedIdentity,
_operation: &S3Operation,
operation: &S3Operation,
) -> Result<ResolvedBucket, ProxyError> {
// Bucket names arrive pre-mapped as "account:product".
let (account, product) = name
Expand All @@ -69,6 +70,7 @@ impl BucketRegistry for SourceCoopRegistry {
&self.api_auth,
&self.request_id,
subject,
is_write_action(operation.action()),
)
.await?;

Expand All @@ -85,6 +87,28 @@ impl BucketRegistry for SourceCoopRegistry {
) -> Result<Vec<BucketEntry>, ProxyError> {
unimplemented!("Bucket listing is not supported")
}

async fn authorize_key(
&self,
_name: &str,
_identity: &ResolvedIdentity,
action: Action,
_key: &str,
) -> bool {
// Per-key authorization for batch delete. Correctness depends on
// `get_bucket` having already authorized this caller for `name`: Source
// Cooperative authorizes writes at the product level there (caller holds
// product write permission, connection is writable), so every key in a
// batch delete that reached this point is permitted. The multistore
// default would deny every key, since callers' STS sessions carry no
// per-bucket scopes. If a future multistore ever invoked `authorize_key`
// without a prior successful `get_bucket` for the same `name`, this gate
// alone would be insufficient — it only confirms the op is a write, not
// that the caller is entitled. Gating on a write action is thus
// defense-in-depth: only reached for write batch ops, never blanket-
// allows a read.
is_write_action(action)
}
}

/// Resolve a product to a `BucketConfig` by querying the Source Cooperative API.
Expand All @@ -95,6 +119,7 @@ async fn resolve_product(
api_auth: &crate::ApiAuth,
request_id: &str,
subject: Option<&str>,
is_write: bool,
) -> Result<BucketConfig, ProxyError> {
let span = tracing::info_span!(
"resolve_product",
Expand Down Expand Up @@ -139,6 +164,39 @@ async fn resolve_product(
)
.await?;

// Authorize writes. The subject-scoped fetches above already cleared the
// caller to *see* this product; a write additionally requires an
// authenticated caller who holds the product's `write` permission, a
// connection that is not read-only, and a connection the proxy can sign as.
if is_write {
// Anonymous callers can never write (and there is no subject to query
// permissions with).
let subject = subject.ok_or(ProxyError::AccessDenied)?;
// Connection-level denials need no caller lookup — check them first so a
// write the connection can't accept skips the permissions API call. A
// connection can sign writes only via an S3 web-identity role.
let signable = matches!(
connection.authentication,
BackendAuth::S3WebIdentityRole { .. }
);
if connection.read_only || !signable {
return Err(ProxyError::AccessDenied);
}
// The caller must hold the product's `write` permission.
let permissions = crate::cache::get_or_fetch_permissions(
api_base_url,
account,
product,
api_auth,
request_id,
subject,
)
.await?;
if !permissions.iter().any(|p| p.eq_ignore_ascii_case("write")) {
return Err(ProxyError::AccessDenied);
}
}

// 4. Build BucketConfig
let backend_type = match connection.details.provider.as_str() {
"s3" => "s3",
Expand Down Expand Up @@ -284,6 +342,9 @@ pub struct SourceProductMirror {
#[derive(Debug, Clone, Deserialize)]
pub struct DataConnection {
pub data_connection_id: String,
/// Whether the connection forbids writes. Required (no serde default): an
/// absent flag fails the fetch rather than defaulting to writable.
pub read_only: bool,
pub details: DataConnectionDetails,
/// How the proxy authenticates to this connection's backend. A sibling of
/// `details`, matching the Source API's `DataConnection` shape. Absent →
Expand Down
Loading
Loading