From a820f4e490152ffd25031312f1f87a90e7e12c41 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 11:42:47 -0500 Subject: [PATCH 01/13] create new files in develop/ai --- content/develop/ai/featureform/concepts.md | 6 ++++++ content/develop/ai/featureform/configure-auth.md | 6 ++++++ .../develop/ai/featureform/define-and-deploy-features.md | 6 ++++++ content/develop/ai/featureform/mange-workspace.md | 6 ++++++ content/develop/ai/featureform/query-data.md | 6 ++++++ content/develop/ai/featureform/reference.md | 6 ++++++ content/develop/ai/featureform/register-providers.md | 6 ++++++ content/develop/ai/featureform/serve-features.md | 6 ++++++ content/develop/ai/featureform/update-features.md | 6 ++++++ 9 files changed, 54 insertions(+) create mode 100644 content/develop/ai/featureform/concepts.md create mode 100644 content/develop/ai/featureform/configure-auth.md create mode 100644 content/develop/ai/featureform/define-and-deploy-features.md create mode 100644 content/develop/ai/featureform/mange-workspace.md create mode 100644 content/develop/ai/featureform/query-data.md create mode 100644 content/develop/ai/featureform/reference.md create mode 100644 content/develop/ai/featureform/register-providers.md create mode 100644 content/develop/ai/featureform/serve-features.md create mode 100644 content/develop/ai/featureform/update-features.md diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/concepts.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/configure-auth.md b/content/develop/ai/featureform/configure-auth.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/configure-auth.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No 
newline at end of file diff --git a/content/develop/ai/featureform/define-and-deploy-features.md b/content/develop/ai/featureform/define-and-deploy-features.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/define-and-deploy-features.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/mange-workspace.md b/content/develop/ai/featureform/mange-workspace.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/mange-workspace.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/query-data.md b/content/develop/ai/featureform/query-data.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/query-data.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/reference.md b/content/develop/ai/featureform/reference.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/reference.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/register-providers.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/serve-features.md b/content/develop/ai/featureform/serve-features.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/serve-features.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 
+--- \ No newline at end of file diff --git a/content/develop/ai/featureform/update-features.md b/content/develop/ai/featureform/update-features.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/update-features.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file From 935f357e3a86fdfdb1b85c1c7b3be423d264731b Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 11:48:37 -0500 Subject: [PATCH 02/13] typo --- .../ai/featureform/{mange-workspace.md => manage-workspace.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename content/develop/ai/featureform/{mange-workspace.md => manage-workspace.md} (100%) diff --git a/content/develop/ai/featureform/mange-workspace.md b/content/develop/ai/featureform/manage-workspace.md similarity index 100% rename from content/develop/ai/featureform/mange-workspace.md rename to content/develop/ai/featureform/manage-workspace.md From 53b3e06d5d235aa672a2d8f69f6e36925b54cc51 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 12:49:19 -0500 Subject: [PATCH 03/13] DOC-6581 DOC-6582 --- .../ai/featureform/register-providers.md | 203 +++++++++++++++++- 1 file changed, 198 insertions(+), 5 deletions(-) diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md index 71a7066a02..071af22751 100644 --- a/content/develop/ai/featureform/register-providers.md +++ b/content/develop/ai/featureform/register-providers.md @@ -1,6 +1,199 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Register providers +description: Register storage, compute, and catalog providers in a Redis Feature Form workspace, and configure secret backends. +linkTitle: Register providers +weight: 30 +--- + +Register the providers and secret backends a Redis Feature Form workspace needs before you author features or transformations. 
Providers connect the workspace to external systems for storage, compute, serving, or catalog-backed access, and definitions files reference them by name. + +## Prerequisites + +Before you register providers, make sure you have: + +- A workspace. See [Manage workspaces](./manage-workspace.md) for the workspace lifecycle commands. +- The `ff` CLI installed and able to reach the Feature Form server. The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. +- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. + For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. + +The examples on this page use placeholder names like `demo-workspace`, `demo_postgres`, and `spark-main`. Substitute the names you want to use in your own deployment. + +{{< note >}} +**Best practice:** keep the default health check on. Registration surfaces connectivity and secret-resolution problems at the point you can fix them, rather than as silent failures during materialization or serving. Reserve `--skip-health-check` for cases where you've already validated the provider through another channel. +{{< /note >}} + +## Register Postgres for offline storage + +Use Postgres when the workspace needs an offline store and Postgres-backed SQL execution in the same path. As an `offline-store`, Postgres holds the historical feature values that training sets read from. As a `compute` provider, it runs the SQL transformations that produce those values. + +The `` placeholder in `--pg-host` and in the Redis `--redis-host` stands for your Helm release name. With release name `my-ff`, the bundled Postgres service is `my-ff-featureform-provider-postgres`. 
If you connect to an external Postgres or Redis instance instead of the bundled chart addons, use that hostname directly. + +```bash +ff provider register demo_postgres \ + --workspace demo-workspace \ + --type postgres \ + --pg-host -featureform-provider-postgres \ + --pg-port 5432 \ + --pg-database featureform_test \ + --pg-user testuser \ + --pg-password-secret env:PG_PASSWORD \ + --pg-ssl-mode disable +``` + +See the [PostgreSQL documentation](https://www.postgresql.org/docs/) for connection and SSL options. + +## Register Redis as the online store + +Use Redis when the workspace needs an online store for low-latency feature serving. As an `online-store`, Redis holds the latest materialized feature values and serves them to applications at inference time. + +```bash +ff provider register demo_redis \ + --workspace demo-workspace \ + --type redis \ + --redis-host -featureform-redis \ + --redis-port 6379 +``` + +In the quickstart definitions file, the feature view references this provider with `inference_store="demo_redis"`. See the [Redis documentation](https://redis.io/docs/latest/) for deployment options. + +## Register S3 as an offline store + +Use S3 when Feature Form needs an object-storage-backed offline location. As an `offline-store`, S3 holds historical feature values as files (typically Parquet) that training sets read from. Choose S3 when dataset size or retention exceeds what a relational store fits. + +```bash +ff provider register data-lake \ + --workspace demo-workspace \ + --type s3 \ + --s3-bucket featureform-data \ + --s3-region us-west-2 \ + --s3-access-key-id-secret env:AWS_ACCESS_KEY_ID \ + --s3-secret-access-key-secret env:AWS_SECRET_ACCESS_KEY +``` + +Use `--s3-endpoint` for MinIO or LocalStack-style endpoints when needed. See the [Amazon S3 documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/) for bucket and IAM setup. 
+ +## Register Spark for compute + +Use Spark when the workspace needs a compute provider for transformation or materialization workloads. As a `compute` provider, Spark runs the transformation and materialization jobs that produce feature values. Choose Spark when dataset size exceeds what a single SQL engine can handle. + +```bash +ff provider register spark-main \ + --workspace demo-workspace \ + --type spark \ + --spark-master spark://spark-master:7077 +``` + +See the [Apache Spark documentation](https://spark.apache.org/docs/latest/) for cluster and master configuration. + +## Register an Iceberg catalog + +Use an Iceberg catalog provider when the workspace needs catalog-backed offline storage. As an `offline-store`, the catalog tracks versioned table snapshots over object storage. The workspace reads historical feature values from those tables, with schema evolution and time-travel queries. + +```bash +ff provider register iceberg-main \ + --workspace demo-workspace \ + --type iceberg_catalog \ + --iceberg-warehouse s3://featureform-data/warehouse \ + --iceberg-catalog-name featureform \ + --iceberg-rest-uri https://iceberg.example.com +``` + +This example uses the REST catalog backend; the exact required fields depend on which backend (REST, Hive, Glue, and so on) you choose. See the [Apache Iceberg documentation](https://iceberg.apache.org/docs/latest/) for catalog backend options. + +## Verify registration + +```bash +ff provider list --workspace demo-workspace +ff provider get demo_postgres --workspace demo-workspace +``` + +A successful list returns one row per registered provider: + +```text +NAME TYPE WORKSPACE CREATED UPDATED +demo_postgres postgres demo-workspace 2026-05-12T10:14:02Z 2026-05-12T10:14:02Z +demo_redis redis demo-workspace 2026-05-12T10:14:18Z 2026-05-12T10:14:18Z +``` + +Pass `--output json` or `--output yaml` for machine-readable output. If the list is empty or `get` returns an error, the register command did not complete. 
Rerun `ff provider register` to see its health-check output, and confirm the provider name and workspace match the ones you registered. + +## Update or delete a provider + +```bash +ff provider update demo_postgres \ + --workspace demo-workspace \ + --pg-port 5433 + +ff provider delete demo_postgres --workspace demo-workspace +``` + +Use `--force` on `update` when changing values that may break running workloads, such as host, port, or broker addresses. + +## Configure secret providers + +Confirm which secret backend a workspace uses, or register an alternate when `env` is not enough. Production deployments typically move off `env` because it mixes secrets with general configuration, offers no rotation or audit, and surfaces values in process listings. Vault, Kubernetes secrets, and AWS Secrets Manager each address those gaps. + +### Check the built-in `env` provider + +```bash +ff secret-provider list --workspace demo-workspace +ff secret-provider get env --workspace demo-workspace +``` + +### Register another secret provider + +Each backend has different preconditions on the Feature Form server. Pick the one that matches how your server is deployed. + +**Environment provider** — best for local development and bootstrap. The server reads variables from its own process environment. Use a prefix (`--env-prefix FF_`) to avoid collisions with other system variables. + +```bash +ff secret-provider register local-env \ + --workspace demo-workspace \ + --type env \ + --env-prefix FF_ +``` + +**Vault** — best for shared deployments that need rotation and audit. The server must be able to authenticate to Vault: export `VAULT_TOKEN` for token auth, or configure Kubernetes auth (when the server runs in-cluster) or AppRole. The backend uses the KV v2 secrets engine. 
+ +```bash +ff secret-provider register vault-main \ + --workspace demo-workspace \ + --type vault \ + --vault-address https://vault.example.com \ + --vault-token-path /var/run/secrets/vault-token +``` + +**Kubernetes secrets** — best when the server runs inside a Kubernetes cluster and provider credentials are already managed as `Secret` resources. The server's service account needs `get` and `list` permissions on `secrets` in the target namespace. + +```bash +ff secret-provider register k8s-main \ + --workspace demo-workspace \ + --type k8s \ + --k8s-namespace featureform \ + --k8s-secret-name provider-secrets +``` + +**AWS Secrets Manager** — best when provider credentials already live in AWS. The server authenticates using the standard AWS credentials chain (IAM role on the host, instance profile, or `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in the server environment). + +```bash +ff secret-provider register aws-main \ + --workspace demo-workspace \ + --type aws \ + --aws-region us-west-2 +``` + +### Update or delete a secret provider + +```bash +ff secret-provider update local-env \ + --workspace demo-workspace \ + --env-prefix PROD_ + +ff secret-provider delete local-env \ + --workspace demo-workspace \ + --yes +``` + +## Next steps + +With providers registered, the workspace is ready to receive feature definitions. See [Define and deploy features](./define-and-deploy-features.md) for authoring a definitions file and running `ff apply`. 
From e3c16af35ab40e2cc3d758cc99f901097012904a Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 12:49:48 -0500 Subject: [PATCH 04/13] DOC-6583 --- .../ai/featureform/manage-workspace.md | 25 +- content/develop/ai/featureform/streaming.md | 287 ------------------ 2 files changed, 24 insertions(+), 288 deletions(-) diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 71a7066a02..10eab4100d 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -3,4 +3,27 @@ title: description: linkTitle: weight: 1 ---- \ No newline at end of file +--- + +## Manage workspaces + +Use these commands when you need to inspect or change a workspace directly. + +### Core commands + +```bash +ff workspace list +ff workspace get --name demo-workspace +ff workspace update \ + --name demo-workspace \ + --description "Updated description" +ff workspace delete --force +``` + +### Workspace state to remember + +- workspaces have unique names and descriptions +- each workspace tracks `last_applied_version` +- providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped + +Deleting a workspace removes its associated workspace-scoped data. diff --git a/content/develop/ai/featureform/streaming.md b/content/develop/ai/featureform/streaming.md index 24e07e804c..dc49ccd5ae 100644 --- a/content/develop/ai/featureform/streaming.md +++ b/content/develop/ai/featureform/streaming.md @@ -5,292 +5,5 @@ linkTitle: Providers and workspaces weight: 70 --- -Redis Feature Form supports multiple providers, secrets provider management, and workspaces. - -## Register providers - -Registering a provider binds one workspace to an external system used for storage, compute, serving, or catalog-backed access. Definitions files refer to providers by name, so provider registration comes first. 
- -### Register a Postgres provider - -```bash -ff provider register demo_postgres \ - --workspace \ - --type postgres \ - --pg-host -featureform-provider-postgres \ - --pg-port 5432 \ - --pg-database featureform_test \ - --pg-user testuser \ - --pg-password-secret env:PG_PASSWORD \ - --pg-ssl-mode disable -``` - -### Register a Redis provider - -```bash -ff provider register demo_redis \ - --workspace \ - --type redis \ - --redis-host -featureform-redis \ - --redis-port 6379 -``` - -If your deployment uses bundled provider addons, the default service names typically include the Helm release name. Otherwise, use the reachable hostnames for your external systems. - -### Verify registration - -```bash -ff provider list --workspace -ff provider get demo_postgres --workspace -``` - -Provider registration performs health validation by default. Fix connectivity or secret-resolution failures instead of treating `--skip-health-check` as the standard path. - -## Postgres provider setup - -```json metadata -{ - "title": "Postgres provider setup", - "description": "Register a Postgres provider for offline storage and SQL execution in Featureform.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use Postgres when the workspace needs an offline store and Postgres-backed SQL execution in the same path. - -### Registration - -```bash -ff provider register demo_postgres \ - --workspace demo-workspace \ - --type postgres \ - --pg-host -featureform-provider-postgres \ - --pg-port 5432 \ - --pg-database featureform_test \ - --pg-user testuser \ - --pg-password-secret env:PG_PASSWORD \ - --pg-ssl-mode disable -``` - -### Provider role - -`offline-store`, `compute` - -The password reference is resolved through the workspace secret provider at runtime. 
- -## Redis provider setup - -```json metadata -{ - "title": "Redis provider setup", - "description": "Register Redis as the online store used by Featureform feature-view serving.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use Redis when the workspace needs an online store for low-latency feature serving. - -### Registration - -```bash -ff provider register demo_redis \ - --workspace demo-workspace \ - --type redis \ - --redis-host -featureform-redis \ - --redis-port 6379 -``` - -### Provider role - -`online-store` - -In the quickstart definitions file, the feature view references this provider with `inference_store="demo_redis"`. - -## S3 provider setup - -```json metadata -{ - "title": "S3 provider setup", - "description": "Register an S3 provider for Featureform offline-store-backed object locations.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use S3 when Featureform needs an object-storage-backed offline location. - -### Registration - -```bash -ff provider register data-lake \ - --workspace demo-workspace \ - --type s3 \ - --s3-bucket featureform-data \ - --s3-region us-west-2 \ - --s3-access-key-id-secret env:AWS_ACCESS_KEY_ID \ - --s3-secret-access-key-secret env:AWS_SECRET_ACCESS_KEY -``` - -### Provider role - -`offline-store` - -Use `--s3-endpoint` for MinIO or LocalStack-style endpoints when needed. 
- -## Spark provider setup - -```json metadata -{ - "title": "Spark provider setup", - "description": "Register a Spark compute provider for Featureform transformation and materialization workloads.", - "categories": null, - "tableOfContents": {"sections":[{"id":"minimal-registration","title":"Minimal registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use Spark when the workspace needs a compute provider for transformation or materialization workloads. - -### Minimal registration - -```bash -ff provider register spark-main \ - --workspace demo-workspace \ - --type spark \ - --spark-master spark://spark-master:7077 -``` - -### Provider role - -`compute` - -Keep Spark registration separate from dataset authoring and from Iceberg catalog registration. - -## Iceberg provider setup - -```json metadata -{ - "title": "Iceberg provider setup", - "description": "Register an Iceberg catalog provider for Featureform offline-store workflows.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use an Iceberg catalog provider when the workspace needs catalog-backed offline storage. - -### Registration - -```bash -ff provider register iceberg-main \ - --workspace demo-workspace \ - --type iceberg_catalog \ - --iceberg-warehouse s3://featureform-data/warehouse \ - --iceberg-catalog-name featureform \ - --iceberg-rest-uri https://iceberg.example.com -``` - -### Provider role - -`offline-store` - -The exact required fields depend on the catalog backend you choose. - -## Configure secret providers - -Use this section to confirm which secret backend a workspace will use and to register additional backends when `env` is not enough. 
- -### Check the built-in `env` provider - -```bash -ff secret-provider list --workspace demo-workspace -ff secret-provider get env --workspace demo-workspace -``` - -### Register another secret provider - -Environment provider: - -```bash -ff secret-provider register local-env \ - --workspace demo-workspace \ - --type env \ - --env-prefix FF_ -``` - -Vault: - -```bash -ff secret-provider register vault-main \ - --workspace demo-workspace \ - --type vault \ - --vault-address https://vault.example.com \ - --vault-token-path /var/run/secrets/vault-token -``` - -Kubernetes: - -```bash -ff secret-provider register k8s-main \ - --workspace demo-workspace \ - --type k8s \ - --k8s-namespace featureform \ - --k8s-secret-name provider-secrets -``` - -AWS Secrets Manager: - -```bash -ff secret-provider register aws-main \ - --workspace demo-workspace \ - --type aws \ - --aws-region us-west-2 -``` - -### Update or delete - -```bash -ff secret-provider update local-env \ - --workspace demo-workspace \ - --env-prefix PROD_ - -ff secret-provider delete local-env \ - --workspace demo-workspace \ - --yes -``` - -## Manage workspaces - -Use these commands when you need to inspect or change a workspace directly. - -### Core commands - -```bash -ff workspace list -ff workspace get --name demo-workspace -ff workspace update \ - --name demo-workspace \ - --description "Updated description" -ff workspace delete --force -``` - -### Workspace state to remember - -- workspaces have unique names and descriptions -- each workspace tracks `last_applied_version` -- providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped - -Deleting a workspace removes its associated workspace-scoped data. 
From 734b8ed57aa88f0ab542c6aa81fbac38ad77dda0 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 13:38:08 -0500 Subject: [PATCH 05/13] indent list --- content/develop/ai/featureform/register-providers.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md index 071af22751..de1cb2d1a9 100644 --- a/content/develop/ai/featureform/register-providers.md +++ b/content/develop/ai/featureform/register-providers.md @@ -12,9 +12,10 @@ Register the providers and secret backends a Redis Feature Form workspace needs Before you register providers, make sure you have: - A workspace. See [Manage workspaces](./manage-workspace.md) for the workspace lifecycle commands. -- The `ff` CLI installed and able to reach the Feature Form server. The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. -- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. - For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. +- The `ff` CLI installed and able to reach the Feature Form server. + - The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. +- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. + - For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. 
For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. The examples on this page use placeholder names like `demo-workspace`, `demo_postgres`, and `spark-main`. Substitute the names you want to use in your own deployment. From cc4899b77e687a7728f4964aed5a1c11fb747572 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Thu, 14 May 2026 14:28:18 -0500 Subject: [PATCH 06/13] review feedback --- .../ai/featureform/manage-workspace.md | 16 +- content/develop/ai/featureform/overview.md | 2 +- .../ai/featureform/register-providers.md | 169 +++++++++--------- content/develop/ai/featureform/streaming.md | 11 +- 4 files changed, 97 insertions(+), 101 deletions(-) diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 10eab4100d..32634f3efc 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -1,15 +1,13 @@ --- -title: -description: -linkTitle: -weight: 1 +title: Manage workspaces +description: Inspect and manage Redis Feature Form workspaces with the ff CLI. +linkTitle: Manage workspaces +weight: 20 --- -## Manage workspaces - Use these commands when you need to inspect or change a workspace directly. 
-### Core commands +## Core commands ```bash ff workspace list @@ -20,9 +18,9 @@ ff workspace update \ ff workspace delete --force ``` -### Workspace state to remember +## Workspace state to remember -- workspaces have unique names and descriptions +- workspaces have unique names and optional descriptions - each workspace tracks `last_applied_version` - providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped diff --git a/content/develop/ai/featureform/overview.md b/content/develop/ai/featureform/overview.md index 39c2f12b6e..77ef44e215 100644 --- a/content/develop/ai/featureform/overview.md +++ b/content/develop/ai/featureform/overview.md @@ -45,4 +45,4 @@ The latest release adds enterprise-oriented capabilities: ## What to read next - [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}) -- [Connect providers]({{< relref "/develop/ai/featureform/streaming" >}}) +- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md index de1cb2d1a9..cdae9a1a12 100644 --- a/content/develop/ai/featureform/register-providers.md +++ b/content/develop/ai/featureform/register-providers.md @@ -5,17 +5,16 @@ linkTitle: Register providers weight: 30 --- -Register the providers and secret backends a Redis Feature Form workspace needs before you author features or transformations. Providers connect the workspace to external systems for storage, compute, serving, or catalog-backed access, and definitions files reference them by name. +Register the providers and secret backends Redis Feature Form needs before you author features or transformations. Providers connect the workspace to external systems for storage, compute, serving, or catalog-backed access, and definitions files reference them by name. ## Prerequisites Before you register providers, make sure you have: -- A workspace. 
See [Manage workspaces](./manage-workspace.md) for the workspace lifecycle commands. -- The `ff` CLI installed and able to reach the Feature Form server. +- A workspace. See [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the workspace lifecycle commands. +- The `ff` CLI installed and able to reach the Feature Form server. - The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. -- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. - - For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. +- A secret provider registered to back any credentials your provider commands reference. Each workspace ships with a default `env` secret provider that reads from the Feature Form server's process environment. To use Vault, Kubernetes secrets, or AWS Secrets Manager instead, register that backend before you register providers that reference it. See [Configure secret providers](#configure-secret-providers). The examples on this page use placeholder names like `demo-workspace`, `demo_postgres`, and `spark-main`. Substitute the names you want to use in your own deployment. @@ -23,43 +22,110 @@ The examples on this page use placeholder names like `demo-workspace`, `demo_pos **Best practice:** keep the default health check on. Registration surfaces connectivity and secret-resolution problems at the point you can fix them, rather than as silent failures during materialization or serving. Reserve `--skip-health-check` for cases where you've already validated the provider through another channel. 
{{< /note >}} -## Register Postgres for offline storage +## Configure secret providers + +Each workspace starts with a default `env` secret provider that resolves references such as `env:PG_PASSWORD` from the Feature Form server's process environment. Production deployments typically move off `env` because it mixes secrets with general configuration, offers no rotation or audit, and surfaces values in process listings. Vault, Kubernetes secrets, and AWS Secrets Manager each address those gaps. + +### Check the built-in `env` secret provider + +```bash +ff secret-provider list --workspace demo-workspace +ff secret-provider get env --workspace demo-workspace +``` + +### Register another secret provider + +Each backend has different preconditions on the Feature Form server. Pick the one that matches how your server is deployed. + +**Environment secret provider** — best for local development and bootstrap. The server reads variables from its own process environment. Use a prefix (`--env-prefix FF_`) to avoid collisions with other system variables. + +```bash +ff secret-provider register local-env \ + --workspace demo-workspace \ + --type env \ + --env-prefix FF_ +``` -Use Postgres when the workspace needs an offline store and Postgres-backed SQL execution in the same path. As an `offline-store`, Postgres holds the historical feature values that training sets read from. As a `compute` provider, it runs the SQL transformations that produce those values. +**Vault** — best for shared deployments that need rotation and audit. The server must be able to authenticate to Vault: export `VAULT_TOKEN` for token auth, or configure Kubernetes auth (when the server runs in-cluster) or AppRole. The backend uses the KV v2 secrets engine. 
+ +```bash +ff secret-provider register vault-main \ + --workspace demo-workspace \ + --type vault \ + --vault-address https://vault.example.com \ + --vault-token-path /var/run/secrets/vault-token +``` -The `` placeholder in `--pg-host` and in the Redis `--redis-host` stands for your Helm release name. With release name `my-ff`, the bundled Postgres service is `my-ff-featureform-provider-postgres`. If you connect to an external Postgres or Redis instance instead of the bundled chart addons, use that hostname directly. +**Kubernetes secrets** — best when the server runs inside a Kubernetes cluster and provider credentials are already managed as `Secret` resources. The server's service account needs `get` and `list` permissions on `secrets` in the target namespace. + +```bash +ff secret-provider register k8s-main \ + --workspace demo-workspace \ + --type k8s \ + --k8s-namespace featureform \ + --k8s-secret-name provider-secrets +``` + +**AWS Secrets Manager** — best when provider credentials already live in AWS. The server authenticates using the standard AWS credentials chain (IAM role on the host, instance profile, or `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in the server environment). + +```bash +ff secret-provider register aws-main \ + --workspace demo-workspace \ + --type aws \ + --aws-region us-west-2 +``` + +### Update or delete a secret provider + +```bash +ff secret-provider update local-env \ + --workspace demo-workspace \ + --env-prefix PROD_ + +ff secret-provider delete local-env \ + --workspace demo-workspace \ + --yes +``` + +## Register Postgres as an offline store and compute provider + +Register Postgres when your analytical data lives in Postgres and you want Feature Form to manage feature engineering against it. As an `offline-store`, Postgres holds dataset candidates for feature engineering and the Feature Form-managed datasets that result, such as transformed datasets, training sets, and feature views. 
As a `compute` provider, Postgres runs the workloads Feature Form orchestrates, such as SQL transformations on primary datasets. + +Point `--pg-host` at the Postgres instance you want Feature Form to use — typically a managed instance such as Amazon RDS or Aurora in production. To use the Postgres service bundled with the Helm chart for local or non-production work, set `--pg-host` to `<release-name>-featureform-provider-postgres`, where `<release-name>` is your Helm release name. ```bash ff provider register demo_postgres \ --workspace demo-workspace \ --type postgres \ - --pg-host -featureform-provider-postgres \ + --pg-host featureform-prod.cluster-abc123.us-west-2.rds.amazonaws.com \ --pg-port 5432 \ --pg-database featureform_test \ --pg-user testuser \ --pg-password-secret env:PG_PASSWORD \ - --pg-ssl-mode disable + --pg-ssl-mode require ``` See the [PostgreSQL documentation](https://www.postgresql.org/docs/) for connection and SSL options. -## Register Redis as the online store +## Register Redis as an online store + +Register Redis when Redis is your low-latency inference database for serving features. As an `online-store`, Redis holds the latest materialized feature values and serves them to applications at inference time. -Use Redis when the workspace needs an online store for low-latency feature serving. As an `online-store`, Redis holds the latest materialized feature values and serves them to applications at inference time. +Point `--redis-host` at the Redis deployment you want Feature Form to use — typically a managed deployment such as Redis Cloud in production. To use the Redis service bundled with the Helm chart for local or non-production work, set `--redis-host` to `<release-name>-featureform-redis`, where `<release-name>` is your Helm release name.
```bash ff provider register demo_redis \ --workspace demo-workspace \ --type redis \ - --redis-host -featureform-redis \ - --redis-port 6379 + --redis-host redis-12345.c1.us-west-2-2.ec2.cloud.redislabs.com \ + --redis-port 12345 ``` In the quickstart definitions file, the feature view references this provider with `inference_store="demo_redis"`. See the [Redis documentation](https://redis.io/docs/latest/) for deployment options. ## Register S3 as an offline store -Use S3 when Feature Form needs an object-storage-backed offline location. As an `offline-store`, S3 holds historical feature values as files (typically Parquet) that training sets read from. Choose S3 when dataset size or retention exceeds what a relational store fits. +Register S3 when Feature Form needs an object-storage-backed offline location. As an `offline-store`, S3 holds historical feature values as files (typically Parquet) that training sets read from. Choose S3 when dataset size or retention exceeds what a relational store fits. ```bash ff provider register data-lake \ @@ -73,9 +139,9 @@ ff provider register data-lake \ Use `--s3-endpoint` for MinIO or LocalStack-style endpoints when needed. See the [Amazon S3 documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/) for bucket and IAM setup. -## Register Spark for compute +## Register Spark as a compute provider -Use Spark when the workspace needs a compute provider for transformation or materialization workloads. As a `compute` provider, Spark runs the transformation and materialization jobs that produce feature values. Choose Spark when dataset size exceeds what a single SQL engine can handle. +Register Spark when you need a compute provider for transformation or materialization workloads at scale. As a `compute` provider, Spark runs the transformation and materialization jobs that produce feature values. Choose Spark when dataset size exceeds what a single SQL engine can handle. 
```bash ff provider register spark-main \ @@ -88,7 +154,7 @@ See the [Apache Spark documentation](https://spark.apache.org/docs/latest/) for ## Register an Iceberg catalog -Use an Iceberg catalog provider when the workspace needs catalog-backed offline storage. As an `offline-store`, the catalog tracks versioned table snapshots over object storage. The workspace reads historical feature values from those tables, with schema evolution and time-travel queries. +Register an Iceberg catalog provider when you need catalog-backed offline storage. As an `offline-store`, the catalog tracks versioned table snapshots over object storage. The workspace reads historical feature values from those tables, with schema evolution and time-travel queries. ```bash ff provider register iceberg-main \ @@ -130,71 +196,6 @@ ff provider delete demo_postgres --workspace demo-workspace Use `--force` on `update` when changing values that may break running workloads, such as host, port, or broker addresses. -## Configure secret providers - -Confirm which secret backend a workspace uses, or register an alternate when `env` is not enough. Production deployments typically move off `env` because it mixes secrets with general configuration, offers no rotation or audit, and surfaces values in process listings. Vault, Kubernetes secrets, and AWS Secrets Manager each address those gaps. - -### Check the built-in `env` provider - -```bash -ff secret-provider list --workspace demo-workspace -ff secret-provider get env --workspace demo-workspace -``` - -### Register another secret provider - -Each backend has different preconditions on the Feature Form server. Pick the one that matches how your server is deployed. - -**Environment provider** — best for local development and bootstrap. The server reads variables from its own process environment. Use a prefix (`--env-prefix FF_`) to avoid collisions with other system variables. 
- -```bash -ff secret-provider register local-env \ - --workspace demo-workspace \ - --type env \ - --env-prefix FF_ -``` - -**Vault** — best for shared deployments that need rotation and audit. The server must be able to authenticate to Vault: export `VAULT_TOKEN` for token auth, or configure Kubernetes auth (when the server runs in-cluster) or AppRole. The backend uses the KV v2 secrets engine. - -```bash -ff secret-provider register vault-main \ - --workspace demo-workspace \ - --type vault \ - --vault-address https://vault.example.com \ - --vault-token-path /var/run/secrets/vault-token -``` - -**Kubernetes secrets** — best when the server runs inside a Kubernetes cluster and provider credentials are already managed as `Secret` resources. The server's service account needs `get` and `list` permissions on `secrets` in the target namespace. - -```bash -ff secret-provider register k8s-main \ - --workspace demo-workspace \ - --type k8s \ - --k8s-namespace featureform \ - --k8s-secret-name provider-secrets -``` - -**AWS Secrets Manager** — best when provider credentials already live in AWS. The server authenticates using the standard AWS credentials chain (IAM role on the host, instance profile, or `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in the server environment). - -```bash -ff secret-provider register aws-main \ - --workspace demo-workspace \ - --type aws \ - --aws-region us-west-2 -``` - -### Update or delete a secret provider - -```bash -ff secret-provider update local-env \ - --workspace demo-workspace \ - --env-prefix PROD_ - -ff secret-provider delete local-env \ - --workspace demo-workspace \ - --yes -``` - ## Next steps -With providers registered, the workspace is ready to receive feature definitions. See [Define and deploy features](./define-and-deploy-features.md) for authoring a definitions file and running `ff apply`. +With providers registered, the workspace is ready to receive feature definitions. 
See [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) for authoring a definitions file and running `ff apply`. diff --git a/content/develop/ai/featureform/streaming.md b/content/develop/ai/featureform/streaming.md index dc49ccd5ae..41a6bd4852 100644 --- a/content/develop/ai/featureform/streaming.md +++ b/content/develop/ai/featureform/streaming.md @@ -1,9 +1,6 @@ --- -title: Providers and workspaces -description: Build stream-backed features with Kafka, streaming transformations, and Redis serving. -linkTitle: Providers and workspaces -weight: 70 +title: +description: +linkTitle: +weight: 1 --- - - - From 12a7a2ed5c6171a27c49b584602085aefb503cd4 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 09:49:39 -0500 Subject: [PATCH 07/13] DOC-6588 --- content/develop/ai/featureform/concepts.md | 118 +++++++++++++++++- .../training-sets-and-feature-views.md | 116 ----------------- 2 files changed, 113 insertions(+), 121 deletions(-) delete mode 100644 content/develop/ai/featureform/training-sets-and-feature-views.md diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index 71a7066a02..5c6c926cba 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -1,6 +1,114 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Concepts +description: Learn the core Feature Form concepts behind workspaces, providers, secrets, and serving. +linkTitle: Concepts +weight: 30 +--- + +These pages explain the resource model and the boundaries that matter when you operate Feature Form. + +## Resources and workspace graph + +A Feature Form workspace owns one logical resource graph. When you run `ff apply`, Feature Form compares the submitted desired state with the current graph and commits a new version if the change is accepted. 
+ +### Resource types in the graph + +- entities +- datasets +- transformations +- features +- labels +- training sets +- feature views + +### Why the graph matters + +- it powers lineage and dependency views +- it tracks `last_applied_version` +- it feeds serving metadata from committed state + +### Useful commands + +```bash +ff graph workspace overview --workspace demo-workspace +ff graph workspace stats --workspace demo-workspace +ff graph dataset get demo_transactions --workspace demo-workspace +ff graph feature-view get demo_customer_feature_view --workspace demo-workspace +``` + +## Providers and provider roles + +A provider is a workspace-scoped connection to external infrastructure. Definitions files reference providers by name, but the provider itself must already be registered in the workspace. + +### Provider roles + +- `offline-store` for batch data and materialized datasets +- `online-store` for low-latency serving +- `compute` for transformations and materialization work +- `streaming` for streaming integrations + +### Core providers documented here + +- Postgres: `offline-store`, `compute` +- Redis: `online-store` +- S3: `offline-store` +- Spark: `compute` +- Iceberg catalog: `offline-store` + +### Workflow mapping + +- Datasets and training sets need an offline store. +- Feature views need an online store. +- SQL and Spark transformations need compute. +- One provider can fill more than one role. + +## Secrets and secret references + +Feature Form stores secret references in provider configuration instead of storing plaintext secret values itself. A provider config can contain a reference like `env:PG_PASSWORD`, which Feature Form resolves through a registered secret provider at runtime. 
+ +### Mental model + +- A secret provider is a workspace-scoped backend such as `env`, Vault, Kubernetes, or AWS Secrets Manager +- A secret reference is the value stored in provider config +- Data providers use secret references but do not own secret storage + +### Default path for a new workspace + +Every new workspace creates a built-in `env` secret provider. That makes references such as `env:PG_PASSWORD` valid as long as the runtime environment actually exposes `PG_PASSWORD`. + +The important detail is runtime scope: in deployed environments, the resolving process is usually the Featureform server, not your local CLI shell. + +### What Featureform stores + +- Secret provider metadata and configuration +- Secret references embedded in provider configuration + +### What Featureform does not store + +- Plaintext secret values from external backends + +## Serving and feature views + +A feature view is the serving interface for a set of features keyed by an entity. In the documented Redis-backed workflow, the feature view is what applications and model services read from at inference time. + +### A feature view includes + +- the feature-view name +- the logical entity and key columns +- the served feature schema +- the online provider +- serving version and key-prefix details + +### Serving requires + +- a registered online store such as Redis +- a committed graph version containing the feature view +- ready serving metadata for that workspace and view + +### Main entry points + +- gRPC: `ServingService.Serve`, `ServingService.GetServingMetadata` +- REST: `/api/v1/serve` +- Python client: `client.serve(...)` + +Serving reads and serving-metadata reads are separate RBAC permissions. 
diff --git a/content/develop/ai/featureform/training-sets-and-feature-views.md b/content/develop/ai/featureform/training-sets-and-feature-views.md deleted file mode 100644 index 11acee80c3..0000000000 --- a/content/develop/ai/featureform/training-sets-and-feature-views.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: Concepts -description: Learn the core Feature Form concepts behind workspaces, providers, secrets, and serving. -linkTitle: Concepts -weight: 30 ---- - -These pages explain the resource model and the boundaries that matter when you operate Feature Form. - -## Resources and workspace graph - -A Feature Form workspace owns one logical resource graph. When you run `ff apply`, Feature Form compares the submitted desired state with the current graph and commits a new version if the change is accepted. - -### Resource types in the graph - -- entities -- datasets -- transformations -- features -- labels -- training sets -- feature views - -### Why the graph matters - -- it powers lineage and dependency views -- it tracks `last_applied_version` -- it feeds serving metadata from committed state - -### Useful commands - -```bash -ff graph workspace overview --workspace demo-workspace -ff graph workspace stats --workspace demo-workspace -ff graph dataset get demo_transactions --workspace demo-workspace -ff graph feature-view get demo_customer_feature_view --workspace demo-workspace -``` - -## Providers and provider roles - -A provider is a workspace-scoped connection to external infrastructure. Definitions files reference providers by name, but the provider itself must already be registered in the workspace. 
- -### Provider roles - -- `offline-store` for batch data and materialized datasets -- `online-store` for low-latency serving -- `compute` for transformations and materialization work -- `streaming` for streaming integrations - -### Core providers documented here - -- Postgres: `offline-store`, `compute` -- Redis: `online-store` -- S3: `offline-store` -- Spark: `compute` -- Iceberg catalog: `offline-store` - -### Workflow mapping - -- Datasets and training sets need an offline store. -- Feature views need an online store. -- SQL and Spark transformations need compute. -- One provider can fill more than one role. - - -## Secrets and secret references - -Feature Form stores secret references in provider configuration instead of storing plaintext secret values itself. A provider config can contain a reference like `env:PG_PASSWORD`, which Feature Form resolves through a registered secret provider at runtime. - -### Mental model - -- A secret provider is a workspace-scoped backend such as `env`, Vault, Kubernetes, or AWS Secrets Manager -- A secret reference is the value stored in provider config -- Data providers use secret references but do not own secret storage - -### Default path for a new workspace - -Every new workspace creates a built-in `env` secret provider. That makes references such as `env:PG_PASSWORD` valid as long as the runtime environment actually exposes `PG_PASSWORD`. - -The important detail is runtime scope: in deployed environments, the resolving process is usually the Featureform server, not your local CLI shell. - -### What Featureform stores - -- Secret provider metadata and configuration -- Secret references embedded in provider configuration - -### What Featureform does not store - -- Plaintext secret values from external backends - -## Serving and feature views - -A feature view is the serving interface for a set of features keyed by an entity. 
In the documented Redis-backed workflow, the feature view is what applications and model services read from at inference time. - -### A feature view includes - -- the feature-view name -- the logical entity and key columns -- the served feature schema -- the online provider -- serving version and key-prefix details - -### Serving requires - -- a registered online store such as Redis -- a committed graph version containing the feature view -- ready serving metadata for that workspace and view - -### Main entry points - -- gRPC: `ServingService.Serve`, `ServingService.GetServingMetadata` -- REST: `/api/v1/serve` -- Python client: `client.serve(...)` - -Serving reads and serving-metadata reads are separate RBAC permissions. - From 5eccecdcb9802ee2fe178a19eb9d32a87a5d9b1d Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 10:03:22 -0500 Subject: [PATCH 08/13] concepts rough draft --- content/develop/ai/featureform/concepts.md | 213 ++++++++++++++------- 1 file changed, 141 insertions(+), 72 deletions(-) diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index 5c6c926cba..f4009cec71 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -1,114 +1,183 @@ --- -title: Concepts -description: Learn the core Feature Form concepts behind workspaces, providers, secrets, and serving. +title: Redis Feature Form concepts +description: Learn the workspace, resource graph, provider, secret, and serving model behind Redis Feature Form. linkTitle: Concepts weight: 30 --- -These pages explain the resource model and the boundaries that matter when you operate Feature Form. +Redis Feature Form is a feature platform: it manages how raw data in your existing systems becomes the entity-keyed values your models read at inference time. 
This page introduces the vocabulary and the model behind that workflow, so the rest of the documentation reads as application rather than memorization. -## Resources and workspace graph +## How the pieces fit together -A Feature Form workspace owns one logical resource graph. When you run `ff apply`, Feature Form compares the submitted desired state with the current graph and commits a new version if the change is accepted. +A Feature Form deployment runs one or more **workspaces**. Each workspace owns a versioned **resource graph** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **definitions file** and submit it with `ff apply`. -### Resource types in the graph +The graph itself is data, not credentials or connections. **Providers** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **secret references** in those providers point at a secret backend that holds the actual passwords and tokens. At the end of the chain, a **feature view** is the single resource the rest of your stack reads from to serve features online. -- entities -- datasets -- transformations -- features -- labels -- training sets -- feature views +Each of these terms is unpacked in the rest of this page. -### Why the graph matters +## Workspaces -- it powers lineage and dependency views -- it tracks `last_applied_version` -- it feeds serving metadata from committed state +A workspace is the tenant boundary for everything Feature Form manages. The graph, the providers, the secret references, the catalog of materialized locations, and the serving metadata all live inside one workspace and cannot leak across to another. -### Useful commands +That isolation is the unit you use to separate environments — dev, staging, prod — or to give independent teams their own slice of a shared deployment. 
Two workspaces can point at the same external Postgres database and still not see each other's resources, because the graph that names those resources is workspace-scoped. -```bash -ff graph workspace overview --workspace demo-workspace -ff graph workspace stats --workspace demo-workspace -ff graph dataset get demo_transactions --workspace demo-workspace -ff graph feature-view get demo_customer_feature_view --workspace demo-workspace +A workspace also tracks `last_applied_version`, a counter that advances each time the graph commits a new version. Inspection and serving commands always read from the latest committed version, not from a draft. + +To create, inspect, update, or delete workspaces, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). + +## The resource graph + +The resource graph is the single object that represents the desired state of a workspace. Every feature, label, transformation, dataset, and feature view belongs to that graph and references the others by name. + +Two properties make the graph the right mental model: + +- **It is versioned as a whole.** When you submit a change, Feature Form commits a new graph version atomically. Either everything in the change lands together or nothing does. You don't end up with half-applied feature definitions. +- **It is declarative.** You describe what the graph should look like, not the sequence of steps to get there. Feature Form is responsible for figuring out the delta between what exists and what you've asked for. + +### Resource types + +A graph is built from seven resource types. New users encountering Feature Form for the first time benefit from learning these as a vocabulary list — every other concept on this page builds on them. + +- **Entities** identify the real-world objects features describe, such as a `customer` or `order`. Other resources join on the entity's key column. 
+- **Datasets** point at an existing table, view, or file on an offline store and make it visible to the graph. The data itself stays where it lives; Feature Form just registers a handle to it. +- **Transformations** produce new datasets from existing ones, expressed as SQL or as a Spark job. A transformation describes the shape of the output; the compute that runs it is supplied by a provider. +- **Features** are entity-keyed values that get served at inference time. A feature attaches to a column of a dataset, optionally applies an aggregation (such as `SUM` over a 7-day window), and declares which provider owns its computation. +- **Labels** look like features but feed offline training rather than online serving. They carry the value a model is trying to predict. +- **Training sets** join one or more features with a label on the entity key, so an offline training job reads a single time-aligned table instead of stitching things together by hand. +- **Feature views** are the online serving interface for a group of features. They are the only resource that downstream applications and model services interact with directly. + +A short definitions file makes the shape concrete. Don't worry about the syntax yet — the point is to see how the vocabulary above appears as code.
+ +```python +import featureform as ff +from datetime import timedelta + +customer = ff.Entity(name="customer") + +transactions = postgres.dataset( + name="transactions_raw", + table="transactions", + timestamp_column="timestamp", +) + +@postgres.sql_transformation(name="customer_daily_rollups", inputs=[transactions]) +def customer_daily_rollups() -> str: + return """ + SELECT customer_id, + date_trunc('day', timestamp) AS event_day, + SUM(transaction_amount) AS total_amount + FROM {{transactions_raw}} + GROUP BY 1, 2 + """ + +customer_total_amount_7d = ( + ff.Feature(name="customer_total_amount_7d") + .from_dataset(customer_daily_rollups, entity="customer", + entity_column="customer_id", value="total_amount", + timestamp="event_day") + .aggregate(function=ff.AggregateFunction.SUM, window=timedelta(days=7)) +) + +customer_risk_view = ff.FeatureView( + name="customer_risk_feature_view", + entity="customer", + features=[customer_total_amount_7d], + inference_store="demo_redis", +) ``` -## Providers and provider roles +### Definitions files and `ff apply` -A provider is a workspace-scoped connection to external infrastructure. Definitions files reference providers by name, but the provider itself must already be registered in the workspace. +The Python file above is the source of truth for what the graph should look like — not a script that mutates Feature Form imperatively. When you run `ff apply`, Feature Form imports the file, collects the resources it defines, and treats that set as the workspace's desired state. A planner compares the submission with the current graph, and if the change is accepted, a new graph version is committed. -### Provider roles +By default, an apply is replacement-oriented: a resource that exists in the workspace but is not in the submitted set is a candidate for removal. That behavior is what makes the file a true source of truth. 
When you intentionally submit a partial set and want missing resources to stay untouched, you can apply in merge mode instead. -- `offline-store` for batch data and materialized datasets -- `online-store` for low-latency serving -- `compute` for transformations and materialization work -- `streaming` for streaming integrations +{{< note >}} +**Definitions files describe features, not infrastructure.** Providers and secret backends are registered separately by a workspace admin. Definitions files reference providers by name and assume they already exist. This separation keeps feature authors away from credentials and infrastructure choices. +{{< /note >}} -### Core providers documented here +For an end-to-end walkthrough of authoring a definitions file and applying it, see the [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}). For the full apply lifecycle and editing loop, see [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) and [Update features]({{< relref "/develop/ai/featureform/update-features" >}}). -- Postgres: `offline-store`, `compute` -- Redis: `online-store` -- S3: `offline-store` -- Spark: `compute` -- Iceberg catalog: `offline-store` +## Providers -### Workflow mapping +A provider is the workspace's connection to an external system. It carries the host, port, credentials reference, and any backend-specific configuration Feature Form needs to talk to that system. Resources in the graph reference providers by name, so a provider must be registered in the workspace before any resource that uses it can be applied. -- Datasets and training sets need an offline store. -- Feature views need an online store. -- SQL and Spark transformations need compute. -- One provider can fill more than one role. 
+Every provider fills one or more **roles**, which describe the kind of work it can do for the workspace: -## Secrets and secret references +| Role | What it does | +| --------------- | ----------------------------------------------------------------------- | +| `offline-store` | Holds batch data and materialized datasets the graph reads from. | +| `online-store` | Serves materialized feature values to applications at low latency. | +| `compute` | Runs transformations and materialization jobs. | +| `streaming` | Connects the workspace to streaming sources. | -Feature Form stores secret references in provider configuration instead of storing plaintext secret values itself. A provider config can contain a reference like `env:PG_PASSWORD`, which Feature Form resolves through a registered secret provider at runtime. +One provider often fills more than one role. Postgres, for example, is commonly registered as both `offline-store` and `compute` because the same instance that holds datasets can run SQL transformations against them. The documented integrations and their typical roles: -### Mental model +| Provider | Typical roles | +| ----------------- | ------------------------------ | +| Postgres | `offline-store`, `compute` | +| Redis | `online-store` | +| S3 | `offline-store` | +| Spark | `compute` | +| Iceberg catalog | `offline-store` | -- A secret provider is a workspace-scoped backend such as `env`, Vault, Kubernetes, or AWS Secrets Manager -- A secret reference is the value stored in provider config -- Data providers use secret references but do not own secret storage +The role model is what lets a graph stay portable: a feature definition doesn't care that compute happens to be Postgres in dev and Spark in prod, only that some provider fills the `compute` role. -### Default path for a new workspace +To register providers in a workspace, see [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}). 
-Every new workspace creates a built-in `env` secret provider. That makes references such as `env:PG_PASSWORD` valid as long as the runtime environment actually exposes `PG_PASSWORD`. +## Secrets and secret references -The important detail is runtime scope: in deployed environments, the resolving process is usually the Featureform server, not your local CLI shell. +Feature Form never stores plaintext credentials in the graph. A provider configuration carries a **secret reference** that looks like `env:PG_PASSWORD`, and Feature Form resolves that reference through a registered **secret provider** at the moment the credential is needed. -### What Featureform stores +Two consequences are worth internalizing as a new user: -- Secret provider metadata and configuration -- Secret references embedded in provider configuration +- **The graph is safe to inspect and export.** Nothing in it contains a usable credential. You can hand the graph to another team, version it, or paste it into a ticket without leaking secrets. +- **The process that resolves a reference is whichever process actually needs the credential.** In a deployed environment, that's almost always the Feature Form server, not your local CLI shell. A reference such as `env:PG_PASSWORD` reads from the server's process environment, not yours. -### What Featureform does not store +Every new workspace is created with a built-in `env` secret provider, which makes `env:` references work out of the box for local development. Production deployments typically register a Vault, Kubernetes-secrets, or AWS Secrets Manager backend instead, because the `env` backend offers no rotation, no audit, and exposes values in process listings. -- Plaintext secret values from external backends +To register a secret provider for a workspace, see [Configure secret providers]({{< relref "/develop/ai/featureform/register-providers#configure-secret-providers" >}}). 
-## Serving and feature views +## Feature views and serving + +A feature view is the resource that everything else in the graph eventually feeds. It is the online serving interface — the single name an application or model service uses when it asks Feature Form for the latest features about a particular entity. + +A feature view declares: + +- The entity it is keyed by (for example, `customer`). +- The list of features it exposes. +- The online provider that holds the materialized values — typically Redis. +- A materialization engine that produces those values from offline data. + +```python +customer_risk_view = ff.FeatureView( + name="customer_risk_feature_view", + entity="customer", + features=[customer_total_amount_7d, customer_transaction_count_7d], + inference_store="demo_redis", +) +``` -A feature view is the serving interface for a set of features keyed by an entity. In the documented Redis-backed workflow, the feature view is what applications and model services read from at inference time. +For a feature view to actually serve, three things must line up: the online provider it points to must be registered and reachable, the graph version that introduced the feature view must be committed, and materialization must have populated values for the entities you want to query. If any of those are missing, serving fails immediately rather than returning stale data. -### A feature view includes +The same logical operation is reachable through three surfaces, so applications can pick whichever fits their stack: -- the feature-view name -- the logical entity and key columns -- the served feature schema -- the online provider -- serving version and key-prefix details +- A gRPC service (`ServingService.Serve` and `ServingService.GetServingMetadata`). +- A REST endpoint (`POST /api/v1/serve`). +- A Python client (`client.serve(...)`). 
-### Serving requires +One subtle but important detail: reading feature values and reading serving metadata are governed by **separate** RBAC permissions. A dashboard or diagnostic principal can be allowed to inspect what a feature view looks like without also being allowed to read live feature values, and vice versa. -- a registered online store such as Redis -- a committed graph version containing the feature view -- ready serving metadata for that workspace and view +To serve from a feature view in an application, see [Serve features]({{< relref "/develop/ai/featureform/serve-features" >}}). To inspect datasets, training sets, or feature views directly, see [Query data]({{< relref "/develop/ai/featureform/query-data" >}}). -### Main entry points +## Next steps -- gRPC: `ServingService.Serve`, `ServingService.GetServingMetadata` -- REST: `/api/v1/serve` -- Python client: `client.serve(...)` +Now that the vocabulary is in place, the rest of the documentation maps cleanly onto these concepts: -Serving reads and serving-metadata reads are separate RBAC permissions. +- [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}) — one end-to-end walkthrough that exercises every concept on this page. +- [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) — create, inspect, update, and delete workspaces. +- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) — connect the workspace to Postgres, Redis, S3, Spark, or an Iceberg catalog, and register secret backends. +- [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) — author a definitions file and run `ff apply`. +- [Update features]({{< relref "/develop/ai/featureform/update-features" >}}) — iterate on a graph after the first apply. +- [Serve features]({{< relref "/develop/ai/featureform/serve-features" >}}) — read from a feature view in an application. 
+- [Query data]({{< relref "/develop/ai/featureform/query-data" >}}) — inspect datasets, training sets, and feature views directly. From 31edbcd7ff7cbcc0604ae4688b44bac40a508d73 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 10:26:25 -0500 Subject: [PATCH 09/13] intro and how the pieces fit together --- content/develop/ai/featureform/concepts.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index f4009cec71..fe99b55144 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -5,13 +5,13 @@ linkTitle: Concepts weight: 30 --- -Redis Feature Form is a feature platform: it manages how raw data in your existing systems becomes the entity-keyed values your models read at inference time. This page introduces the vocabulary and the model behind that workflow, so the rest of the documentation reads as application rather than memorization. +Redis Feature Form is a feature platform. It turns raw data from your existing systems into the values your models read at inference time. This page introduces the core concepts behind that workflow. ## How the pieces fit together -A Feature Form deployment runs one or more **workspaces**. Each workspace owns a versioned **resource graph** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **definitions file** and submit it with `ff apply`. +A Feature Form deployment runs one or more **[workspaces](#workspaces)**. Each workspace owns a versioned **[resource graph](#the-resource-graph)** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **[definitions file](#definitions-files-and-ff-apply)** and submit it with `ff apply`. -The graph itself is data, not credentials or connections. 
**Providers** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **secret references** in those providers point at a secret backend that holds the actual passwords and tokens. At the end of the chain, a **feature view** is the single resource the rest of your stack reads from to serve features online. +The graph itself is data, not credentials or connections. **[Providers](#providers)** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **[secret references](#secrets-and-secret-references)** point to the backend that holds the credentials. At the end of the chain, a **[feature view](#feature-views-and-serving)** is the single resource the rest of your stack reads from to serve features online. Each of these terms is unpacked in the rest of this page. @@ -86,7 +86,7 @@ customer_risk_view = ff.FeatureView( ) ``` -### Definitions files and `ff apply` +### Definitions files and `ff apply` {#definitions-files-and-ff-apply} The Python file above is the source of truth for what the graph should look like — not a script that mutates Feature Form imperatively. When you run `ff apply`, Feature Form imports the file, collects the resources it defines, and treats that set as the workspace's desired state. A planner compares the submission with the current graph, and if the change is accepted, a new graph version is committed. 
From 7ef2ddc66044e71086d17c9432925f79a85fb41a Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 15:11:15 -0500 Subject: [PATCH 10/13] concepts edits --- content/develop/ai/featureform/concepts.md | 66 +++++++++++++--------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index fe99b55144..c6ed73c56a 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -9,30 +9,28 @@ Redis Feature Form is a feature platform. It turns raw data from your existing s ## How the pieces fit together -A Feature Form deployment runs one or more **[workspaces](#workspaces)**. Each workspace owns a versioned **[resource graph](#the-resource-graph)** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **[definitions file](#definitions-files-and-ff-apply)** and submit it with `ff apply`. +A Feature Form deployment runs one or more [workspaces](#workspaces). Each workspace owns a versioned [resource graph](#the-resource-graph) that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python [definitions file](#definitions-files-and-ff-apply) and apply it with `ff apply`. -The graph itself is data, not credentials or connections. **[Providers](#providers)** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **[secret references](#secrets-and-secret-references)** point to the backend that holds the credentials. At the end of the chain, a **[feature view](#feature-views-and-serving)** is the single resource the rest of your stack reads from to serve features online. - -Each of these terms is unpacked in the rest of this page. +The graph itself is data, not credentials or connections. 
[Providers](#providers) connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and [secret references](#secrets-and-secret-references) point to the backend that holds the credentials. At the end of the chain, a [feature view](#feature-views-and-serving) is the single resource the rest of your stack reads from to serve features online. ## Workspaces -A workspace is the tenant boundary for everything Feature Form manages. The graph, the providers, the secret references, the catalog of materialized locations, and the serving metadata all live inside one workspace and cannot leak across to another. +A workspace is a self-contained environment in Feature Form. Each one owns its own resource graph, providers, secret references, and serving metadata. Nothing is shared between workspaces. -That isolation is the unit you use to separate environments — dev, staging, prod — or to give independent teams their own slice of a shared deployment. Two workspaces can point at the same external Postgres database and still not see each other's resources, because the graph that names those resources is workspace-scoped. +Use workspaces to keep environments such as dev, staging, and prod separate, or to give independent teams their own area on a shared deployment. Two workspaces can connect to the same external Postgres database and remain fully isolated, because each workspace tracks its own resources. -A workspace also tracks `last_applied_version`, a counter that advances each time the graph commits a new version. Inspection and serving commands always read from the latest committed version, not from a draft. +Every workspace also has a `last_applied_version` counter that increases each time you successfully apply a change. Read commands always return the latest committed version. To create, inspect, update, or delete workspaces, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). 
## The resource graph -The resource graph is the single object that represents the desired state of a workspace. Every feature, label, transformation, dataset, and feature view belongs to that graph and references the others by name. +The resource graph describes what a workspace should look like. Every feature, label, transformation, dataset, and feature view lives in this graph and refers to the others by name. -Two properties make the graph the right mental model: +Two properties shape how you work with it: -- **It is versioned as a whole.** When you submit a change, Feature Form commits a new graph version atomically. Either everything in the change lands together or nothing does. You don't end up with half-applied feature definitions. -- **It is declarative.** You describe what the graph should look like, not the sequence of steps to get there. Feature Form is responsible for figuring out the delta between what exists and what you've asked for. +- It is versioned as a whole. Each successful change creates a new version of the entire graph. Either every resource in the change lands together, or nothing does — you never end up with half-applied feature definitions. +- It is declarative. You describe what the graph should look like, not the steps to get there. Feature Form figures out the difference between the current graph and the new one and applies only what changed. ### Resource types @@ -46,7 +44,7 @@ A graph is built from seven resource types. New users encountering Feature Form - **Training sets** join one or more features with a label on the entity key, so an offline training job reads a single time-aligned table instead of stitching things together by hand. - **Feature views** are the online serving interface for a group of features. They are the only resource that downstream applications and model services interact with directly. -A short definitions file makes the shape concrete. 
The reader shouldn't worry about syntax yet — the point is to see how the vocabulary above appears as code. +The following example definitions file shows how the vocabulary above appears as code. ```python import featureform as ff @@ -88,21 +86,21 @@ customer_risk_view = ff.FeatureView( ### Definitions files and `ff apply` {#definitions-files-and-ff-apply} -The Python file above is the source of truth for what the graph should look like — not a script that mutates Feature Form imperatively. When you run `ff apply`, Feature Form imports the file, collects the resources it defines, and treats that set as the workspace's desired state. A planner compares the submission with the current graph, and if the change is accepted, a new graph version is committed. +The Python definitions file is the source of truth for what the graph should look like. The file uses Python to declare resources, not to run commands against Feature Form. When you run `ff apply`, Feature Form imports the file, collects those resources, and treats them as the workspace's desired state. Feature Form compares that set with the current graph and, if the change is accepted, commits a new graph version. -By default, an apply is replacement-oriented: a resource that exists in the workspace but is not in the submitted set is a candidate for removal. That behavior is what makes the file a true source of truth. When you intentionally submit a partial set and want missing resources to stay untouched, you can apply in merge mode instead. +By default, `ff apply` replaces the workspace's current graph with the resources defined in the file. Any existing resource not in the file becomes a candidate for removal. To apply a partial set and leave missing resources untouched, run `ff apply --merge` instead. {{< note >}} -**Definitions files describe features, not infrastructure.** Providers and secret backends are registered separately by a workspace admin. 
Definitions files reference providers by name and assume they already exist. This separation keeps feature authors away from credentials and infrastructure choices. +Definitions files describe features, not infrastructure. Providers and secret backends are registered separately by a workspace admin. Definitions files reference providers by name and assume they already exist. This separation keeps feature authors away from credentials and infrastructure choices. {{< /note >}} For an end-to-end walkthrough of authoring a definitions file and applying it, see the [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}). For the full apply lifecycle and editing loop, see [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) and [Update features]({{< relref "/develop/ai/featureform/update-features" >}}). ## Providers -A provider is the workspace's connection to an external system. It carries the host, port, credentials reference, and any backend-specific configuration Feature Form needs to talk to that system. Resources in the graph reference providers by name, so a provider must be registered in the workspace before any resource that uses it can be applied. +A provider is the workspace's connection to an external system. It carries the host, port, credentials reference, and any configuration Feature Form needs to talk to that system. Resources in the graph reference providers by name, so you must register a provider in the workspace before applying any resource that uses it. 
-Every provider fills one or more **roles**, which describe the kind of work it can do for the workspace: +Every provider fills one or more roles, which describe the kind of work it can do for the workspace: | Role | What it does | | --------------- | ----------------------------------------------------------------------- | @@ -127,12 +125,12 @@ To register providers in a workspace, see [Register providers]({{< relref "/deve ## Secrets and secret references -Feature Form never stores plaintext credentials in the graph. A provider configuration carries a **secret reference** that looks like `env:PG_PASSWORD`, and Feature Form resolves that reference through a registered **secret provider** at the moment the credential is needed. +Feature Form never stores plaintext credentials in the graph. A provider configuration carries a secret reference. Feature Form resolves it through a registered secret provider when the credential is needed. -Two consequences are worth internalizing as a new user: +Keeping credentials out of the graph has two important consequences: -- **The graph is safe to inspect and export.** Nothing in it contains a usable credential. You can hand the graph to another team, version it, or paste it into a ticket without leaking secrets. -- **The process that resolves a reference is whichever process actually needs the credential.** In a deployed environment, that's almost always the Feature Form server, not your local CLI shell. A reference such as `env:PG_PASSWORD` reads from the server's process environment, not yours. +- The graph is safe to inspect and export. Nothing in it contains a usable credential. You can hand the graph to another team, version it, or paste it into a ticket without leaking secrets. +- The process that resolves a reference is whichever process actually needs the credential. In a deployed environment, that's almost always the Feature Form server, not your local CLI shell. 
A reference such as `env:PG_PASSWORD` reads from the server's process environment, not yours. Every new workspace is created with a built-in `env` secret provider, which makes `env:` references work out of the box for local development. Production deployments typically register a Vault, Kubernetes-secrets, or AWS Secrets Manager backend instead, because the `env` backend offers no rotation and no audit trail, and it exposes values in process listings. @@ -140,15 +138,17 @@ To register a secret provider for a workspace, see [Configure secret providers]( ## Feature views and serving -A feature view is the resource that everything else in the graph eventually feeds. It is the online serving interface — the single name an application or model service uses when it asks Feature Form for the latest features about a particular entity. +A feature view is the resource that everything else in the graph eventually feeds. Applications query it to get the latest features for an entity. A feature view declares: -- The entity it is keyed by (for example, `customer`). +- The entity used as the lookup key (for example, `customer`). - The list of features it exposes. - The online provider that holds the materialized values — typically Redis. - A materialization engine that produces those values from offline data. +For example: + ```python customer_risk_view = ff.FeatureView( name="customer_risk_feature_view", @@ -158,22 +158,32 @@ customer_risk_view = ff.FeatureView( ) ``` -For a feature view to actually serve, three things must line up: the online provider it points to must be registered and reachable, the graph version that introduced the feature view must be committed, and materialization must have populated values for the entities you want to query. If any of those are missing, serving fails immediately rather than returning stale data. +### Feature view requirements + +Before applications can read from a feature view: + +- The online provider it points to must be registered and reachable.
+- The graph version that introduced the feature view must be committed. +- Materialization must have populated values for the entities you want to query. + +If any of those are missing, the read fails immediately rather than returning stale data. -The same logical operation is reachable through three surfaces, so applications can pick whichever fits their stack: +### Serving interfaces + +Applications can read feature values through any of three interfaces: - A gRPC service (`ServingService.Serve` and `ServingService.GetServingMetadata`). - A REST endpoint (`POST /api/v1/serve`). - A Python client (`client.serve(...)`). -One subtle but important detail: reading feature values and reading serving metadata are governed by **separate** RBAC permissions. A dashboard or diagnostic principal can be allowed to inspect what a feature view looks like without also being allowed to read live feature values, and vice versa. +{{< note >}} +Reading feature values and reading serving metadata are governed by separate RBAC permissions. For example, a dashboard user can have access to feature view schemas without access to the actual values — or vice versa. +{{< /note >}} To serve from a feature view in an application, see [Serve features]({{< relref "/develop/ai/featureform/serve-features" >}}). To inspect datasets, training sets, or feature views directly, see [Query data]({{< relref "/develop/ai/featureform/query-data" >}}). ## Next steps -Now that the vocabulary is in place, the rest of the documentation maps cleanly onto these concepts: - - [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}) — one end-to-end walkthrough that exercises every concept on this page. - [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) — create, inspect, update, and delete workspaces. 
- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) — connect the workspace to Postgres, Redis, S3, Spark, or an Iceberg catalog, and register secret backends. From f9eeec89dd09b76fcd2924aa1bc7b7d1c42493c5 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 16:09:23 -0500 Subject: [PATCH 11/13] manage workspaces DOC-6583 --- .../ai/featureform/manage-workspace.md | 131 ++++++++++++++++-- content/develop/ai/featureform/providers.md | 26 ---- content/operate/featureform/auth.md | 76 +--------- 3 files changed, 124 insertions(+), 109 deletions(-) diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 32634f3efc..3b3b9fdbcc 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -1,27 +1,142 @@ --- title: Manage workspaces -description: Inspect and manage Redis Feature Form workspaces with the ff CLI. +description: Create, verify access to, monitor, and delete Redis Feature Form workspaces with the ff CLI. linkTitle: Manage workspaces weight: 20 --- -Use these commands when you need to inspect or change a workspace directly. +A workspace is a self-contained environment in Redis Feature Form. Each workspace owns its own resource graph, providers, secret references, catalog entries, and serving metadata—nothing is shared between workspaces. Use workspaces to keep environments such as dev, staging, and prod separate, or to give independent teams their own isolated area on a shared deployment. -## Core commands +Each workspace has: + +- A unique name and an optional description. +- A `last_applied_version` that tracks the most recently applied resource graph. +- A built-in `env` secret provider, created automatically. + +The tasks on this page require one of two roles: + +- A global admin (`global_admin`) creates workspaces and grants access. 
+- A workspace admin (`workspace_admin`) verifies their access, runs health checks, and updates or deletes the workspace. + +For the full list of built-in roles and the scope model, see [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}). + +Most commands on this page identify a workspace by its UUID, either as a positional argument or via the `--workspace` flag. Find the UUID with `ff workspace create` or `ff workspace list`. The examples below use `<workspace-id>` as a placeholder; substitute the actual UUID. + +## Create a workspace and grant access + +A global admin creates a workspace and hands it off to the workspace admin who will manage it. + +### 1. Create the workspace + +```bash +ff workspace create demo-workspace \ + --description "Workspace for the feature workflow docs path" +``` + +The command returns a table with the new workspace's ID, name, description, version, and timestamps: + +```text +ID NAME DESCRIPTION VERSION CREATED UPDATED +7f2e4d8c-3a91-4b6d-9f0a-5e8c1b2d3f4a demo-workspace Workspace for the feature workflow ... 0 2026-05-12 14:03:21 2026-05-12 14:03:21 +``` + +Save the ID—you'll need it for the RBAC commands below. + +### 2. Grant workspace-admin access + +```bash +ff rbac grant workspace_admin \ + --workspace <workspace-id> \ + --user alice@example.com +``` + +To bind a group or service account instead, use `--group <group-name>` or `--service-account <service-account-name>`. Exactly one of `--user`, `--group`, or `--service-account` is required. + +### 3. Verify the binding + +```bash +ff rbac list --workspace <workspace-id> +``` + +You should see the new role bound to the user, group, or service account you specified. For an alternate view that groups bindings by user, group, or service account instead of one row per binding, use `ff rbac subjects --workspace <workspace-id>`. + +Creating a workspace does not automatically grant other users access—each member needs their own binding.
+ +## Confirm access to a workspace + +Use these checks when a workspace already exists and you need to confirm you can register providers, apply resources, or serve features. ```bash +# Verify your identity. +ff auth whoami +ff rbac whoami + +# Confirm the workspace is visible to you. ff workspace list ff workspace get --name demo-workspace + +# Confirm your binding. +ff rbac list --workspace <workspace-id> +``` + +You should see your user, group, or service-account binding listed. If you don't, ask a global admin to grant access using the steps above. + +## Check workspace health + +Run these commands routinely, or whenever something looks wrong, to confirm a workspace is healthy: + +```bash +# Confirm the CLI can reach the deployment. +ff ping + +# Inspect workspace metadata, including last_applied_version. +ff workspace get <workspace-id> + +# List configured providers and secret providers. +ff provider list --workspace <workspace-id> +ff secret-provider list --workspace <workspace-id> + +# Inspect graph overview and stats. +ff graph workspace stats --workspace <workspace-id> + +# List catalog locations for materialized resources. +ff catalog list --workspace <workspace-id> +``` + +Also confirm that your serving and dataframe clients point at the expected transport and state backend. + +## Update a workspace + +Change a workspace's name or description: + +```bash ff workspace update \ --name demo-workspace \ --description "Updated description" +``` + +Update affects metadata only—it doesn't touch providers, the resource graph, or catalog entries. + +## Delete a workspace + +{{< warning >}} +Deleting a workspace permanently removes all workspace-scoped data: providers, secret references, the resource graph, catalog entries, and serving metadata. This cannot be undone. +{{< /warning >}} + +```bash ff workspace delete <workspace-id> --force ``` -## Workspace state to remember +`--force` skips the interactive confirmation prompt. Omit it for a safer, interactive delete.
+ +## Troubleshooting + +- **`permission denied` on provider or apply commands.** Your account is likely missing workspace write access. Run `ff rbac list --workspace <workspace-id>` to confirm the binding, and ask a global admin to grant the appropriate role if it's missing. +- **`workspace not found`.** This usually means the wrong deployment, the wrong transport, or a typo in the workspace name. Try `ff workspace list` to see what's actually visible. +- **Missing workspaces, providers, or resources after apply.** With memory-backed state, gRPC and REST can behave like separate state domains. Check for transport mismatches first, and use durable PostgreSQL-backed state for shared environments. -- workspaces have unique names and optional descriptions -- each workspace tracks `last_applied_version` -- providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped +## Next steps -Deleting a workspace removes its associated workspace-scoped data. +- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) to connect a workspace to its storage, compute, and catalog systems. +- See [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}) for the deployment-wide role and scope model. +- See [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) for the workspace, resource graph, and serving model. diff --git a/content/develop/ai/featureform/providers.md b/content/develop/ai/featureform/providers.md index 468e54c88c..191edad964 100644 --- a/content/develop/ai/featureform/providers.md +++ b/content/develop/ai/featureform/providers.md @@ -174,29 +174,3 @@ print(features) - if the feature view is not ready, serving fails - if the online provider is unavailable or unsupported, serving fails - serving-metadata permissions and serving-read permissions are separate RBAC checks - - -## Operate a workspace -Use this how-to for routine operational checks after a workspace is already created and in use.
- -### Day-2 checklist - -- verify connectivity with `ff ping` -- inspect workspace metadata and `last_applied_version` -- inspect providers and secret providers -- inspect graph overview and stats -- inspect catalog locations -- confirm serving and dataframe clients point at the expected transport and state backend - -### Useful commands - -```bash -ff ping -ff workspace get -ff provider list --workspace -ff secret-provider list --workspace -ff graph workspace stats --workspace -ff catalog list --workspace -``` - -With memory-backed state, check transport mismatches first when users report missing workspaces, providers, or applied resources. diff --git a/content/operate/featureform/auth.md b/content/operate/featureform/auth.md index 0547e81ab1..14439ae3f9 100644 --- a/content/operate/featureform/auth.md +++ b/content/operate/featureform/auth.md @@ -33,78 +33,4 @@ Feature Form separates deployment-wide administration from workspace-scoped acti - Workspace scope controls providers, secret providers, apply, graph, and audit inside one workspace. - Resource-constrained scope is used for limited serving or training-set access. -## Create a workspace and grant access - -Use this flow when a global admin is creating a new workspace and handing it off to the team that will manage it. - -### 1. Create the workspace - -```bash -ff workspace create demo-workspace \ - --description "Workspace for the feature workflow docs path" -``` - -### 2. Verify it exists - -```bash -ff workspace get --name demo-workspace -ff workspace list -``` - -Capture the workspace ID from the result for later RBAC commands. - -### 3. Grant workspace-admin access - -```bash -ff rbac grant workspace_admin \ - --workspace \ - --user alice@example.com -``` - -You can also bind a group or service account instead of a user. - -### 4. 
Verify the binding - -```bash -ff rbac list --workspace -ff rbac subjects --workspace -``` - -### Notes - -- Creating the workspace does not automatically grant workspace membership to other principals. -- New workspaces create a built-in `env` secret provider, but it is still workspace-scoped. -- In-memory state can make gRPC and REST behave like separate state domains. Use durable PostgreSQL-backed state for shared environments. - -## Join an existing workspace - -Use this page when a workspace already exists and you need to confirm that the intended principal can proceed with setup, apply, or serving. - -### 1. Verify identity - -```bash -ff auth whoami -ff rbac whoami -``` - -### 2. Confirm the workspace is visible - -```bash -ff workspace list -ff workspace get --name demo-workspace -``` - -### 3. Confirm the effective binding - -```bash -ff rbac list --workspace -``` - -You should see the expected user, group, or service-account binding for that workspace. - -### Common failures - -- `permission denied` on provider or apply commands usually means missing workspace write access. -- `workspace not found` usually means the wrong deployment, wrong transport, or wrong workspace name. -- Missing resources after apply can indicate transport or state-backend mismatch in non-durable environments. - +For the workspace lifecycle—creating a workspace, granting access, joining as a member, and day-2 operations—see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). 
From fe037d43ac385df2b06f5fc3342a365f0baea76b Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 16:32:46 -0500 Subject: [PATCH 12/13] auth rough draft DOC-6590 --- .../ai/featureform/manage-workspace.md | 4 +- content/operate/featureform/auth.md | 36 --------------- content/operate/featureform/configure-auth.md | 46 +++++++++++++++++++ 3 files changed, 48 insertions(+), 38 deletions(-) delete mode 100644 content/operate/featureform/auth.md create mode 100644 content/operate/featureform/configure-auth.md diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 3b3b9fdbcc..c36c9ee34a 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -18,7 +18,7 @@ The tasks on this page require one of two roles: - A global admin (`global_admin`) creates workspaces and grants access. - A workspace admin (`workspace_admin`) verifies their access, runs health checks, and updates or deletes the workspace. -For the full list of built-in roles and the scope model, see [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}). +For the full list of built-in roles and the scope model, see [Authentication and RBAC]({{< relref "/operate/featureform/configure-auth" >}}). Most commands on this page identify a workspace by its UUID, either as a positional argument or via the `--workspace` flag. Find the UUID with `ff workspace create` or `ff workspace list`. The examples below use ``, substitute the actual UUID. @@ -138,5 +138,5 @@ ff workspace delete --force ## Next steps - [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) to connect a workspace to its storage, compute, and catalog systems. -- See [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}) for the deployment-wide role and scope model. 
+- See [Authentication and RBAC]({{< relref "/operate/featureform/configure-auth" >}}) for the deployment-wide role and scope model. - See [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) for the workspace, resource graph, and serving model. diff --git a/content/operate/featureform/auth.md b/content/operate/featureform/auth.md deleted file mode 100644 index 14439ae3f9..0000000000 --- a/content/operate/featureform/auth.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -Title: Authentication and RBAC -alwaysopen: false -categories: -- docs -- operate -- featureform -description: Manage Feature Form auth and RBAC -linkTitle: Authentication and RBAC -weight: 70 -bannerText: Feature Form is currently in preview and subject to change. Feature Form Docker images are available on Docker Hub; contact your Redis account team for a license key to deploy. -bannerChildren: true ---- -Feature Form separates deployment-wide administration from workspace-scoped actions. A workspace is the isolation boundary, but membership and permissions are managed separately through RBAC bindings. - -## Built-in roles - -- `global_admin` for deployment-wide administration and workspace creation -- `workspace_admin` for workspace setup, membership, apply, and audit -- `operator` for operational workflows -- `viewer` for read-only workspace visibility -- `model` for constrained reads of feature views and training sets - -## Typical handoff - -1. A global admin creates the workspace. -2. The global admin grants `workspace_admin` to the intended principal. -3. That principal verifies access before registering providers or applying resources. - -## Scope model - -- Global scope controls deployment-wide actions. -- Workspace scope controls providers, secret providers, apply, graph, and audit inside one workspace. -- Resource-constrained scope is used for limited serving or training-set access. 
- -For the workspace lifecycle—creating a workspace, granting access, joining as a member, and day-2 operations—see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). diff --git a/content/operate/featureform/configure-auth.md b/content/operate/featureform/configure-auth.md new file mode 100644 index 0000000000..d4ace9fe30 --- /dev/null +++ b/content/operate/featureform/configure-auth.md @@ -0,0 +1,46 @@ +--- +Title: Configure authentication and RBAC +aliases: +- /operate/featureform/auth/ +alwaysopen: false +categories: +- docs +- operate +- featureform +description: Configure deployment-wide authentication and RBAC for Redis Feature Form. +linkTitle: Configure auth and RBAC +weight: 70 +bannerText: Feature Form is currently in preview and subject to change. Feature Form Docker images are available on Docker Hub; contact your Redis account team for a license key to deploy. +bannerChildren: true +--- + +Redis Feature Form separates deployment-wide administration from workspace-scoped actions. A [workspace]({{< relref "/develop/ai/featureform/concepts#workspaces" >}}) isolates resources; RBAC bindings control who can act on it. + +This page covers the built-in roles, the scope model, and the typical handoff between a global admin and a workspace admin. For the CLI commands that grant access and verify bindings, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). + +## Built-in roles + +Feature Form ships with five built-in RBAC roles. The role ID in the left column is the literal string used in `ff rbac grant`. + +| Role ID | Scope | What it grants | +| --- | --- | --- | +| `viewer` | Workspace | Read-only access to workspace metadata, the resource graph, the catalog, providers, and serving metadata. | +| `operator` | Workspace | Everything a viewer has, plus writing providers, planning and applying changes, reading served features, and controlling scheduler workflows. 
| +| `workspace_admin` | Workspace | Full administration of a single workspace—membership, audit, updates, deletion—plus everything an operator has. | +| `global_admin` | Global | Workspace creation, plus full administration across every workspace in the deployment. | +| `model` | Resource-constrained | Read access to a specific set of feature views, training sets, and serving data—nothing else. Used for model-team service accounts. | + +## Scope model + +Feature Form has three scopes. Each role works at exactly one of them. + +- **Global** Deployment-wide actions, such as creating workspaces. Only `global_admin` operates at this scope. +- **Workspace** Actions inside a single workspace: providers, secret providers, apply, graph, catalog, serving metadata, and audit. A binding at this scope applies to one workspace only—grant the role again on each workspace a user needs. +- **Resource-constrained** A narrower form of workspace scope that limits a binding to a specific set of resources. Used for the `model` role, which can read only serving and training-set data for the resources it is bound to. + +A binding pairs a role with a scope and a user, group, or service account. For example: "Alice has `workspace_admin` on workspace `7f2e4d8c-…`" or "the `payments-team` group has `global_admin`." + +## Next steps + +- [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the commands that create workspaces, grant roles, and verify bindings. +- [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) for background on workspaces and the resource graph. 
From 79a018d79c75d476db0b1efe2b66bb59f195af6a Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Thu, 28 May 2026 10:46:27 -0500 Subject: [PATCH 13/13] auth changes --- content/develop/ai/featureform/reference.md | 42 +++++- content/operate/featureform/configure-auth.md | 142 +++++++++++++++++- 2 files changed, 173 insertions(+), 11 deletions(-) diff --git a/content/develop/ai/featureform/reference.md b/content/develop/ai/featureform/reference.md index 71a7066a02..260f9e2ad4 100644 --- a/content/develop/ai/featureform/reference.md +++ b/content/develop/ai/featureform/reference.md @@ -1,6 +1,38 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Redis Feature Form reference +description: Reference data for the ff CLI, Python client, gRPC API, and RBAC permissions. +linkTitle: Reference +weight: 100 +--- + +This page collects raw reference data for Redis Feature Form. Use it as a lookup—conceptual material lives in the [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) page, and task-oriented procedures live in the other pages in this section. + +## Permissions + +Each built-in RBAC role is a fixed set of permissions. The role table on [Configure authentication and RBAC]({{< relref "/operate/featureform/configure-auth#built-in-roles" >}}) is the usual way to think about access; the catalog below is what the authorization service actually checks. + +| Permission ID | Category | Resource scope | What it grants | +| --- | --- | --- | --- | +| `workspace.create` | workspace | deployment | Create new workspaces. | +| `workspace.read` | workspace | workspace | Read workspace metadata. | +| `workspace.list` | workspace | deployment | List visible workspaces. | +| `workspace.update` | workspace | workspace | Update workspace metadata. | +| `workspace.delete` | workspace | workspace | Delete a workspace. | +| `workspace.membership.manage` | workspace | workspace | Manage workspace RBAC bindings. 
| +| `graph.read` | graph | workspace | Read graph and resource metadata. | +| `catalog.read` | catalog | workspace | Read catalog metadata. | +| `provider.read` | infrastructure | workspace | Read provider definitions. | +| `provider.write` | infrastructure | workspace | Mutate provider definitions. | +| `secret_provider.read` | infrastructure | workspace | Read secret-provider definitions. | +| `secret_provider.write` | infrastructure | workspace | Mutate secret-provider definitions. | +| `apply.plan` | mutation | workspace | Run apply planning. | +| `apply.write` | mutation | workspace | Apply workspace changes. | +| `serving.metadata.read` | data | workspace or resource | Read serving metadata. | +| `serving.read` | data | workspace or resource | Read served feature values. | +| `dataframe.read` | data | workspace | Read dataframe data. | +| `training_set.read` | data | workspace or resource | Read training-set data. | +| `scheduler.read` | operations | workspace | Read scheduler state. | +| `scheduler.control` | operations | workspace | Control scheduler state. | +| `audit.read` | audit | workspace or deployment | Read audit logs. | +| `machine_credential.read` | machine credentials | workspace | Read machine credentials. | +| `machine_credential.write` | machine credentials | workspace | Create, rotate, and revoke machine credentials. | diff --git a/content/operate/featureform/configure-auth.md b/content/operate/featureform/configure-auth.md index d4ace9fe30..b24fe6b0bd 100644 --- a/content/operate/featureform/configure-auth.md +++ b/content/operate/featureform/configure-auth.md @@ -14,13 +14,82 @@ bannerText: Feature Form is currently in preview and subject to change. Feature bannerChildren: true --- -Redis Feature Form separates deployment-wide administration from workspace-scoped actions. A [workspace]({{< relref "/develop/ai/featureform/concepts#workspaces" >}}) isolates resources; RBAC bindings control who can act on it. 
+Redis Feature Form authenticates users and services through an external OIDC identity provider (IdP), then authorizes their actions through built-in RBAC roles. Authentication is a deployment-wide concern configured at install time; authorization is per-workspace (with a small number of deployment-scoped exceptions) and managed at runtime through role bindings. -This page covers the built-in roles, the scope model, and the typical handoff between a global admin and a workspace admin. For the CLI commands that grant access and verify bindings, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). +A [workspace]({{< relref "/develop/ai/featureform/concepts#workspaces" >}}) isolates resources; RBAC bindings control who can act on it. -## Built-in roles +## Authentication -Feature Form ships with five built-in RBAC roles. The role ID in the left column is the literal string used in `ff rbac grant`. +### Configure OIDC at deploy time + +Set Feature Form's OIDC parameters in the Helm chart's `auth` block. At minimum, you need an issuer URL and a server-side client ID: + +```yaml +auth: + enabled: true + oidcIssuerURL: "https://idp.example.com/realms/featureform" + oidcClientID: "featureform-server" + + # CLI client. Defaults to "featureform-cli". + oidcCLIClientID: "featureform-cli" + + # Comma-separated list. Restricts which flows the CLI offers. + # Supported values: device_code, authorization_code_pkce + oidcCLILoginMethods: "device_code,authorization_code_pkce" + + # Required only if you use authorization_code_pkce. Must be + # registered with the IdP for the CLI client. + oidcCLIRedirectURI: "http://localhost:8080/callback" +``` + +For deployments where internal services reach the IdP at a different URL than external clients, use `oidcDiscoveryURL`, `oidcPublicIssuerURL`, and `oidcPublicDiscoveryURL` to split the discovery and issuer endpoints. 
The `oidcSkipIssuerCheck: true` flag disables issuer-claim validation and should only be used during local development. + +Feature Form reads role information from JWT claims on each request. It checks the following claims, in order, for matches against built-in role IDs: + +- `featureform_roles` (string or array) +- `roles` (string or array) +- `role` (string) +- `realm_access.roles` (array; Keycloak convention) + +If any of those claims contain `global_admin`, the user is treated as a global admin for that token's lifetime without a database binding. This is the typical way operators bootstrap the first admin—see [Provision the first global admin](#provision-the-first-global-admin). + +### Sign in with the CLI + +The `ff auth` commands handle login, session inspection, and token retrieval: + +```bash +# Interactive login. Defaults to device-code flow if the IdP +# supports it; falls back to authorization_code_pkce otherwise. +ff auth login + +# Force a specific flow. +ff auth login --login-method device_code +ff auth login --login-method authorization_code_pkce + +# Non-interactive password grant (CI, scripts). +ff auth login --username alice@example.com --password-stdin + +# Inspect the current session. +ff auth status +ff auth whoami + +# Print the active access token (for use in tools that don't +# integrate with the CLI session). +ff auth token + +# Clear the local session. Does not revoke tokens on the IdP. +ff auth logout +``` + +CLI sessions are stored per profile on the local machine. To skip interactive login entirely, set `FEATUREFORM_TOKEN` to a valid access token, or configure a service account with client credentials (see [Service accounts and machine credentials](#service-accounts-and-machine-credentials)). + +## RBAC + +### Built-in roles + +Feature Form has five built-in RBAC roles. The role ID in the left column is the literal string used in `ff rbac grant`. 
+ +Each built-in role is a fixed set of finer-grained permissions—the underlying checks the authorization service runs on each request. For the full permission catalog, see [Reference > Permissions]({{< relref "/develop/ai/featureform/reference#permissions" >}}). | Role ID | Scope | What it grants | | --- | --- | --- | @@ -30,9 +99,10 @@ Feature Form ships with five built-in RBAC roles. The role ID in the left column | `global_admin` | Global | Workspace creation, plus full administration across every workspace in the deployment. | | `model` | Resource-constrained | Read access to a specific set of feature views, training sets, and serving data—nothing else. Used for model-team service accounts. | -## Scope model -Feature Form has three scopes. Each role works at exactly one of them. +### Role scopes + +Every role applies at a defined breadth—deployment-wide, a single workspace, or a specific set of resources within a workspace. Feature Form has three scopes, and each role works at exactly one: - **Global** Deployment-wide actions, such as creating workspaces. Only `global_admin` operates at this scope. - **Workspace** Actions inside a single workspace: providers, secret providers, apply, graph, catalog, serving metadata, and audit. A binding at this scope applies to one workspace only—grant the role again on each workspace a user needs. @@ -40,6 +110,66 @@ Feature Form has three scopes. Each role works at exactly one of them. A binding pairs a role with a scope and a user, group, or service account. For example: "Alice has `workspace_admin` on workspace `7f2e4d8c-…`" or "the `payments-team` group has `global_admin`." +### Provision the first global admin + +A fresh Feature Form deployment has no role bindings in its database. 
To get the first global admin in place, choose one of two paths: + +**Map an IdP claim to `global_admin` (recommended for production).** Configure your IdP to issue a `featureform_roles` claim that contains `global_admin` for the appropriate user or group. Feature Form treats those tokens as global admin without a database binding, so the first admin can sign in and start granting roles to others immediately. + +**Bind manually after the first login.** A user with no role can still authenticate; they just can't do anything yet. From a host that already has an access token for a privileged account, run: + +```bash +ff rbac grant global_admin --global --user <user> +``` + +This option requires that *some* identity already has `global_admin`, which makes it suitable only for redirecting access from a temporary IdP-claim admin to a database-bound one, or for environments where you can run `ff` commands with a bootstrap token issued out-of-band. + +There is no dedicated Helm value for an initial admin. Plan your IdP claim mapping before installing. + +## Service accounts and machine credentials + +Non-human identities—CI runners, model-serving processes, batch jobs—authenticate with a service account that holds a public key registered with Feature Form. Feature Form supports Ed25519 keys today. + +Create a credential for a service account inside a workspace: + +```bash +ff machine-credential create ci-runner-key \ + --workspace <workspace-id> \ + --service-account <service-account> \ + --public-key "<public-key>" \ + --algorithm Ed25519 +``` + +The `ff machine-credential` command also has subcommands for `list`, `get`, `rotate`, `revoke`, and `usage` (for audit-style usage records). Subcommands that create, rotate, or revoke credentials require the `machine_credential.write` permission on the target workspace; the read-only subcommands require `machine_credential.read`. 
+ +Grant the service account a workspace role the same way you would a user—use `--service-account <service-account>` instead of `--user <user>`: + +```bash +ff rbac grant operator \ + --workspace <workspace-id> \ + --service-account ci-runner +``` + +## Audit + +Feature Form records authorization-relevant events in an audit log. List events with: + +```bash +ff audit list \ + --workspace <workspace-id> \ + --event-type workspace.delete \ + --page-size 50 +``` + +Useful filters: + +- `--workspace <workspace-id>` — scope to one workspace. +- `--global` — only deployment-scoped events. Requires `global_admin`. +- `--principal-id <principal-id>` — events for a specific user, group, or service account. +- `--event-type <event-type>` — filter by event name (`workspace.create`, `rbac.grant`, `apply.write`, and so on). + +Each event includes the scope, workspace ID (if applicable), actor ID, event type, and creation timestamp. Reading the log requires the `audit.read` permission; deployment-scope reads additionally require `global_admin`. + ## Next steps - [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the commands that create workspaces, grant roles, and verify bindings.