From a820f4e490152ffd25031312f1f87a90e7e12c41 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 11:42:47 -0500 Subject: [PATCH 01/18] create new files in develop/ai --- content/develop/ai/featureform/concepts.md | 6 ++++++ content/develop/ai/featureform/configure-auth.md | 6 ++++++ .../develop/ai/featureform/define-and-deploy-features.md | 6 ++++++ content/develop/ai/featureform/mange-workspace.md | 6 ++++++ content/develop/ai/featureform/query-data.md | 6 ++++++ content/develop/ai/featureform/reference.md | 6 ++++++ content/develop/ai/featureform/register-providers.md | 6 ++++++ content/develop/ai/featureform/serve-features.md | 6 ++++++ content/develop/ai/featureform/update-features.md | 6 ++++++ 9 files changed, 54 insertions(+) create mode 100644 content/develop/ai/featureform/concepts.md create mode 100644 content/develop/ai/featureform/configure-auth.md create mode 100644 content/develop/ai/featureform/define-and-deploy-features.md create mode 100644 content/develop/ai/featureform/mange-workspace.md create mode 100644 content/develop/ai/featureform/query-data.md create mode 100644 content/develop/ai/featureform/reference.md create mode 100644 content/develop/ai/featureform/register-providers.md create mode 100644 content/develop/ai/featureform/serve-features.md create mode 100644 content/develop/ai/featureform/update-features.md diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/concepts.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/configure-auth.md b/content/develop/ai/featureform/configure-auth.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/configure-auth.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No 
newline at end of file diff --git a/content/develop/ai/featureform/define-and-deploy-features.md b/content/develop/ai/featureform/define-and-deploy-features.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/define-and-deploy-features.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/mange-workspace.md b/content/develop/ai/featureform/mange-workspace.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/mange-workspace.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/query-data.md b/content/develop/ai/featureform/query-data.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/query-data.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/reference.md b/content/develop/ai/featureform/reference.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/reference.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/register-providers.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file diff --git a/content/develop/ai/featureform/serve-features.md b/content/develop/ai/featureform/serve-features.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/serve-features.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 
+--- \ No newline at end of file diff --git a/content/develop/ai/featureform/update-features.md b/content/develop/ai/featureform/update-features.md new file mode 100644 index 0000000000..71a7066a02 --- /dev/null +++ b/content/develop/ai/featureform/update-features.md @@ -0,0 +1,6 @@ +--- +title: +description: +linkTitle: +weight: 1 +--- \ No newline at end of file From 935f357e3a86fdfdb1b85c1c7b3be423d264731b Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 11:48:37 -0500 Subject: [PATCH 02/18] typo --- .../ai/featureform/{mange-workspace.md => manage-workspace.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename content/develop/ai/featureform/{mange-workspace.md => manage-workspace.md} (100%) diff --git a/content/develop/ai/featureform/mange-workspace.md b/content/develop/ai/featureform/manage-workspace.md similarity index 100% rename from content/develop/ai/featureform/mange-workspace.md rename to content/develop/ai/featureform/manage-workspace.md From 53b3e06d5d235aa672a2d8f69f6e36925b54cc51 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 12:49:19 -0500 Subject: [PATCH 03/18] DOC-6581 DOC-6582 --- .../ai/featureform/register-providers.md | 203 +++++++++++++++++- 1 file changed, 198 insertions(+), 5 deletions(-) diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md index 71a7066a02..071af22751 100644 --- a/content/develop/ai/featureform/register-providers.md +++ b/content/develop/ai/featureform/register-providers.md @@ -1,6 +1,199 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Register providers +description: Register storage, compute, and catalog providers in a Redis Feature Form workspace, and configure secret backends. +linkTitle: Register providers +weight: 30 +--- + +Register the providers and secret backends a Redis Feature Form workspace needs before you author features or transformations. 
Providers connect the workspace to external systems for storage, compute, serving, or catalog-backed access, and definitions files reference them by name. + +## Prerequisites + +Before you register providers, make sure you have: + +- A workspace. See [Manage workspaces](./manage-workspace.md) for the workspace lifecycle commands. +- The `ff` CLI installed and able to reach the Feature Form server. The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. +- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. + For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. + +The examples on this page use placeholder names like `demo-workspace`, `demo_postgres`, and `spark-main`. Substitute the names you want to use in your own deployment. + +{{< note >}} +**Best practice:** keep the default health check on. Registration surfaces connectivity and secret-resolution problems at the point you can fix them, rather than as silent failures during materialization or serving. Reserve `--skip-health-check` for cases where you've already validated the provider through another channel. +{{< /note >}} + +## Register Postgres for offline storage + +Use Postgres when the workspace needs an offline store and Postgres-backed SQL execution in the same path. As an `offline-store`, Postgres holds the historical feature values that training sets read from. As a `compute` provider, it runs the SQL transformations that produce those values. + +The `` placeholder in `--pg-host` and in the Redis `--redis-host` stands for your Helm release name. With release name `my-ff`, the bundled Postgres service is `my-ff-featureform-provider-postgres`. 
If you connect to an external Postgres or Redis instance instead of the bundled chart addons, use that hostname directly. + +```bash +ff provider register demo_postgres \ + --workspace demo-workspace \ + --type postgres \ + --pg-host -featureform-provider-postgres \ + --pg-port 5432 \ + --pg-database featureform_test \ + --pg-user testuser \ + --pg-password-secret env:PG_PASSWORD \ + --pg-ssl-mode disable +``` + +See the [PostgreSQL documentation](https://www.postgresql.org/docs/) for connection and SSL options. + +## Register Redis as the online store + +Use Redis when the workspace needs an online store for low-latency feature serving. As an `online-store`, Redis holds the latest materialized feature values and serves them to applications at inference time. + +```bash +ff provider register demo_redis \ + --workspace demo-workspace \ + --type redis \ + --redis-host -featureform-redis \ + --redis-port 6379 +``` + +In the quickstart definitions file, the feature view references this provider with `inference_store="demo_redis"`. See the [Redis documentation](https://redis.io/docs/latest/) for deployment options. + +## Register S3 as an offline store + +Use S3 when Feature Form needs an object-storage-backed offline location. As an `offline-store`, S3 holds historical feature values as files (typically Parquet) that training sets read from. Choose S3 when dataset size or retention exceeds what a relational store fits. + +```bash +ff provider register data-lake \ + --workspace demo-workspace \ + --type s3 \ + --s3-bucket featureform-data \ + --s3-region us-west-2 \ + --s3-access-key-id-secret env:AWS_ACCESS_KEY_ID \ + --s3-secret-access-key-secret env:AWS_SECRET_ACCESS_KEY +``` + +Use `--s3-endpoint` for MinIO or LocalStack-style endpoints when needed. See the [Amazon S3 documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/) for bucket and IAM setup. 
+ +## Register Spark for compute + +Use Spark when the workspace needs a compute provider for transformation or materialization workloads. As a `compute` provider, Spark runs the transformation and materialization jobs that produce feature values. Choose Spark when dataset size exceeds what a single SQL engine can handle. + +```bash +ff provider register spark-main \ + --workspace demo-workspace \ + --type spark \ + --spark-master spark://spark-master:7077 +``` + +See the [Apache Spark documentation](https://spark.apache.org/docs/latest/) for cluster and master configuration. + +## Register an Iceberg catalog + +Use an Iceberg catalog provider when the workspace needs catalog-backed offline storage. As an `offline-store`, the catalog tracks versioned table snapshots over object storage. The workspace reads historical feature values from those tables, with schema evolution and time-travel queries. + +```bash +ff provider register iceberg-main \ + --workspace demo-workspace \ + --type iceberg_catalog \ + --iceberg-warehouse s3://featureform-data/warehouse \ + --iceberg-catalog-name featureform \ + --iceberg-rest-uri https://iceberg.example.com +``` + +This example uses the REST catalog backend; the exact required fields depend on which backend (REST, Hive, Glue, and so on) you choose. See the [Apache Iceberg documentation](https://iceberg.apache.org/docs/latest/) for catalog backend options. + +## Verify registration + +```bash +ff provider list --workspace demo-workspace +ff provider get demo_postgres --workspace demo-workspace +``` + +A successful list returns one row per registered provider: + +```text +NAME TYPE WORKSPACE CREATED UPDATED +demo_postgres postgres demo-workspace 2026-05-12T10:14:02Z 2026-05-12T10:14:02Z +demo_redis redis demo-workspace 2026-05-12T10:14:18Z 2026-05-12T10:14:18Z +``` + +Pass `--output json` or `--output yaml` for machine-readable output. If the list is empty or `get` returns an error, the register command did not complete. 
Rerun `ff provider register` to see its health-check output, and confirm the provider name and workspace match the ones you registered. + +## Update or delete a provider + +```bash +ff provider update demo_postgres \ + --workspace demo-workspace \ + --pg-port 5433 + +ff provider delete demo_postgres --workspace demo-workspace +``` + +Use `--force` on `update` when changing values that may break running workloads, such as host, port, or broker addresses. + +## Configure secret providers + +Confirm which secret backend a workspace uses, or register an alternate when `env` is not enough. Production deployments typically move off `env` because it mixes secrets with general configuration, offers no rotation or audit, and surfaces values in process listings. Vault, Kubernetes secrets, and AWS Secrets Manager each address those gaps. + +### Check the built-in `env` provider + +```bash +ff secret-provider list --workspace demo-workspace +ff secret-provider get env --workspace demo-workspace +``` + +### Register another secret provider + +Each backend has different preconditions on the Feature Form server. Pick the one that matches how your server is deployed. + +**Environment provider** — best for local development and bootstrap. The server reads variables from its own process environment. Use a prefix (`--env-prefix FF_`) to avoid collisions with other system variables. + +```bash +ff secret-provider register local-env \ + --workspace demo-workspace \ + --type env \ + --env-prefix FF_ +``` + +**Vault** — best for shared deployments that need rotation and audit. The server must be able to authenticate to Vault: export `VAULT_TOKEN` for token auth, or configure Kubernetes auth (when the server runs in-cluster) or AppRole. The backend uses the KV v2 secrets engine. 
+ +```bash +ff secret-provider register vault-main \ + --workspace demo-workspace \ + --type vault \ + --vault-address https://vault.example.com \ + --vault-token-path /var/run/secrets/vault-token +``` + +**Kubernetes secrets** — best when the server runs inside a Kubernetes cluster and provider credentials are already managed as `Secret` resources. The server's service account needs `get` and `list` permissions on `secrets` in the target namespace. + +```bash +ff secret-provider register k8s-main \ + --workspace demo-workspace \ + --type k8s \ + --k8s-namespace featureform \ + --k8s-secret-name provider-secrets +``` + +**AWS Secrets Manager** — best when provider credentials already live in AWS. The server authenticates using the standard AWS credentials chain (IAM role on the host, instance profile, or `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in the server environment). + +```bash +ff secret-provider register aws-main \ + --workspace demo-workspace \ + --type aws \ + --aws-region us-west-2 +``` + +### Update or delete a secret provider + +```bash +ff secret-provider update local-env \ + --workspace demo-workspace \ + --env-prefix PROD_ + +ff secret-provider delete local-env \ + --workspace demo-workspace \ + --yes +``` + +## Next steps + +With providers registered, the workspace is ready to receive feature definitions. See [Define and deploy features](./define-and-deploy-features.md) for authoring a definitions file and running `ff apply`. 
From e3c16af35ab40e2cc3d758cc99f901097012904a Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 12:49:48 -0500 Subject: [PATCH 04/18] DOC-6583 --- .../ai/featureform/manage-workspace.md | 25 +- content/develop/ai/featureform/streaming.md | 287 ------------------ 2 files changed, 24 insertions(+), 288 deletions(-) diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 71a7066a02..10eab4100d 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -3,4 +3,27 @@ title: description: linkTitle: weight: 1 ---- \ No newline at end of file +--- + +## Manage workspaces + +Use these commands when you need to inspect or change a workspace directly. + +### Core commands + +```bash +ff workspace list +ff workspace get --name demo-workspace +ff workspace update \ + --name demo-workspace \ + --description "Updated description" +ff workspace delete --force +``` + +### Workspace state to remember + +- workspaces have unique names and descriptions +- each workspace tracks `last_applied_version` +- providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped + +Deleting a workspace removes its associated workspace-scoped data. diff --git a/content/develop/ai/featureform/streaming.md b/content/develop/ai/featureform/streaming.md index 24e07e804c..dc49ccd5ae 100644 --- a/content/develop/ai/featureform/streaming.md +++ b/content/develop/ai/featureform/streaming.md @@ -5,292 +5,5 @@ linkTitle: Providers and workspaces weight: 70 --- -Redis Feature Form supports multiple providers, secrets provider management, and workspaces. - -## Register providers - -Registering a provider binds one workspace to an external system used for storage, compute, serving, or catalog-backed access. Definitions files refer to providers by name, so provider registration comes first. 
- -### Register a Postgres provider - -```bash -ff provider register demo_postgres \ - --workspace \ - --type postgres \ - --pg-host -featureform-provider-postgres \ - --pg-port 5432 \ - --pg-database featureform_test \ - --pg-user testuser \ - --pg-password-secret env:PG_PASSWORD \ - --pg-ssl-mode disable -``` - -### Register a Redis provider - -```bash -ff provider register demo_redis \ - --workspace \ - --type redis \ - --redis-host -featureform-redis \ - --redis-port 6379 -``` - -If your deployment uses bundled provider addons, the default service names typically include the Helm release name. Otherwise, use the reachable hostnames for your external systems. - -### Verify registration - -```bash -ff provider list --workspace -ff provider get demo_postgres --workspace -``` - -Provider registration performs health validation by default. Fix connectivity or secret-resolution failures instead of treating `--skip-health-check` as the standard path. - -## Postgres provider setup - -```json metadata -{ - "title": "Postgres provider setup", - "description": "Register a Postgres provider for offline storage and SQL execution in Featureform.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use Postgres when the workspace needs an offline store and Postgres-backed SQL execution in the same path. - -### Registration - -```bash -ff provider register demo_postgres \ - --workspace demo-workspace \ - --type postgres \ - --pg-host -featureform-provider-postgres \ - --pg-port 5432 \ - --pg-database featureform_test \ - --pg-user testuser \ - --pg-password-secret env:PG_PASSWORD \ - --pg-ssl-mode disable -``` - -### Provider role - -`offline-store`, `compute` - -The password reference is resolved through the workspace secret provider at runtime. 
- -## Redis provider setup - -```json metadata -{ - "title": "Redis provider setup", - "description": "Register Redis as the online store used by Featureform feature-view serving.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use Redis when the workspace needs an online store for low-latency feature serving. - -### Registration - -```bash -ff provider register demo_redis \ - --workspace demo-workspace \ - --type redis \ - --redis-host -featureform-redis \ - --redis-port 6379 -``` - -### Provider role - -`online-store` - -In the quickstart definitions file, the feature view references this provider with `inference_store="demo_redis"`. - -## S3 provider setup - -```json metadata -{ - "title": "S3 provider setup", - "description": "Register an S3 provider for Featureform offline-store-backed object locations.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use S3 when Featureform needs an object-storage-backed offline location. - -### Registration - -```bash -ff provider register data-lake \ - --workspace demo-workspace \ - --type s3 \ - --s3-bucket featureform-data \ - --s3-region us-west-2 \ - --s3-access-key-id-secret env:AWS_ACCESS_KEY_ID \ - --s3-secret-access-key-secret env:AWS_SECRET_ACCESS_KEY -``` - -### Provider role - -`offline-store` - -Use `--s3-endpoint` for MinIO or LocalStack-style endpoints when needed. 
- -## Spark provider setup - -```json metadata -{ - "title": "Spark provider setup", - "description": "Register a Spark compute provider for Featureform transformation and materialization workloads.", - "categories": null, - "tableOfContents": {"sections":[{"id":"minimal-registration","title":"Minimal registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use Spark when the workspace needs a compute provider for transformation or materialization workloads. - -### Minimal registration - -```bash -ff provider register spark-main \ - --workspace demo-workspace \ - --type spark \ - --spark-master spark://spark-master:7077 -``` - -### Provider role - -`compute` - -Keep Spark registration separate from dataset authoring and from Iceberg catalog registration. - -## Iceberg provider setup - -```json metadata -{ - "title": "Iceberg provider setup", - "description": "Register an Iceberg catalog provider for Featureform offline-store workflows.", - "categories": null, - "tableOfContents": {"sections":[{"id":"registration","title":"Registration"},{"id":"provider-role","title":"Provider role"}]} - -, - "codeExamples": [] -} -``` -Use an Iceberg catalog provider when the workspace needs catalog-backed offline storage. - -### Registration - -```bash -ff provider register iceberg-main \ - --workspace demo-workspace \ - --type iceberg_catalog \ - --iceberg-warehouse s3://featureform-data/warehouse \ - --iceberg-catalog-name featureform \ - --iceberg-rest-uri https://iceberg.example.com -``` - -### Provider role - -`offline-store` - -The exact required fields depend on the catalog backend you choose. - -## Configure secret providers - -Use this section to confirm which secret backend a workspace will use and to register additional backends when `env` is not enough. 
- -### Check the built-in `env` provider - -```bash -ff secret-provider list --workspace demo-workspace -ff secret-provider get env --workspace demo-workspace -``` - -### Register another secret provider - -Environment provider: - -```bash -ff secret-provider register local-env \ - --workspace demo-workspace \ - --type env \ - --env-prefix FF_ -``` - -Vault: - -```bash -ff secret-provider register vault-main \ - --workspace demo-workspace \ - --type vault \ - --vault-address https://vault.example.com \ - --vault-token-path /var/run/secrets/vault-token -``` - -Kubernetes: - -```bash -ff secret-provider register k8s-main \ - --workspace demo-workspace \ - --type k8s \ - --k8s-namespace featureform \ - --k8s-secret-name provider-secrets -``` - -AWS Secrets Manager: - -```bash -ff secret-provider register aws-main \ - --workspace demo-workspace \ - --type aws \ - --aws-region us-west-2 -``` - -### Update or delete - -```bash -ff secret-provider update local-env \ - --workspace demo-workspace \ - --env-prefix PROD_ - -ff secret-provider delete local-env \ - --workspace demo-workspace \ - --yes -``` - -## Manage workspaces - -Use these commands when you need to inspect or change a workspace directly. - -### Core commands - -```bash -ff workspace list -ff workspace get --name demo-workspace -ff workspace update \ - --name demo-workspace \ - --description "Updated description" -ff workspace delete --force -``` - -### Workspace state to remember - -- workspaces have unique names and descriptions -- each workspace tracks `last_applied_version` -- providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped - -Deleting a workspace removes its associated workspace-scoped data. 
From 734b8ed57aa88f0ab542c6aa81fbac38ad77dda0 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 12 May 2026 13:38:08 -0500 Subject: [PATCH 05/18] indent list --- content/develop/ai/featureform/register-providers.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md index 071af22751..de1cb2d1a9 100644 --- a/content/develop/ai/featureform/register-providers.md +++ b/content/develop/ai/featureform/register-providers.md @@ -12,9 +12,10 @@ Register the providers and secret backends a Redis Feature Form workspace needs Before you register providers, make sure you have: - A workspace. See [Manage workspaces](./manage-workspace.md) for the workspace lifecycle commands. -- The `ff` CLI installed and able to reach the Feature Form server. The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. -- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. - For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. +- The `ff` CLI installed and able to reach the Feature Form server. + - The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. +- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. + - For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. 
For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. The examples on this page use placeholder names like `demo-workspace`, `demo_postgres`, and `spark-main`. Substitute the names you want to use in your own deployment. From cc4899b77e687a7728f4964aed5a1c11fb747572 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Thu, 14 May 2026 14:28:18 -0500 Subject: [PATCH 06/18] review feedback --- .../ai/featureform/manage-workspace.md | 16 +- content/develop/ai/featureform/overview.md | 2 +- .../ai/featureform/register-providers.md | 169 +++++++++--------- content/develop/ai/featureform/streaming.md | 11 +- 4 files changed, 97 insertions(+), 101 deletions(-) diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 10eab4100d..32634f3efc 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -1,15 +1,13 @@ --- -title: -description: -linkTitle: -weight: 1 +title: Manage workspaces +description: Inspect and manage Redis Feature Form workspaces with the ff CLI. +linkTitle: Manage workspaces +weight: 20 --- -## Manage workspaces - Use these commands when you need to inspect or change a workspace directly. 
-### Core commands +## Core commands ```bash ff workspace list @@ -20,9 +18,9 @@ ff workspace update \ ff workspace delete --force ``` -### Workspace state to remember +## Workspace state to remember -- workspaces have unique names and descriptions +- workspaces have unique names and optional descriptions - each workspace tracks `last_applied_version` - providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped diff --git a/content/develop/ai/featureform/overview.md b/content/develop/ai/featureform/overview.md index 39c2f12b6e..77ef44e215 100644 --- a/content/develop/ai/featureform/overview.md +++ b/content/develop/ai/featureform/overview.md @@ -45,4 +45,4 @@ The latest release adds enterprise-oriented capabilities: ## What to read next - [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}) -- [Connect providers]({{< relref "/develop/ai/featureform/streaming" >}}) +- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md index de1cb2d1a9..cdae9a1a12 100644 --- a/content/develop/ai/featureform/register-providers.md +++ b/content/develop/ai/featureform/register-providers.md @@ -5,17 +5,16 @@ linkTitle: Register providers weight: 30 --- -Register the providers and secret backends a Redis Feature Form workspace needs before you author features or transformations. Providers connect the workspace to external systems for storage, compute, serving, or catalog-backed access, and definitions files reference them by name. +Register the providers and secret backends Redis Feature Form needs before you author features or transformations. Providers connect the workspace to external systems for storage, compute, serving, or catalog-backed access, and definitions files reference them by name. ## Prerequisites Before you register providers, make sure you have: -- A workspace. 
See [Manage workspaces](./manage-workspace.md) for the workspace lifecycle commands. -- The `ff` CLI installed and able to reach the Feature Form server. +- A workspace. See [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the workspace lifecycle commands. +- The `ff` CLI installed and able to reach the Feature Form server. - The CLI connects to `localhost:9090` by default; override with `--server ` or by setting `ServerAddress` in `~/.featureform/config.yaml`. -- Any environment variables your provider commands reference set **in the Feature Form server's environment**, not in your shell. - - For example, `--pg-password-secret env:PG_PASSWORD` makes the server resolve `PG_PASSWORD` from its own process environment at runtime. For Helm-based deployments, set these through chart values; for binary deployments, export them where the server starts. +- A secret provider registered to back any credentials your provider commands reference. Each workspace ships with a default `env` secret provider that reads from the Feature Form server's process environment. To use Vault, Kubernetes secrets, or AWS Secrets Manager instead, register that backend before you register providers that reference it. See [Configure secret providers](#configure-secret-providers). The examples on this page use placeholder names like `demo-workspace`, `demo_postgres`, and `spark-main`. Substitute the names you want to use in your own deployment. @@ -23,43 +22,110 @@ The examples on this page use placeholder names like `demo-workspace`, `demo_pos **Best practice:** keep the default health check on. Registration surfaces connectivity and secret-resolution problems at the point you can fix them, rather than as silent failures during materialization or serving. Reserve `--skip-health-check` for cases where you've already validated the provider through another channel. 
{{< /note >}} -## Register Postgres for offline storage +## Configure secret providers + +Each workspace starts with a default `env` secret provider that resolves references such as `env:PG_PASSWORD` from the Feature Form server's process environment. Production deployments typically move off `env` because it mixes secrets with general configuration, offers no rotation or audit, and surfaces values in process listings. Vault, Kubernetes secrets, and AWS Secrets Manager each address those gaps. + +### Check the built-in `env` secret provider + +```bash +ff secret-provider list --workspace demo-workspace +ff secret-provider get env --workspace demo-workspace +``` + +### Register another secret provider + +Each backend has different preconditions on the Feature Form server. Pick the one that matches how your server is deployed. + +**Environment secret provider** — best for local development and bootstrap. The server reads variables from its own process environment. Use a prefix (`--env-prefix FF_`) to avoid collisions with other system variables. + +```bash +ff secret-provider register local-env \ + --workspace demo-workspace \ + --type env \ + --env-prefix FF_ +``` -Use Postgres when the workspace needs an offline store and Postgres-backed SQL execution in the same path. As an `offline-store`, Postgres holds the historical feature values that training sets read from. As a `compute` provider, it runs the SQL transformations that produce those values. +**Vault** — best for shared deployments that need rotation and audit. The server must be able to authenticate to Vault: export `VAULT_TOKEN` for token auth, or configure Kubernetes auth (when the server runs in-cluster) or AppRole. The backend uses the KV v2 secrets engine. 
+ +```bash +ff secret-provider register vault-main \ + --workspace demo-workspace \ + --type vault \ + --vault-address https://vault.example.com \ + --vault-token-path /var/run/secrets/vault-token +``` -The `` placeholder in `--pg-host` and in the Redis `--redis-host` stands for your Helm release name. With release name `my-ff`, the bundled Postgres service is `my-ff-featureform-provider-postgres`. If you connect to an external Postgres or Redis instance instead of the bundled chart addons, use that hostname directly. +**Kubernetes secrets** — best when the server runs inside a Kubernetes cluster and provider credentials are already managed as `Secret` resources. The server's service account needs `get` and `list` permissions on `secrets` in the target namespace. + +```bash +ff secret-provider register k8s-main \ + --workspace demo-workspace \ + --type k8s \ + --k8s-namespace featureform \ + --k8s-secret-name provider-secrets +``` + +**AWS Secrets Manager** — best when provider credentials already live in AWS. The server authenticates using the standard AWS credentials chain (IAM role on the host, instance profile, or `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in the server environment). + +```bash +ff secret-provider register aws-main \ + --workspace demo-workspace \ + --type aws \ + --aws-region us-west-2 +``` + +### Update or delete a secret provider + +```bash +ff secret-provider update local-env \ + --workspace demo-workspace \ + --env-prefix PROD_ + +ff secret-provider delete local-env \ + --workspace demo-workspace \ + --yes +``` + +## Register Postgres as an offline store and compute provider + +Register Postgres when your analytical data lives in Postgres and you want Feature Form to manage feature engineering against it. As an `offline-store`, Postgres holds dataset candidates for feature engineering and the Feature Form-managed datasets that result, such as transformed datasets, training sets, and feature views. 
As a `compute` provider, Postgres runs the workloads Feature Form orchestrates, such as SQL transformations on primary datasets. + +Point `--pg-host` at the Postgres instance you want Feature Form to use — typically a managed instance such as Amazon RDS or Aurora in production. To use the Postgres service bundled with the Helm chart for local or non-production work, set `--pg-host` to `<release>-featureform-provider-postgres`, where `<release>` is your Helm release name. ```bash ff provider register demo_postgres \ --workspace demo-workspace \ --type postgres \ - --pg-host -featureform-provider-postgres \ + --pg-host featureform-prod.cluster-abc123.us-west-2.rds.amazonaws.com \ --pg-port 5432 \ --pg-database featureform_test \ --pg-user testuser \ --pg-password-secret env:PG_PASSWORD \ - --pg-ssl-mode disable + --pg-ssl-mode require ``` See the [PostgreSQL documentation](https://www.postgresql.org/docs/) for connection and SSL options. -## Register Redis as the online store +## Register Redis as an online store + +Register Redis when Redis is your low-latency inference database for serving features. As an `online-store`, Redis holds the latest materialized feature values and serves them to applications at inference time. -Use Redis when the workspace needs an online store for low-latency feature serving. As an `online-store`, Redis holds the latest materialized feature values and serves them to applications at inference time. +Point `--redis-host` at the Redis deployment you want Feature Form to use — typically a managed deployment such as Redis Cloud in production. To use the Redis service bundled with the Helm chart for local or non-production work, set `--redis-host` to `<release>-featureform-redis`, where `<release>` is your Helm release name.
```bash ff provider register demo_redis \ --workspace demo-workspace \ --type redis \ - --redis-host -featureform-redis \ - --redis-port 6379 + --redis-host redis-12345.c1.us-west-2-2.ec2.cloud.redislabs.com \ + --redis-port 12345 ``` In the quickstart definitions file, the feature view references this provider with `inference_store="demo_redis"`. See the [Redis documentation](https://redis.io/docs/latest/) for deployment options. ## Register S3 as an offline store -Use S3 when Feature Form needs an object-storage-backed offline location. As an `offline-store`, S3 holds historical feature values as files (typically Parquet) that training sets read from. Choose S3 when dataset size or retention exceeds what a relational store fits. +Register S3 when Feature Form needs an object-storage-backed offline location. As an `offline-store`, S3 holds historical feature values as files (typically Parquet) that training sets read from. Choose S3 when dataset size or retention exceeds what a relational store fits. ```bash ff provider register data-lake \ @@ -73,9 +139,9 @@ ff provider register data-lake \ Use `--s3-endpoint` for MinIO or LocalStack-style endpoints when needed. See the [Amazon S3 documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/) for bucket and IAM setup. -## Register Spark for compute +## Register Spark as a compute provider -Use Spark when the workspace needs a compute provider for transformation or materialization workloads. As a `compute` provider, Spark runs the transformation and materialization jobs that produce feature values. Choose Spark when dataset size exceeds what a single SQL engine can handle. +Register Spark when you need a compute provider for transformation or materialization workloads at scale. As a `compute` provider, Spark runs the transformation and materialization jobs that produce feature values. Choose Spark when dataset size exceeds what a single SQL engine can handle. 
```bash ff provider register spark-main \ @@ -88,7 +154,7 @@ See the [Apache Spark documentation](https://spark.apache.org/docs/latest/) for ## Register an Iceberg catalog -Use an Iceberg catalog provider when the workspace needs catalog-backed offline storage. As an `offline-store`, the catalog tracks versioned table snapshots over object storage. The workspace reads historical feature values from those tables, with schema evolution and time-travel queries. +Register an Iceberg catalog provider when you need catalog-backed offline storage. As an `offline-store`, the catalog tracks versioned table snapshots over object storage. The workspace reads historical feature values from those tables, with schema evolution and time-travel queries. ```bash ff provider register iceberg-main \ @@ -130,71 +196,6 @@ ff provider delete demo_postgres --workspace demo-workspace Use `--force` on `update` when changing values that may break running workloads, such as host, port, or broker addresses. -## Configure secret providers - -Confirm which secret backend a workspace uses, or register an alternate when `env` is not enough. Production deployments typically move off `env` because it mixes secrets with general configuration, offers no rotation or audit, and surfaces values in process listings. Vault, Kubernetes secrets, and AWS Secrets Manager each address those gaps. - -### Check the built-in `env` provider - -```bash -ff secret-provider list --workspace demo-workspace -ff secret-provider get env --workspace demo-workspace -``` - -### Register another secret provider - -Each backend has different preconditions on the Feature Form server. Pick the one that matches how your server is deployed. - -**Environment provider** — best for local development and bootstrap. The server reads variables from its own process environment. Use a prefix (`--env-prefix FF_`) to avoid collisions with other system variables. 
- -```bash -ff secret-provider register local-env \ - --workspace demo-workspace \ - --type env \ - --env-prefix FF_ -``` - -**Vault** — best for shared deployments that need rotation and audit. The server must be able to authenticate to Vault: export `VAULT_TOKEN` for token auth, or configure Kubernetes auth (when the server runs in-cluster) or AppRole. The backend uses the KV v2 secrets engine. - -```bash -ff secret-provider register vault-main \ - --workspace demo-workspace \ - --type vault \ - --vault-address https://vault.example.com \ - --vault-token-path /var/run/secrets/vault-token -``` - -**Kubernetes secrets** — best when the server runs inside a Kubernetes cluster and provider credentials are already managed as `Secret` resources. The server's service account needs `get` and `list` permissions on `secrets` in the target namespace. - -```bash -ff secret-provider register k8s-main \ - --workspace demo-workspace \ - --type k8s \ - --k8s-namespace featureform \ - --k8s-secret-name provider-secrets -``` - -**AWS Secrets Manager** — best when provider credentials already live in AWS. The server authenticates using the standard AWS credentials chain (IAM role on the host, instance profile, or `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` in the server environment). - -```bash -ff secret-provider register aws-main \ - --workspace demo-workspace \ - --type aws \ - --aws-region us-west-2 -``` - -### Update or delete a secret provider - -```bash -ff secret-provider update local-env \ - --workspace demo-workspace \ - --env-prefix PROD_ - -ff secret-provider delete local-env \ - --workspace demo-workspace \ - --yes -``` - ## Next steps -With providers registered, the workspace is ready to receive feature definitions. See [Define and deploy features](./define-and-deploy-features.md) for authoring a definitions file and running `ff apply`. +With providers registered, the workspace is ready to receive feature definitions. 
See [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) for authoring a definitions file and running `ff apply`. diff --git a/content/develop/ai/featureform/streaming.md b/content/develop/ai/featureform/streaming.md index dc49ccd5ae..41a6bd4852 100644 --- a/content/develop/ai/featureform/streaming.md +++ b/content/develop/ai/featureform/streaming.md @@ -1,9 +1,6 @@ --- -title: Providers and workspaces -description: Build stream-backed features with Kafka, streaming transformations, and Redis serving. -linkTitle: Providers and workspaces -weight: 70 +title: +description: +linkTitle: +weight: 1 --- - - - From 12a7a2ed5c6171a27c49b584602085aefb503cd4 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 09:49:39 -0500 Subject: [PATCH 07/18] DOC-6588 --- content/develop/ai/featureform/concepts.md | 118 +++++++++++++++++- .../training-sets-and-feature-views.md | 116 ----------------- 2 files changed, 113 insertions(+), 121 deletions(-) delete mode 100644 content/develop/ai/featureform/training-sets-and-feature-views.md diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index 71a7066a02..5c6c926cba 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -1,6 +1,114 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Concepts +description: Learn the core Feature Form concepts behind workspaces, providers, secrets, and serving. +linkTitle: Concepts +weight: 30 +--- + +These pages explain the resource model and the boundaries that matter when you operate Feature Form. + +## Resources and workspace graph + +A Feature Form workspace owns one logical resource graph. When you run `ff apply`, Feature Form compares the submitted desired state with the current graph and commits a new version if the change is accepted. 
+ +### Resource types in the graph + +- entities +- datasets +- transformations +- features +- labels +- training sets +- feature views + +### Why the graph matters + +- it powers lineage and dependency views +- it tracks `last_applied_version` +- it feeds serving metadata from committed state + +### Useful commands + +```bash +ff graph workspace overview --workspace demo-workspace +ff graph workspace stats --workspace demo-workspace +ff graph dataset get demo_transactions --workspace demo-workspace +ff graph feature-view get demo_customer_feature_view --workspace demo-workspace +``` + +## Providers and provider roles + +A provider is a workspace-scoped connection to external infrastructure. Definitions files reference providers by name, but the provider itself must already be registered in the workspace. + +### Provider roles + +- `offline-store` for batch data and materialized datasets +- `online-store` for low-latency serving +- `compute` for transformations and materialization work +- `streaming` for streaming integrations + +### Core providers documented here + +- Postgres: `offline-store`, `compute` +- Redis: `online-store` +- S3: `offline-store` +- Spark: `compute` +- Iceberg catalog: `offline-store` + +### Workflow mapping + +- Datasets and training sets need an offline store. +- Feature views need an online store. +- SQL and Spark transformations need compute. +- One provider can fill more than one role. + +## Secrets and secret references + +Feature Form stores secret references in provider configuration instead of storing plaintext secret values itself. A provider config can contain a reference like `env:PG_PASSWORD`, which Feature Form resolves through a registered secret provider at runtime. 
+ +### Mental model + +- A secret provider is a workspace-scoped backend such as `env`, Vault, Kubernetes, or AWS Secrets Manager +- A secret reference is the value stored in provider config +- Data providers use secret references but do not own secret storage + +### Default path for a new workspace + +Every new workspace creates a built-in `env` secret provider. That makes references such as `env:PG_PASSWORD` valid as long as the runtime environment actually exposes `PG_PASSWORD`. + +The important detail is runtime scope: in deployed environments, the resolving process is usually the Featureform server, not your local CLI shell. + +### What Featureform stores + +- Secret provider metadata and configuration +- Secret references embedded in provider configuration + +### What Featureform does not store + +- Plaintext secret values from external backends + +## Serving and feature views + +A feature view is the serving interface for a set of features keyed by an entity. In the documented Redis-backed workflow, the feature view is what applications and model services read from at inference time. + +### A feature view includes + +- the feature-view name +- the logical entity and key columns +- the served feature schema +- the online provider +- serving version and key-prefix details + +### Serving requires + +- a registered online store such as Redis +- a committed graph version containing the feature view +- ready serving metadata for that workspace and view + +### Main entry points + +- gRPC: `ServingService.Serve`, `ServingService.GetServingMetadata` +- REST: `/api/v1/serve` +- Python client: `client.serve(...)` + +Serving reads and serving-metadata reads are separate RBAC permissions. 
diff --git a/content/develop/ai/featureform/training-sets-and-feature-views.md b/content/develop/ai/featureform/training-sets-and-feature-views.md deleted file mode 100644 index 11acee80c3..0000000000 --- a/content/develop/ai/featureform/training-sets-and-feature-views.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: Concepts -description: Learn the core Feature Form concepts behind workspaces, providers, secrets, and serving. -linkTitle: Concepts -weight: 30 ---- - -These pages explain the resource model and the boundaries that matter when you operate Feature Form. - -## Resources and workspace graph - -A Feature Form workspace owns one logical resource graph. When you run `ff apply`, Feature Form compares the submitted desired state with the current graph and commits a new version if the change is accepted. - -### Resource types in the graph - -- entities -- datasets -- transformations -- features -- labels -- training sets -- feature views - -### Why the graph matters - -- it powers lineage and dependency views -- it tracks `last_applied_version` -- it feeds serving metadata from committed state - -### Useful commands - -```bash -ff graph workspace overview --workspace demo-workspace -ff graph workspace stats --workspace demo-workspace -ff graph dataset get demo_transactions --workspace demo-workspace -ff graph feature-view get demo_customer_feature_view --workspace demo-workspace -``` - -## Providers and provider roles - -A provider is a workspace-scoped connection to external infrastructure. Definitions files reference providers by name, but the provider itself must already be registered in the workspace. 
- -### Provider roles - -- `offline-store` for batch data and materialized datasets -- `online-store` for low-latency serving -- `compute` for transformations and materialization work -- `streaming` for streaming integrations - -### Core providers documented here - -- Postgres: `offline-store`, `compute` -- Redis: `online-store` -- S3: `offline-store` -- Spark: `compute` -- Iceberg catalog: `offline-store` - -### Workflow mapping - -- Datasets and training sets need an offline store. -- Feature views need an online store. -- SQL and Spark transformations need compute. -- One provider can fill more than one role. - - -## Secrets and secret references - -Feature Form stores secret references in provider configuration instead of storing plaintext secret values itself. A provider config can contain a reference like `env:PG_PASSWORD`, which Feature Form resolves through a registered secret provider at runtime. - -### Mental model - -- A secret provider is a workspace-scoped backend such as `env`, Vault, Kubernetes, or AWS Secrets Manager -- A secret reference is the value stored in provider config -- Data providers use secret references but do not own secret storage - -### Default path for a new workspace - -Every new workspace creates a built-in `env` secret provider. That makes references such as `env:PG_PASSWORD` valid as long as the runtime environment actually exposes `PG_PASSWORD`. - -The important detail is runtime scope: in deployed environments, the resolving process is usually the Featureform server, not your local CLI shell. - -### What Featureform stores - -- Secret provider metadata and configuration -- Secret references embedded in provider configuration - -### What Featureform does not store - -- Plaintext secret values from external backends - -## Serving and feature views - -A feature view is the serving interface for a set of features keyed by an entity. 
In the documented Redis-backed workflow, the feature view is what applications and model services read from at inference time. - -### A feature view includes - -- the feature-view name -- the logical entity and key columns -- the served feature schema -- the online provider -- serving version and key-prefix details - -### Serving requires - -- a registered online store such as Redis -- a committed graph version containing the feature view -- ready serving metadata for that workspace and view - -### Main entry points - -- gRPC: `ServingService.Serve`, `ServingService.GetServingMetadata` -- REST: `/api/v1/serve` -- Python client: `client.serve(...)` - -Serving reads and serving-metadata reads are separate RBAC permissions. - From 5eccecdcb9802ee2fe178a19eb9d32a87a5d9b1d Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 10:03:22 -0500 Subject: [PATCH 08/18] concepts rough draft --- content/develop/ai/featureform/concepts.md | 213 ++++++++++++++------- 1 file changed, 141 insertions(+), 72 deletions(-) diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index 5c6c926cba..f4009cec71 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -1,114 +1,183 @@ --- -title: Concepts -description: Learn the core Feature Form concepts behind workspaces, providers, secrets, and serving. +title: Redis Feature Form concepts +description: Learn the workspace, resource graph, provider, secret, and serving model behind Redis Feature Form. linkTitle: Concepts weight: 30 --- -These pages explain the resource model and the boundaries that matter when you operate Feature Form. +Redis Feature Form is a feature platform: it manages how raw data in your existing systems becomes the entity-keyed values your models read at inference time. 
This page introduces the vocabulary and the model behind that workflow, so the rest of the documentation reads as application rather than memorization. -## Resources and workspace graph +## How the pieces fit together -A Feature Form workspace owns one logical resource graph. When you run `ff apply`, Feature Form compares the submitted desired state with the current graph and commits a new version if the change is accepted. +A Feature Form deployment runs one or more **workspaces**. Each workspace owns a versioned **resource graph** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **definitions file** and submit it with `ff apply`. -### Resource types in the graph +The graph itself is data, not credentials or connections. **Providers** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **secret references** in those providers point at a secret backend that holds the actual passwords and tokens. At the end of the chain, a **feature view** is the single resource the rest of your stack reads from to serve features online. -- entities -- datasets -- transformations -- features -- labels -- training sets -- feature views +Each of these terms is unpacked in the rest of this page. -### Why the graph matters +## Workspaces -- it powers lineage and dependency views -- it tracks `last_applied_version` -- it feeds serving metadata from committed state +A workspace is the tenant boundary for everything Feature Form manages. The graph, the providers, the secret references, the catalog of materialized locations, and the serving metadata all live inside one workspace and cannot leak across to another. -### Useful commands +That isolation is the unit you use to separate environments — dev, staging, prod — or to give independent teams their own slice of a shared deployment. 
Two workspaces can point at the same external Postgres database and still not see each other's resources, because the graph that names those resources is workspace-scoped. -```bash -ff graph workspace overview --workspace demo-workspace -ff graph workspace stats --workspace demo-workspace -ff graph dataset get demo_transactions --workspace demo-workspace -ff graph feature-view get demo_customer_feature_view --workspace demo-workspace +A workspace also tracks `last_applied_version`, a counter that advances each time the graph commits a new version. Inspection and serving commands always read from the latest committed version, not from a draft. + +To create, inspect, update, or delete workspaces, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). + +## The resource graph + +The resource graph is the single object that represents the desired state of a workspace. Every feature, label, transformation, dataset, and feature view belongs to that graph and references the others by name. + +Two properties make the graph the right mental model: + +- **It is versioned as a whole.** When you submit a change, Feature Form commits a new graph version atomically. Either everything in the change lands together or nothing does. You don't end up with half-applied feature definitions. +- **It is declarative.** You describe what the graph should look like, not the sequence of steps to get there. Feature Form is responsible for figuring out the delta between what exists and what you've asked for. + +### Resource types + +A graph is built from seven resource types. New users encountering Feature Form for the first time benefit from learning these as a vocabulary list — every other concept on this page builds on them. + +- **Entities** identify the real-world objects features describe, such as a `customer` or `order`. Other resources join on the entity's key column. 
+- **Datasets** point at an existing table, view, or file on an offline store and make it visible to the graph. The data itself stays where it lives; Feature Form just registers a handle to it. +- **Transformations** produce new datasets from existing ones, expressed as SQL or as a Spark job. A transformation describes the shape of the output; the compute that runs it is supplied by a provider. +- **Features** are entity-keyed values that get served at inference time. A feature attaches to a column of a dataset, optionally applies an aggregation (such as `SUM` over a 7-day window), and declares which provider owns its computation. +- **Labels** look like features but feed offline training rather than online serving. They carry the value a model is trying to predict. +- **Training sets** join one or more features with a label on the entity key, so an offline training job reads a single time-aligned table instead of stitching things together by hand. +- **Feature views** are the online serving interface for a group of features. They are the only resource that downstream applications and model services interact with directly. + +A short definitions file makes the shape concrete. Don't worry about syntax yet — the point is to see how the vocabulary above appears as code.
+ +```python +import featureform as ff +from datetime import timedelta + +customer = ff.Entity(name="customer") + +transactions = postgres.dataset( + name="transactions_raw", + table="transactions", + timestamp_column="timestamp", +) + +@postgres.sql_transformation(name="customer_daily_rollups", inputs=[transactions]) +def customer_daily_rollups() -> str: + return """ + SELECT customer_id, + date_trunc('day', timestamp) AS event_day, + SUM(transaction_amount) AS total_amount + FROM {{transactions_raw}} + GROUP BY 1, 2 + """ + +customer_total_amount_7d = ( + ff.Feature(name="customer_total_amount_7d") + .from_dataset(customer_daily_rollups, entity="customer", + entity_column="customer_id", value="total_amount", + timestamp="event_day") + .aggregate(function=ff.AggregateFunction.SUM, window=timedelta(days=7)) +) + +customer_risk_view = ff.FeatureView( + name="customer_risk_feature_view", + entity="customer", + features=[customer_total_amount_7d], + inference_store="demo_redis", +) ``` -## Providers and provider roles +### Definitions files and `ff apply` -A provider is a workspace-scoped connection to external infrastructure. Definitions files reference providers by name, but the provider itself must already be registered in the workspace. +The Python file above is the source of truth for what the graph should look like — not a script that mutates Feature Form imperatively. When you run `ff apply`, Feature Form imports the file, collects the resources it defines, and treats that set as the workspace's desired state. A planner compares the submission with the current graph, and if the change is accepted, a new graph version is committed. -### Provider roles +By default, an apply is replacement-oriented: a resource that exists in the workspace but is not in the submitted set is a candidate for removal. That behavior is what makes the file a true source of truth. 
When you intentionally submit a partial set and want missing resources to stay untouched, you can apply in merge mode instead. -- `offline-store` for batch data and materialized datasets -- `online-store` for low-latency serving -- `compute` for transformations and materialization work -- `streaming` for streaming integrations +{{< note >}} +**Definitions files describe features, not infrastructure.** Providers and secret backends are registered separately by a workspace admin. Definitions files reference providers by name and assume they already exist. This separation keeps feature authors away from credentials and infrastructure choices. +{{< /note >}} -### Core providers documented here +For an end-to-end walkthrough of authoring a definitions file and applying it, see the [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}). For the full apply lifecycle and editing loop, see [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) and [Update features]({{< relref "/develop/ai/featureform/update-features" >}}). -- Postgres: `offline-store`, `compute` -- Redis: `online-store` -- S3: `offline-store` -- Spark: `compute` -- Iceberg catalog: `offline-store` +## Providers -### Workflow mapping +A provider is the workspace's connection to an external system. It carries the host, port, credentials reference, and any backend-specific configuration Feature Form needs to talk to that system. Resources in the graph reference providers by name, so a provider must be registered in the workspace before any resource that uses it can be applied. -- Datasets and training sets need an offline store. -- Feature views need an online store. -- SQL and Spark transformations need compute. -- One provider can fill more than one role. 
+Every provider fills one or more **roles**, which describe the kind of work it can do for the workspace: -## Secrets and secret references +| Role | What it does | +| --------------- | ----------------------------------------------------------------------- | +| `offline-store` | Holds batch data and materialized datasets the graph reads from. | +| `online-store` | Serves materialized feature values to applications at low latency. | +| `compute` | Runs transformations and materialization jobs. | +| `streaming` | Connects the workspace to streaming sources. | -Feature Form stores secret references in provider configuration instead of storing plaintext secret values itself. A provider config can contain a reference like `env:PG_PASSWORD`, which Feature Form resolves through a registered secret provider at runtime. +One provider often fills more than one role. Postgres, for example, is commonly registered as both `offline-store` and `compute` because the same instance that holds datasets can run SQL transformations against them. The documented integrations and their typical roles: -### Mental model +| Provider | Typical roles | +| ----------------- | ------------------------------ | +| Postgres | `offline-store`, `compute` | +| Redis | `online-store` | +| S3 | `offline-store` | +| Spark | `compute` | +| Iceberg catalog | `offline-store` | -- A secret provider is a workspace-scoped backend such as `env`, Vault, Kubernetes, or AWS Secrets Manager -- A secret reference is the value stored in provider config -- Data providers use secret references but do not own secret storage +The role model is what lets a graph stay portable: a feature definition doesn't care that compute happens to be Postgres in dev and Spark in prod, only that some provider fills the `compute` role. -### Default path for a new workspace +To register providers in a workspace, see [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}). 
-Every new workspace creates a built-in `env` secret provider. That makes references such as `env:PG_PASSWORD` valid as long as the runtime environment actually exposes `PG_PASSWORD`. +## Secrets and secret references -The important detail is runtime scope: in deployed environments, the resolving process is usually the Featureform server, not your local CLI shell. +Feature Form never stores plaintext credentials in the graph. A provider configuration carries a **secret reference** that looks like `env:PG_PASSWORD`, and Feature Form resolves that reference through a registered **secret provider** at the moment the credential is needed. -### What Featureform stores +Two consequences are worth internalizing as a new user: -- Secret provider metadata and configuration -- Secret references embedded in provider configuration +- **The graph is safe to inspect and export.** Nothing in it contains a usable credential. You can hand the graph to another team, version it, or paste it into a ticket without leaking secrets. +- **The process that resolves a reference is whichever process actually needs the credential.** In a deployed environment, that's almost always the Feature Form server, not your local CLI shell. A reference such as `env:PG_PASSWORD` reads from the server's process environment, not yours. -### What Featureform does not store +Every new workspace is created with a built-in `env` secret provider, which makes `env:` references work out of the box for local development. Production deployments typically register a Vault, Kubernetes-secrets, or AWS Secrets Manager backend instead, because the `env` backend offers no rotation, no audit, and exposes values in process listings. -- Plaintext secret values from external backends +To register a secret provider for a workspace, see [Configure secret providers]({{< relref "/develop/ai/featureform/register-providers#configure-secret-providers" >}}). 
-## Serving and feature views +## Feature views and serving + +A feature view is the resource that everything else in the graph eventually feeds. It is the online serving interface — the single name an application or model service uses when it asks Feature Form for the latest features about a particular entity. + +A feature view declares: + +- The entity it is keyed by (for example, `customer`). +- The list of features it exposes. +- The online provider that holds the materialized values — typically Redis. +- A materialization engine that produces those values from offline data. + +```python +customer_risk_view = ff.FeatureView( + name="customer_risk_feature_view", + entity="customer", + features=[customer_total_amount_7d, customer_transaction_count_7d], + inference_store="demo_redis", +) +``` -A feature view is the serving interface for a set of features keyed by an entity. In the documented Redis-backed workflow, the feature view is what applications and model services read from at inference time. +For a feature view to actually serve, three things must line up: the online provider it points to must be registered and reachable, the graph version that introduced the feature view must be committed, and materialization must have populated values for the entities you want to query. If any of those are missing, serving fails immediately rather than returning stale data. -### A feature view includes +The same logical operation is reachable through three surfaces, so applications can pick whichever fits their stack: -- the feature-view name -- the logical entity and key columns -- the served feature schema -- the online provider -- serving version and key-prefix details +- A gRPC service (`ServingService.Serve` and `ServingService.GetServingMetadata`). +- A REST endpoint (`POST /api/v1/serve`). +- A Python client (`client.serve(...)`). 
-### Serving requires +One subtle but important detail: reading feature values and reading serving metadata are governed by **separate** RBAC permissions. A dashboard or diagnostic principal can be allowed to inspect what a feature view looks like without also being allowed to read live feature values, and vice versa. -- a registered online store such as Redis -- a committed graph version containing the feature view -- ready serving metadata for that workspace and view +To serve from a feature view in an application, see [Serve features]({{< relref "/develop/ai/featureform/serve-features" >}}). To inspect datasets, training sets, or feature views directly, see [Query data]({{< relref "/develop/ai/featureform/query-data" >}}). -### Main entry points +## Next steps -- gRPC: `ServingService.Serve`, `ServingService.GetServingMetadata` -- REST: `/api/v1/serve` -- Python client: `client.serve(...)` +Now that the vocabulary is in place, the rest of the documentation maps cleanly onto these concepts: -Serving reads and serving-metadata reads are separate RBAC permissions. +- [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}) — one end-to-end walkthrough that exercises every concept on this page. +- [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) — create, inspect, update, and delete workspaces. +- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) — connect the workspace to Postgres, Redis, S3, Spark, or an Iceberg catalog, and register secret backends. +- [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) — author a definitions file and run `ff apply`. +- [Update features]({{< relref "/develop/ai/featureform/update-features" >}}) — iterate on a graph after the first apply. +- [Serve features]({{< relref "/develop/ai/featureform/serve-features" >}}) — read from a feature view in an application. 
+- [Query data]({{< relref "/develop/ai/featureform/query-data" >}}) — inspect datasets, training sets, and feature views directly. From 31edbcd7ff7cbcc0604ae4688b44bac40a508d73 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 10:26:25 -0500 Subject: [PATCH 09/18] intro and how the pieces fit together --- content/develop/ai/featureform/concepts.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index f4009cec71..fe99b55144 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -5,13 +5,13 @@ linkTitle: Concepts weight: 30 --- -Redis Feature Form is a feature platform: it manages how raw data in your existing systems becomes the entity-keyed values your models read at inference time. This page introduces the vocabulary and the model behind that workflow, so the rest of the documentation reads as application rather than memorization. +Redis Feature Form is a feature platform. It turns raw data from your existing systems into the values your models read at inference time. This page introduces the core concepts behind that workflow. ## How the pieces fit together -A Feature Form deployment runs one or more **workspaces**. Each workspace owns a versioned **resource graph** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **definitions file** and submit it with `ff apply`. +A Feature Form deployment runs one or more **[workspaces](#workspaces)**. Each workspace owns a versioned **[resource graph](#the-resource-graph)** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **[definitions file](#definitions-files-and-ff-apply)** and submit it with `ff apply`. -The graph itself is data, not credentials or connections. 
**Providers** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **secret references** in those providers point at a secret backend that holds the actual passwords and tokens. At the end of the chain, a **feature view** is the single resource the rest of your stack reads from to serve features online. +The graph itself is data, not credentials or connections. **[Providers](#providers)** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **[secret references](#secrets-and-secret-references)** point to the backend that holds the credentials. At the end of the chain, a **[feature view](#feature-views-and-serving)** is the single resource the rest of your stack reads from to serve features online. Each of these terms is unpacked in the rest of this page. @@ -86,7 +86,7 @@ customer_risk_view = ff.FeatureView( ) ``` -### Definitions files and `ff apply` +### Definitions files and `ff apply` {#definitions-files-and-ff-apply} The Python file above is the source of truth for what the graph should look like — not a script that mutates Feature Form imperatively. When you run `ff apply`, Feature Form imports the file, collects the resources it defines, and treats that set as the workspace's desired state. A planner compares the submission with the current graph, and if the change is accepted, a new graph version is committed. 
From 7ef2ddc66044e71086d17c9432925f79a85fb41a Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 15:11:15 -0500 Subject: [PATCH 10/18] concepts edits --- content/develop/ai/featureform/concepts.md | 66 +++++++++++++--------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index fe99b55144..c6ed73c56a 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -9,30 +9,28 @@ Redis Feature Form is a feature platform. It turns raw data from your existing s ## How the pieces fit together -A Feature Form deployment runs one or more **[workspaces](#workspaces)**. Each workspace owns a versioned **[resource graph](#the-resource-graph)** that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python **[definitions file](#definitions-files-and-ff-apply)** and submit it with `ff apply`. +A Feature Form deployment runs one or more [workspaces](#workspaces). Each workspace owns a versioned [resource graph](#the-resource-graph) that describes what features should exist, where their inputs live, and how they're served. You author that graph in a Python [definitions file](#definitions-files-and-ff-apply) and apply it with `ff apply`. -The graph itself is data, not credentials or connections. **[Providers](#providers)** connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and **[secret references](#secrets-and-secret-references)** point to the backend that holds the credentials. At the end of the chain, a **[feature view](#feature-views-and-serving)** is the single resource the rest of your stack reads from to serve features online. - -Each of these terms is unpacked in the rest of this page. +The graph itself is data, not credentials or connections. 
[Providers](#providers) connect the workspace to external systems (Postgres, Redis, S3, Spark, an Iceberg catalog), and [secret references](#secrets-and-secret-references) point to the backend that holds the credentials. At the end of the chain, a [feature view](#feature-views-and-serving) is the single resource the rest of your stack reads from to serve features online. ## Workspaces -A workspace is the tenant boundary for everything Feature Form manages. The graph, the providers, the secret references, the catalog of materialized locations, and the serving metadata all live inside one workspace and cannot leak across to another. +A workspace is a self-contained environment in Feature Form. Each one owns its own resource graph, providers, secret references, and serving metadata. Nothing is shared between workspaces. -That isolation is the unit you use to separate environments — dev, staging, prod — or to give independent teams their own slice of a shared deployment. Two workspaces can point at the same external Postgres database and still not see each other's resources, because the graph that names those resources is workspace-scoped. +Use workspaces to keep environments such as dev, staging, and prod separate, or to give independent teams their own area on a shared deployment. Two workspaces can connect to the same external Postgres database and remain fully isolated, because each workspace tracks its own resources. -A workspace also tracks `last_applied_version`, a counter that advances each time the graph commits a new version. Inspection and serving commands always read from the latest committed version, not from a draft. +Every workspace also has a `last_applied_version` counter that increases each time you successfully apply a change. Read commands always return the latest committed version. To create, inspect, update, or delete workspaces, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). 
## The resource graph -The resource graph is the single object that represents the desired state of a workspace. Every feature, label, transformation, dataset, and feature view belongs to that graph and references the others by name. +The resource graph describes what a workspace should look like. Every feature, label, transformation, dataset, and feature view lives in this graph and refers to the others by name. -Two properties make the graph the right mental model: +Two properties shape how you work with it: -- **It is versioned as a whole.** When you submit a change, Feature Form commits a new graph version atomically. Either everything in the change lands together or nothing does. You don't end up with half-applied feature definitions. -- **It is declarative.** You describe what the graph should look like, not the sequence of steps to get there. Feature Form is responsible for figuring out the delta between what exists and what you've asked for. +- It is versioned as a whole. Each successful change creates a new version of the entire graph. Either every resource in the change lands together, or nothing does — you never end up with half-applied feature definitions. +- It is declarative. You describe what the graph should look like, not the steps to get there. Feature Form figures out the difference between the current graph and the new one and applies only what changed. ### Resource types @@ -46,7 +44,7 @@ A graph is built from seven resource types. New users encountering Feature Form - **Training sets** join one or more features with a label on the entity key, so an offline training job reads a single time-aligned table instead of stitching things together by hand. - **Feature views** are the online serving interface for a group of features. They are the only resource that downstream applications and model services interact with directly. -A short definitions file makes the shape concrete. 
The reader shouldn't worry about syntax yet — the point is to see how the vocabulary above appears as code. +The following example definitions file shows how the vocabulary above appears as code. ```python import featureform as ff @@ -88,21 +86,21 @@ customer_risk_view = ff.FeatureView( ### Definitions files and `ff apply` {#definitions-files-and-ff-apply} -The Python file above is the source of truth for what the graph should look like — not a script that mutates Feature Form imperatively. When you run `ff apply`, Feature Form imports the file, collects the resources it defines, and treats that set as the workspace's desired state. A planner compares the submission with the current graph, and if the change is accepted, a new graph version is committed. +The Python definitions file is the source of truth for what the graph should look like. The file uses Python to declare resources, not to run commands against Feature Form. When you run `ff apply`, Feature Form imports the file, collects those resources, and treats them as the workspace's desired state. Feature Form compares that set with the current graph and, if the change is accepted, commits a new graph version. -By default, an apply is replacement-oriented: a resource that exists in the workspace but is not in the submitted set is a candidate for removal. That behavior is what makes the file a true source of truth. When you intentionally submit a partial set and want missing resources to stay untouched, you can apply in merge mode instead. +By default, `ff apply` replaces the workspace's current graph with the resources defined in the file. Any existing resource not in the file becomes a candidate for removal. To apply a partial set and leave missing resources untouched, run `ff apply --merge` instead. {{< note >}} -**Definitions files describe features, not infrastructure.** Providers and secret backends are registered separately by a workspace admin. 
Definitions files reference providers by name and assume they already exist. This separation keeps feature authors away from credentials and infrastructure choices. +Definitions files describe features, not infrastructure. Providers and secret backends are registered separately by a workspace admin. Definitions files reference providers by name and assume they already exist. This separation keeps feature authors away from credentials and infrastructure choices. {{< /note >}} For an end-to-end walkthrough of authoring a definitions file and applying it, see the [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}). For the full apply lifecycle and editing loop, see [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}) and [Update features]({{< relref "/develop/ai/featureform/update-features" >}}). ## Providers -A provider is the workspace's connection to an external system. It carries the host, port, credentials reference, and any backend-specific configuration Feature Form needs to talk to that system. Resources in the graph reference providers by name, so a provider must be registered in the workspace before any resource that uses it can be applied. +A provider is the workspace's connection to an external system. It carries the host, port, credentials reference, and any configuration Feature Form needs to talk to that system. Resources in the graph reference providers by name, so you must register a provider in the workspace before applying any resource that uses it. 
-Every provider fills one or more **roles**, which describe the kind of work it can do for the workspace: +Every provider fills one or more roles, which describe the kind of work it can do for the workspace: | Role | What it does | | --------------- | ----------------------------------------------------------------------- | @@ -127,12 +125,12 @@ To register providers in a workspace, see [Register providers]({{< relref "/deve ## Secrets and secret references -Feature Form never stores plaintext credentials in the graph. A provider configuration carries a **secret reference** that looks like `env:PG_PASSWORD`, and Feature Form resolves that reference through a registered **secret provider** at the moment the credential is needed. +Feature Form never stores plaintext credentials in the graph. A provider configuration carries a secret reference. Feature Form resolves it through a registered secret provider when the credential is needed. -Two consequences are worth internalizing as a new user: +Keeping credentials out of the graph has two important consequences: -- **The graph is safe to inspect and export.** Nothing in it contains a usable credential. You can hand the graph to another team, version it, or paste it into a ticket without leaking secrets. -- **The process that resolves a reference is whichever process actually needs the credential.** In a deployed environment, that's almost always the Feature Form server, not your local CLI shell. A reference such as `env:PG_PASSWORD` reads from the server's process environment, not yours. +- The graph is safe to inspect and export. Nothing in it contains a usable credential. You can hand the graph to another team, version it, or paste it into a ticket without leaking secrets. +- The process that resolves a reference is whichever process actually needs the credential. In a deployed environment, that's almost always the Feature Form server, not your local CLI shell. 
A reference such as `env:PG_PASSWORD` reads from the server's process environment, not yours. Every new workspace is created with a built-in `env` secret provider, which makes `env:` references work out of the box for local development. Production deployments typically register a Vault, Kubernetes-secrets, or AWS Secrets Manager backend instead, because the `env` backend offers no rotation, no audit, and exposes values in process listings. @@ -140,15 +138,17 @@ To register a secret provider for a workspace, see [Configure secret providers]( ## Feature views and serving -A feature view is the resource that everything else in the graph eventually feeds. It is the online serving interface — the single name an application or model service uses when it asks Feature Form for the latest features about a particular entity. +A feature view is the resource that everything else in the graph eventually feeds. Applications query it to get the latest features for an entity. A feature view declares: -- The entity it is keyed by (for example, `customer`). +- The entity used as the lookup key (for example, `customer`). - The list of features it exposes. - The online provider that holds the materialized values — typically Redis. - A materialization engine that produces those values from offline data. +For example: + ```python customer_risk_view = ff.FeatureView( name="customer_risk_feature_view", @@ -158,22 +158,32 @@ customer_risk_view = ff.FeatureView( ) ``` -For a feature view to actually serve, three things must line up: the online provider it points to must be registered and reachable, the graph version that introduced the feature view must be committed, and materialization must have populated values for the entities you want to query. If any of those are missing, serving fails immediately rather than returning stale data. +### Feature view requirements + +Before applications can read from a feature view: + +- The online provider it points to must be registered and reachable. 
+- The graph version that introduced the feature view must be committed. +- Materialization must have populated values for the entities you want to query. + +If any of those are missing, the read fails immediately rather than returning stale data. -The same logical operation is reachable through three surfaces, so applications can pick whichever fits their stack: +### Serving interfaces + +Applications can read feature values through any of three interfaces: - A gRPC service (`ServingService.Serve` and `ServingService.GetServingMetadata`). - A REST endpoint (`POST /api/v1/serve`). - A Python client (`client.serve(...)`). -One subtle but important detail: reading feature values and reading serving metadata are governed by **separate** RBAC permissions. A dashboard or diagnostic principal can be allowed to inspect what a feature view looks like without also being allowed to read live feature values, and vice versa. +{{< note >}} +Reading feature values and reading serving metadata are governed by separate RBAC permissions. For example, a dashboard user can have access to feature view schemas without access to the actual values — or vice versa. +{{< /note >}} To serve from a feature view in an application, see [Serve features]({{< relref "/develop/ai/featureform/serve-features" >}}). To inspect datasets, training sets, or feature views directly, see [Query data]({{< relref "/develop/ai/featureform/query-data" >}}). ## Next steps -Now that the vocabulary is in place, the rest of the documentation maps cleanly onto these concepts: - - [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}) — one end-to-end walkthrough that exercises every concept on this page. - [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) — create, inspect, update, and delete workspaces. 
- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) — connect the workspace to Postgres, Redis, S3, Spark, or an Iceberg catalog, and register secret backends. From f9eeec89dd09b76fcd2924aa1bc7b7d1c42493c5 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 16:09:23 -0500 Subject: [PATCH 11/18] manage workspaces DOC-6583 --- .../ai/featureform/manage-workspace.md | 131 ++++++++++++++++-- content/develop/ai/featureform/providers.md | 26 ---- content/operate/featureform/auth.md | 76 +--------- 3 files changed, 124 insertions(+), 109 deletions(-) diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 32634f3efc..3b3b9fdbcc 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -1,27 +1,142 @@ --- title: Manage workspaces -description: Inspect and manage Redis Feature Form workspaces with the ff CLI. +description: Create, verify access to, monitor, and delete Redis Feature Form workspaces with the ff CLI. linkTitle: Manage workspaces weight: 20 --- -Use these commands when you need to inspect or change a workspace directly. +A workspace is a self-contained environment in Redis Feature Form. Each workspace owns its own resource graph, providers, secret references, catalog entries, and serving metadata—nothing is shared between workspaces. Use workspaces to keep environments such as dev, staging, and prod separate, or to give independent teams their own isolated area on a shared deployment. -## Core commands +Each workspace has: + +- A unique name and an optional description. +- A `last_applied_version` that tracks the most recently applied resource graph. +- A built-in `env` secret provider, created automatically. + +The tasks on this page require one of two roles: + +- A global admin (`global_admin`) creates workspaces and grants access. 
+- A workspace admin (`workspace_admin`) verifies their access, runs health checks, and updates or deletes the workspace. + +For the full list of built-in roles and the scope model, see [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}). + +Most commands on this page identify a workspace by its UUID, either as a positional argument or via the `--workspace` flag. Find the UUID with `ff workspace create` or `ff workspace list`. The examples below use `<workspace-id>` as a placeholder; substitute the actual UUID. + +## Create a workspace and grant access + +A global admin creates a workspace and hands it off to the workspace admin who will manage it. + +### 1. Create the workspace + +```bash +ff workspace create demo-workspace \ + --description "Workspace for the feature workflow docs path" +``` + +The command returns a table with the new workspace's ID, name, description, version, and timestamps: + +```text +ID NAME DESCRIPTION VERSION CREATED UPDATED +7f2e4d8c-3a91-4b6d-9f0a-5e8c1b2d3f4a demo-workspace Workspace for the feature workflow ... 0 2026-05-12 14:03:21 2026-05-12 14:03:21 +``` + +Save the ID—you'll need it for the RBAC commands below. + +### 2. Grant workspace-admin access + +```bash +ff rbac grant workspace_admin \ + --workspace <workspace-id> \ + --user alice@example.com +``` + +To bind a group or service account instead, use `--group <group-name>` or `--service-account <service-account-name>`. Exactly one of `--user`, `--group`, or `--service-account` is required. + +### 3. Verify the binding + +```bash +ff rbac list --workspace <workspace-id> +``` + +You should see the new role bound to the user, group, or service account you specified. For an alternate view that groups bindings by user, group, or service account instead of one row per binding, use `ff rbac subjects --workspace <workspace-id>`. + +Creating a workspace does not automatically grant other users access—each member needs their own binding.
+ +## Confirm access to a workspace + +Use these checks when a workspace already exists and you need to confirm you can register providers, apply resources, or serve features. ```bash +# Verify your identity. +ff auth whoami +ff rbac whoami + +# Confirm the workspace is visible to you. ff workspace list ff workspace get --name demo-workspace + +# Confirm your binding. +ff rbac list --workspace <workspace-id> +``` + +You should see your user, group, or service-account binding listed. If you don't, ask a global admin to grant access using the steps above. + +## Check workspace health + +Run these commands routinely, or whenever something looks wrong, to confirm a workspace is healthy: + +```bash +# Confirm the CLI can reach the deployment. +ff ping + +# Inspect workspace metadata, including last_applied_version. +ff workspace get <workspace-id> + +# List configured providers and secret providers. +ff provider list --workspace <workspace-id> +ff secret-provider list --workspace <workspace-id> + +# Inspect graph overview and stats. +ff graph workspace stats --workspace <workspace-id> + +# List catalog locations for materialized resources. +ff catalog list --workspace <workspace-id> +``` + +Also confirm that your serving and dataframe clients point at the expected transport and state backend. + +## Update a workspace + +Change a workspace's name or description: + +```bash ff workspace update <workspace-id> \ --name demo-workspace \ --description "Updated description" +``` + +Update affects metadata only—it doesn't touch providers, the resource graph, or catalog entries. + +## Delete a workspace + +{{< warning >}} +Deleting a workspace permanently removes all workspace-scoped data: providers, secret references, the resource graph, catalog entries, and serving metadata. This cannot be undone. +{{< /warning >}} + +```bash ff workspace delete <workspace-id> --force ``` -## Workspace state to remember +`--force` skips the interactive confirmation prompt. Omit it for a safer, interactive delete.
+ +## Troubleshooting + +- **`permission denied` on provider or apply commands.** Your account is missing workspace write access. Run `ff rbac list --workspace <workspace-id>` to confirm the binding, and ask a global admin to grant the appropriate role if it's missing. +- **`workspace not found`.** Usually means the wrong deployment, the wrong transport, or a typo in the workspace name. Try `ff workspace list` to see what's actually visible. +- **Missing workspaces, providers, or resources after apply.** With memory-backed state, gRPC and REST can behave like separate state domains. Check for transport mismatches first, and use durable PostgreSQL-backed state for shared environments. -- workspaces have unique names and optional descriptions -- each workspace tracks `last_applied_version` -- providers, secret providers, graph state, catalog entries, and serving metadata are workspace-scoped +## Next steps -Deleting a workspace removes its associated workspace-scoped data. +- [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) to connect a workspace to its storage, compute, and catalog systems. +- See [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}) for the deployment-wide role and scope model. +- See [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) for the workspace, resource graph, and serving model. diff --git a/content/develop/ai/featureform/providers.md b/content/develop/ai/featureform/providers.md index 468e54c88c..191edad964 100644 --- a/content/develop/ai/featureform/providers.md +++ b/content/develop/ai/featureform/providers.md @@ -174,29 +174,3 @@ print(features) - if the feature view is not ready, serving fails - if the online provider is unavailable or unsupported, serving fails - serving-metadata permissions and serving-read permissions are separate RBAC checks - - -## Operate a workspace -Use this how-to for routine operational checks after a workspace is already created and in use.
- -### Day-2 checklist - -- verify connectivity with `ff ping` -- inspect workspace metadata and `last_applied_version` -- inspect providers and secret providers -- inspect graph overview and stats -- inspect catalog locations -- confirm serving and dataframe clients point at the expected transport and state backend - -### Useful commands - -```bash -ff ping -ff workspace get -ff provider list --workspace -ff secret-provider list --workspace -ff graph workspace stats --workspace -ff catalog list --workspace -``` - -With memory-backed state, check transport mismatches first when users report missing workspaces, providers, or applied resources. diff --git a/content/operate/featureform/auth.md b/content/operate/featureform/auth.md index 0547e81ab1..14439ae3f9 100644 --- a/content/operate/featureform/auth.md +++ b/content/operate/featureform/auth.md @@ -33,78 +33,4 @@ Feature Form separates deployment-wide administration from workspace-scoped acti - Workspace scope controls providers, secret providers, apply, graph, and audit inside one workspace. - Resource-constrained scope is used for limited serving or training-set access. -## Create a workspace and grant access - -Use this flow when a global admin is creating a new workspace and handing it off to the team that will manage it. - -### 1. Create the workspace - -```bash -ff workspace create demo-workspace \ - --description "Workspace for the feature workflow docs path" -``` - -### 2. Verify it exists - -```bash -ff workspace get --name demo-workspace -ff workspace list -``` - -Capture the workspace ID from the result for later RBAC commands. - -### 3. Grant workspace-admin access - -```bash -ff rbac grant workspace_admin \ - --workspace \ - --user alice@example.com -``` - -You can also bind a group or service account instead of a user. - -### 4. 
Verify the binding - -```bash -ff rbac list --workspace -ff rbac subjects --workspace -``` - -### Notes - -- Creating the workspace does not automatically grant workspace membership to other principals. -- New workspaces create a built-in `env` secret provider, but it is still workspace-scoped. -- In-memory state can make gRPC and REST behave like separate state domains. Use durable PostgreSQL-backed state for shared environments. - -## Join an existing workspace - -Use this page when a workspace already exists and you need to confirm that the intended principal can proceed with setup, apply, or serving. - -### 1. Verify identity - -```bash -ff auth whoami -ff rbac whoami -``` - -### 2. Confirm the workspace is visible - -```bash -ff workspace list -ff workspace get --name demo-workspace -``` - -### 3. Confirm the effective binding - -```bash -ff rbac list --workspace -``` - -You should see the expected user, group, or service-account binding for that workspace. - -### Common failures - -- `permission denied` on provider or apply commands usually means missing workspace write access. -- `workspace not found` usually means the wrong deployment, wrong transport, or wrong workspace name. -- Missing resources after apply can indicate transport or state-backend mismatch in non-durable environments. - +For the workspace lifecycle—creating a workspace, granting access, joining as a member, and day-2 operations—see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). 
From fe037d43ac385df2b06f5fc3342a365f0baea76b Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 27 May 2026 16:32:46 -0500 Subject: [PATCH 12/18] auth rough draft DOC-6590 --- .../ai/featureform/manage-workspace.md | 4 +- content/operate/featureform/auth.md | 36 --------------- content/operate/featureform/configure-auth.md | 46 +++++++++++++++++++ 3 files changed, 48 insertions(+), 38 deletions(-) delete mode 100644 content/operate/featureform/auth.md create mode 100644 content/operate/featureform/configure-auth.md diff --git a/content/develop/ai/featureform/manage-workspace.md b/content/develop/ai/featureform/manage-workspace.md index 3b3b9fdbcc..c36c9ee34a 100644 --- a/content/develop/ai/featureform/manage-workspace.md +++ b/content/develop/ai/featureform/manage-workspace.md @@ -18,7 +18,7 @@ The tasks on this page require one of two roles: - A global admin (`global_admin`) creates workspaces and grants access. - A workspace admin (`workspace_admin`) verifies their access, runs health checks, and updates or deletes the workspace. -For the full list of built-in roles and the scope model, see [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}). +For the full list of built-in roles and the scope model, see [Authentication and RBAC]({{< relref "/operate/featureform/configure-auth" >}}). Most commands on this page identify a workspace by its UUID, either as a positional argument or via the `--workspace` flag. Find the UUID with `ff workspace create` or `ff workspace list`. The examples below use ``, substitute the actual UUID. @@ -138,5 +138,5 @@ ff workspace delete --force ## Next steps - [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) to connect a workspace to its storage, compute, and catalog systems. -- See [Authentication and RBAC]({{< relref "/operate/featureform/auth" >}}) for the deployment-wide role and scope model. 
+- See [Authentication and RBAC]({{< relref "/operate/featureform/configure-auth" >}}) for the deployment-wide role and scope model. - See [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) for the workspace, resource graph, and serving model. diff --git a/content/operate/featureform/auth.md b/content/operate/featureform/auth.md deleted file mode 100644 index 14439ae3f9..0000000000 --- a/content/operate/featureform/auth.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -Title: Authentication and RBAC -alwaysopen: false -categories: -- docs -- operate -- featureform -description: Manage Feature Form auth and RBAC -linkTitle: Authentication and RBAC -weight: 70 -bannerText: Feature Form is currently in preview and subject to change. Feature Form Docker images are available on Docker Hub; contact your Redis account team for a license key to deploy. -bannerChildren: true ---- -Feature Form separates deployment-wide administration from workspace-scoped actions. A workspace is the isolation boundary, but membership and permissions are managed separately through RBAC bindings. - -## Built-in roles - -- `global_admin` for deployment-wide administration and workspace creation -- `workspace_admin` for workspace setup, membership, apply, and audit -- `operator` for operational workflows -- `viewer` for read-only workspace visibility -- `model` for constrained reads of feature views and training sets - -## Typical handoff - -1. A global admin creates the workspace. -2. The global admin grants `workspace_admin` to the intended principal. -3. That principal verifies access before registering providers or applying resources. - -## Scope model - -- Global scope controls deployment-wide actions. -- Workspace scope controls providers, secret providers, apply, graph, and audit inside one workspace. -- Resource-constrained scope is used for limited serving or training-set access. 
- -For the workspace lifecycle—creating a workspace, granting access, joining as a member, and day-2 operations—see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). diff --git a/content/operate/featureform/configure-auth.md b/content/operate/featureform/configure-auth.md new file mode 100644 index 0000000000..d4ace9fe30 --- /dev/null +++ b/content/operate/featureform/configure-auth.md @@ -0,0 +1,46 @@ +--- +Title: Configure authentication and RBAC +aliases: +- /operate/featureform/auth/ +alwaysopen: false +categories: +- docs +- operate +- featureform +description: Configure deployment-wide authentication and RBAC for Redis Feature Form. +linkTitle: Configure auth and RBAC +weight: 70 +bannerText: Feature Form is currently in preview and subject to change. Feature Form Docker images are available on Docker Hub; contact your Redis account team for a license key to deploy. +bannerChildren: true +--- + +Redis Feature Form separates deployment-wide administration from workspace-scoped actions. A [workspace]({{< relref "/develop/ai/featureform/concepts#workspaces" >}}) isolates resources; RBAC bindings control who can act on it. + +This page covers the built-in roles, the scope model, and the typical handoff between a global admin and a workspace admin. For the CLI commands that grant access and verify bindings, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). + +## Built-in roles + +Feature Form ships with five built-in RBAC roles. The role ID in the left column is the literal string used in `ff rbac grant`. + +| Role ID | Scope | What it grants | +| --- | --- | --- | +| `viewer` | Workspace | Read-only access to workspace metadata, the resource graph, the catalog, providers, and serving metadata. | +| `operator` | Workspace | Everything a viewer has, plus writing providers, planning and applying changes, reading served features, and controlling scheduler workflows. 
| +| `workspace_admin` | Workspace | Full administration of a single workspace—membership, audit, updates, deletion—plus everything an operator has. | +| `global_admin` | Global | Workspace creation, plus full administration across every workspace in the deployment. | +| `model` | Resource-constrained | Read access to a specific set of feature views, training sets, and serving data—nothing else. Used for model-team service accounts. | + +## Scope model + +Feature Form has three scopes. Each role works at exactly one of them. + +- **Global** Deployment-wide actions, such as creating workspaces. Only `global_admin` operates at this scope. +- **Workspace** Actions inside a single workspace: providers, secret providers, apply, graph, catalog, serving metadata, and audit. A binding at this scope applies to one workspace only—grant the role again on each workspace a user needs. +- **Resource-constrained** A narrower form of workspace scope that limits a binding to a specific set of resources. Used for the `model` role, which only sees serving and training-set reads for the resources it was bound to. + +A binding pairs a role with a scope and a user, group, or service account. For example: "Alice has `workspace_admin` on workspace `7f2e4d8c-…`" or "the `payments-team` group has `global_admin`." + +## Next steps + +- [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the commands that create workspaces, grant roles, and verify bindings. +- [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) for background on workspaces and the resource graph. 
From 79a018d79c75d476db0b1efe2b66bb59f195af6a Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Thu, 28 May 2026 10:46:27 -0500 Subject: [PATCH 13/18] auth changes --- content/develop/ai/featureform/reference.md | 42 +++++- content/operate/featureform/configure-auth.md | 142 +++++++++++++++++- 2 files changed, 173 insertions(+), 11 deletions(-) diff --git a/content/develop/ai/featureform/reference.md b/content/develop/ai/featureform/reference.md index 71a7066a02..260f9e2ad4 100644 --- a/content/develop/ai/featureform/reference.md +++ b/content/develop/ai/featureform/reference.md @@ -1,6 +1,38 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Redis Feature Form reference +description: Reference data for the ff CLI, Python client, gRPC API, and RBAC permissions. +linkTitle: Reference +weight: 100 +--- + +This page collects raw reference data for Redis Feature Form. Use it as a lookup—conceptual material lives in the [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) page, and task-oriented procedures live in the other pages in this section. + +## Permissions + +Each built-in RBAC role is a fixed set of permissions. The role table on [Configure authentication and RBAC]({{< relref "/operate/featureform/configure-auth#built-in-roles" >}}) is the usual way to think about access; the catalog below is what the authorization service actually checks. + +| Permission ID | Category | Resource scope | What it grants | +| --- | --- | --- | --- | +| `workspace.create` | workspace | deployment | Create new workspaces. | +| `workspace.read` | workspace | workspace | Read workspace metadata. | +| `workspace.list` | workspace | deployment | List visible workspaces. | +| `workspace.update` | workspace | workspace | Update workspace metadata. | +| `workspace.delete` | workspace | workspace | Delete a workspace. | +| `workspace.membership.manage` | workspace | workspace | Manage workspace RBAC bindings. 
| +| `graph.read` | graph | workspace | Read graph and resource metadata. | +| `catalog.read` | catalog | workspace | Read catalog metadata. | +| `provider.read` | infrastructure | workspace | Read provider definitions. | +| `provider.write` | infrastructure | workspace | Mutate provider definitions. | +| `secret_provider.read` | infrastructure | workspace | Read secret-provider definitions. | +| `secret_provider.write` | infrastructure | workspace | Mutate secret-provider definitions. | +| `apply.plan` | mutation | workspace | Run apply planning. | +| `apply.write` | mutation | workspace | Apply workspace changes. | +| `serving.metadata.read` | data | workspace or resource | Read serving metadata. | +| `serving.read` | data | workspace or resource | Read served feature values. | +| `dataframe.read` | data | workspace | Read dataframe data. | +| `training_set.read` | data | workspace or resource | Read training-set data. | +| `scheduler.read` | operations | workspace | Read scheduler state. | +| `scheduler.control` | operations | workspace | Control scheduler state. | +| `audit.read` | audit | workspace or deployment | Read audit logs. | +| `machine_credential.read` | machine credentials | workspace | Read machine credentials. | +| `machine_credential.write` | machine credentials | workspace | Create, rotate, and revoke machine credentials. | diff --git a/content/operate/featureform/configure-auth.md b/content/operate/featureform/configure-auth.md index d4ace9fe30..b24fe6b0bd 100644 --- a/content/operate/featureform/configure-auth.md +++ b/content/operate/featureform/configure-auth.md @@ -14,13 +14,82 @@ bannerText: Feature Form is currently in preview and subject to change. Feature bannerChildren: true --- -Redis Feature Form separates deployment-wide administration from workspace-scoped actions. A [workspace]({{< relref "/develop/ai/featureform/concepts#workspaces" >}}) isolates resources; RBAC bindings control who can act on it. 
+Redis Feature Form authenticates users and services through an external OIDC identity provider, then authorizes their actions through built-in RBAC roles. Authentication is a deployment-wide concern configured at install time; authorization is per-workspace (with a small number of deployment-scoped exceptions) and managed at runtime through role bindings. -This page covers the built-in roles, the scope model, and the typical handoff between a global admin and a workspace admin. For the CLI commands that grant access and verify bindings, see [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}). +A [workspace]({{< relref "/develop/ai/featureform/concepts#workspaces" >}}) isolates resources; RBAC bindings control who can act on it. -## Built-in roles +## Authentication -Feature Form ships with five built-in RBAC roles. The role ID in the left column is the literal string used in `ff rbac grant`. +### Configure OIDC at deploy time + +Set Feature Form's OIDC parameters in the Helm chart's `auth` block. At minimum, you need an issuer URL and a server-side client ID: + +```yaml +auth: + enabled: true + oidcIssuerURL: "https://idp.example.com/realms/featureform" + oidcClientID: "featureform-server" + + # CLI client. Defaults to "featureform-cli". + oidcCLIClientID: "featureform-cli" + + # Comma-separated list. Restricts which flows the CLI offers. + # Supported values: device_code, authorization_code_pkce + oidcCLILoginMethods: "device_code,authorization_code_pkce" + + # Required only if you use authorization_code_pkce. Must be + # registered with the IdP for the CLI client. + oidcCLIRedirectURI: "http://localhost:8080/callback" +``` + +For deployments where internal services reach the IdP at a different URL than external clients, use `oidcDiscoveryURL`, `oidcPublicIssuerURL`, and `oidcPublicDiscoveryURL` to split the discovery and issuer endpoints. 
The `oidcSkipIssuerCheck: true` flag disables issuer-claim validation and should only be used during local development. + +Feature Form reads role information from JWT claims on each request. It checks the following claims, in order, for matches against built-in role IDs: + +- `featureform_roles` (string or array) +- `roles` (string or array) +- `role` (string) +- `realm_access.roles` (array; Keycloak convention) + +If any of those claims contain `global_admin`, the user is treated as a global admin for that token's lifetime without a database binding. This is the typical way operators bootstrap the first admin—see [Provision the first global admin](#provision-the-first-global-admin). + +### Sign in with the CLI + +The `ff auth` commands handle login, session inspection, and token retrieval: + +```bash +# Interactive login. Defaults to device-code flow if the IdP +# supports it; falls back to authorization_code_pkce otherwise. +ff auth login + +# Force a specific flow. +ff auth login --login-method device_code +ff auth login --login-method authorization_code_pkce + +# Non-interactive password grant (CI, scripts). +ff auth login --username alice@example.com --password-stdin + +# Inspect the current session. +ff auth status +ff auth whoami + +# Print the active access token (for use in tools that don't +# integrate with the CLI session). +ff auth token + +# Clear the local session. Does not revoke tokens on the IdP. +ff auth logout +``` + +CLI sessions are stored per profile on the local machine. To skip interactive login entirely, set `FEATUREFORM_TOKEN` to a valid access token, or configure a service account with client credentials (see [Service accounts and machine credentials](#service-accounts-and-machine-credentials)). + +## RBAC + +### Built-in roles + +Feature Form has five built-in RBAC roles. The role ID in the left column is the literal string used in `ff rbac grant`. 
+ +Each built-in role is a fixed set of finer-grained permissions—the underlying checks the authorization service runs on each request. For the full permission catalog, see [Reference > Permissions]({{< relref "/develop/ai/featureform/reference#permissions" >}}). | Role ID | Scope | What it grants | | --- | --- | --- | @@ -30,9 +99,10 @@ Feature Form ships with five built-in RBAC roles. The role ID in the left column | `global_admin` | Global | Workspace creation, plus full administration across every workspace in the deployment. | | `model` | Resource-constrained | Read access to a specific set of feature views, training sets, and serving data—nothing else. Used for model-team service accounts. | -## Scope model -Feature Form has three scopes. Each role works at exactly one of them. +### Role scopes + +Every role applies at a defined breadth—deployment-wide, a single workspace, or a specific set of resources within a workspace. Feature Form has three scopes, and each role works at exactly one: - **Global** Deployment-wide actions, such as creating workspaces. Only `global_admin` operates at this scope. - **Workspace** Actions inside a single workspace: providers, secret providers, apply, graph, catalog, serving metadata, and audit. A binding at this scope applies to one workspace only—grant the role again on each workspace a user needs. @@ -40,6 +110,66 @@ Feature Form has three scopes. Each role works at exactly one of them. A binding pairs a role with a scope and a user, group, or service account. For example: "Alice has `workspace_admin` on workspace `7f2e4d8c-…`" or "the `payments-team` group has `global_admin`." +### Provision the first global admin + +A fresh Feature Form deployment has no role bindings in its database. 
To get the first global admin in place, choose one of two paths: + +**Map an IdP claim to `global_admin` (recommended for production).** Configure your IdP to issue a `featureform_roles` claim that contains `global_admin` for the appropriate user or group. Feature Form treats those tokens as global admin without a database binding, so the first admin can sign in and start granting roles to others immediately. + +**Bind manually after the first login.** A user with no role can still authenticate; they just can't do anything yet. From a host that already has an access token for a privileged account, run: + +```bash +ff rbac grant global_admin --global --user <user-email> +``` + +This option requires that *some* identity already has `global_admin`, which makes it suitable only for redirecting access from a temporary IdP-claim admin to a database-bound one, or for environments where you can run `ff` commands with a bootstrap token issued out-of-band. + +There is no dedicated Helm value for an initial admin. Plan your IdP claim mapping before installing. + +## Service accounts and machine credentials + +Non-human identities—CI runners, model-serving processes, batch jobs—authenticate with a service account that holds a public key registered with Feature Form. Feature Form supports Ed25519 keys today. + +Create a credential for a service account inside a workspace: + +```bash +ff machine-credential create ci-runner-key \ + --workspace <workspace-id> \ + --service-account <service-account-name> \ + --public-key "<public-key>" \ + --algorithm Ed25519 +``` + +The `ff machine-credential` command also has subcommands for `list`, `get`, `rotate`, `revoke`, and `usage` (for audit-style usage records). All of them require the `machine_credential.write` or `machine_credential.read` permission on the target workspace. 
+ +Grant the service account a workspace role the same way you would a user—use `--service-account <service-account-name>` instead of `--user <user-email>`: + +```bash +ff rbac grant operator \ + --workspace <workspace-id> \ + --service-account ci-runner +``` + +## Audit + +Feature Form records authorization-relevant events in an audit log. List events with: + +```bash +ff audit list \ + --workspace <workspace-id> \ + --event-type workspace.delete \ + --page-size 50 +``` + +Useful filters: + +- `--workspace <workspace-id>` — scope to one workspace. +- `--global` — only deployment-scoped events. Requires `global_admin`. +- `--principal-id <principal-id>` — events for a specific user, group, or service account. +- `--event-type <event-type>` — filter by event name (`workspace.create`, `rbac.grant`, `apply.write`, and so on). + +Each event includes the scope, workspace ID (if applicable), actor ID, event type, and creation timestamp. Reading the log requires the `audit.read` permission; deployment-scope reads additionally require `global_admin`. + ## Next steps - [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the commands that create workspaces, grant roles, and verify bindings. 
From 49752f7f28745a097be29f3df8447f8d8868fa1d Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 16 Jun 2026 11:15:40 -0500 Subject: [PATCH 14/18] remove old pages --- .../develop/ai/featureform/configure-auth.md | 6 - .../datasets-and-transformations.md | 234 ------------------ .../ai/featureform/features-and-labels.md | 96 ------- content/develop/ai/featureform/providers.md | 176 ------------- content/develop/ai/featureform/streaming.md | 6 - 5 files changed, 518 deletions(-) delete mode 100644 content/develop/ai/featureform/configure-auth.md delete mode 100644 content/develop/ai/featureform/datasets-and-transformations.md delete mode 100644 content/develop/ai/featureform/features-and-labels.md delete mode 100644 content/develop/ai/featureform/providers.md delete mode 100644 content/develop/ai/featureform/streaming.md diff --git a/content/develop/ai/featureform/configure-auth.md b/content/develop/ai/featureform/configure-auth.md deleted file mode 100644 index 71a7066a02..0000000000 --- a/content/develop/ai/featureform/configure-auth.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file diff --git a/content/develop/ai/featureform/datasets-and-transformations.md b/content/develop/ai/featureform/datasets-and-transformations.md deleted file mode 100644 index c2183ba515..0000000000 --- a/content/develop/ai/featureform/datasets-and-transformations.md +++ /dev/null @@ -1,234 +0,0 @@ ---- -title: Reference -description: Reference documentation for Featureform APIs, CLI commands, Python surfaces, and RBAC. -linkTitle: Reference -weight: 90 ---- - -Use these pages when you need current interfaces rather than a guided workflow. 
- -## Featureform gRPC services - -```json metadata -{ - "title": "Featureform gRPC services", - "description": "Review the current public Featureform gRPC service surface, resource APIs, and major message types.", - "categories": null, - "tableOfContents": {"sections":[{"id":"service-index","title":"Service index"},{"id":"notable-service-areas","title":"Notable service areas"},{"id":"important-apply-fields","title":"Important apply fields"},{"id":"common-provider-related-models","title":"Common provider-related models"}]} - -, - "codeExamples": [] -} -``` -This section indexes the public gRPC API surface exposed by Feature Form. - -### Service index - -| Service | Purpose | -| --- | --- | -| `WorkspaceService` | Workspace CRUD and lookup | -| `ProviderService` | Workspace-scoped provider CRUD | -| `SecretProviderService` | Workspace-scoped secret-provider CRUD | -| `ApplyService` | Declarative graph apply and dry-run planning | -| `ResourceService` | Graph browsing, lineage, search, versions, and workspace stats | -| `CatalogService` | Physical catalog inspection | -| `ServingService` | Online serving and serving metadata | -| `DataframeService` | Dataframe plan resolution | -| `RbacService` | Roles, permissions, access, and bindings | -| `MachineCredentialService` | Machine credential lifecycle | -| `AuditService` | Audit log listing | -| `VersionService` | Version compatibility and auth discovery metadata | - -### Notable service areas - -- `WorkspaceService`: create, get, list, update, delete -- `ProviderService`: register, get, list, update, delete -- `SecretProviderService`: register, get, list, update, delete -- `ApplyService`: apply and plan -- `ResourceService`: per-resource get and list plus lineage, impact, versions, and workspace stats - -### Important apply fields - -- `workspace_id` -- `resources` -- `dry_run` -- `apply_strategy` -- `execution_mode` - -### Common provider-related models - -- `PostgresConfig` -- `SnowflakeConfig` -- `S3Config` -- 
`SparkProviderConfig` -- `ProviderHealth` -- `SecretRef` - -## Feature Form CLI - -This section documents the current public `ff` CLI surface. - -### Global flags - -Connection and transport: - -- `--server`, `-s` -- `--grpc-server` -- `--transport rest|grpc` -- `--timeout`, `-t` -- `--no-tls` - -Authentication: - -- `--token` -- `--client-id` -- `--client-secret` -- `--issuer-url` - -CLI behavior: - -- `--output`, `-o` -- `--config` -- `--no-color` -- `--verbose`, `-v` -- `--skip-version-check` - -### Top-level commands - -- `ff version` -- `ff ping` -- `ff workspace` -- `ff provider` -- `ff secret-provider` -- `ff apply` -- `ff auth` -- `ff rbac` -- `ff machine-credential` -- `ff audit` -- `ff catalog` -- `ff graph` -- `ff scheduler` -- `ff dataframe` -- `ff config` - -### Transport note - -The CLI defaults to REST in code, but many operational examples use explicit gRPC. In memory-backed deployments, REST and gRPC do not share one durable state backend. - -## Python client and DSL reference - -```json metadata -{ - "title": "Python client and DSL reference", - "description": "Review the main Featureform Python client APIs, resource types, helpers, and common authoring patterns.", - "categories": null, - "tableOfContents": {"sections":[{"id":"client-apis","title":"Client APIs"},{"id":"common-error-types","title":"Common error types"},{"id":"secret-provider-and-provider-helpers","title":"Secret-provider and provider helpers"},{"id":"core-resource-types","title":"Core resource types"},{"id":"common-pattern","title":"Common pattern"}]} - -, - "codeExamples": [] -} -``` -This section indexes the main public Python surface exported from `featureform`. 
- -### Client APIs - -- `ff.Client` -- `ff.WorkspaceClient` -- `ff.ProviderClient` -- `ff.SecretProviderClient` -- `ff.ApplyResult` -- `ff.ApplyWaitResult` - -### Common error types - -- `ff.FeatureformError` -- `ff.ConnectionError` -- `ff.InvalidArgumentError` -- `ff.NotFoundError` -- `ff.TimeoutError` -- `ff.ValidationError` - -### Secret-provider and provider helpers - -- `ff.EnvSecretProvider` -- `ff.VaultSecretProvider` -- `ff.K8sSecretProvider` -- `ff.PostgresProvider` -- `ff.RedisProvider` -- `ff.S3Provider` -- `ff.SparkProvider` -- `ff.get_postgres(name)` -- `ff.get_provider(name)` - -### Core resource types - -- `ff.Entity` -- `ff.Dataset` -- `ff.Feature` -- `ff.Label` -- `ff.TrainingSet` -- `ff.FeatureView` - -### Common pattern - -```python -postgres = ff.get_postgres("demo_postgres") -transactions = postgres.dataset(name="transactions") -``` - -The onboarding quickstart in this repo uses an explicit `resources = [...]` list because it is easier to reason about during apply. - -## RBAC roles and persmissions - -# Roles and permissions - -```json metadata -{ - "title": "Roles and permissions", - "description": "Review the built-in Featureform RBAC roles, permission areas, and useful inspection commands.", - "categories": null, - "tableOfContents": {"sections":[{"id":"roles","title":"Roles"},{"id":"permission-areas","title":"Permission areas"},{"id":"useful-inspection-commands","title":"Useful inspection commands"}]} - -, - "codeExamples": [] -} -``` -This section summarizes the built-in RBAC catalog exposed by the current authorization service. 
- -### Roles - -- `viewer` for read-only workspace visibility -- `operator` for resource and scheduler operations -- `workspace_admin` for full workspace administration -- `global_admin` for deployment-wide administration -- `model` for constrained serving and training-set access - -### Permission areas - -- workspace -- graph -- catalog -- provider -- secret provider -- apply -- serving -- dataframe -- training set -- scheduler -- audit -- machine credential - -`model` is not a reduced workspace-admin role. It depends on explicit resource bindings for the feature views or training sets it can read. - -### Useful inspection commands - -```bash -ff --transport grpc --grpc-server localhost:9090 --no-tls rbac roles -ff --transport grpc --grpc-server localhost:9090 --no-tls rbac permissions -``` - - - - - - diff --git a/content/develop/ai/featureform/features-and-labels.md b/content/develop/ai/featureform/features-and-labels.md deleted file mode 100644 index 5eaa0a0ace..0000000000 --- a/content/develop/ai/featureform/features-and-labels.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: Tutorials -description: Step through guided Feature Form workflows for definitions files and end-to-end resource setup. -linkTitle: Tutorials -weight: 50 ---- - -Use these walkthroughs when you want more context than a short how-to provides. - -## Work with a python definitions file - -Featureform treats a Python definitions file as the source of a desired resource graph. The quickstart example in this repo is intentionally small so you can see how the pieces fit together. - -### Typical file structure - -- import `featureform as ff` -- define entities and datasets -- define transformations -- define features and labels -- define a training set and feature view -- export a `resources = [...]` list - -### Supported loading patterns - -`ff apply` loads resources from Python in this order: - -1. an explicit `resources = [...]` list -2. 
the auto-registration registry, if no explicit list is present - -The explicit list is the clearer onboarding pattern and is what the published quickstart uses. - -### The file should reference - -- registered provider names such as `demo_postgres` and `demo_redis` -- secret references such as `env:PG_PASSWORD` -- stable resource names that make sense across re-apply cycles - -### The file should not do - -- replace provider registration -- assume providers exist before the workspace registers them -- mix infrastructure provisioning into the definitions entrypoint - -## Build your first feature workflow - -Use this tutorial when you want a guided run through the core Featureform path rather than a short checklist. - -### 1. Verify the workspace - -```bash -ff workspace get --name -ff secret-provider get env --workspace -``` - -### 2. Register providers - -Use the provider setup guides for: - -- [Postgres]() -- [Redis]() - -### 3. Review the definitions file - -The quickstart definitions entrypoint is resources.py. - -### 4. Apply the resources - -```bash -ff apply \ - --workspace \ - --file examples/featureform/docs/resources.py \ - --wait \ - --wait-for finished -``` - -### 5. Inspect the workspace - -```bash -ff graph workspace stats --workspace -ff graph feature list --workspace -ff catalog list --workspace -``` - -### 6. Iterate safely - -```bash -ff apply \ - --workspace \ - --file examples/featureform/docs/resources.py \ - --plan -``` - -### 7. Serve from the feature view - -Continue with the Serve features how-to. - diff --git a/content/develop/ai/featureform/providers.md b/content/develop/ai/featureform/providers.md deleted file mode 100644 index 191edad964..0000000000 --- a/content/develop/ai/featureform/providers.md +++ /dev/null @@ -1,176 +0,0 @@ ---- -title: How-to guides -description: Register offline providers and Redis online stores for Feature Form workflows. 
-linkTitle: How-to guides -weight: 40 ---- - -Perform common Feature Form tasks for secrets, providers, apply, catalog inspection, and serving. - -## Apply a definitions file - -`ff apply` executes one Python entrypoint, collects the resources it defines, and submits that set as desired state for a single workspace. - -### What `--file` can point to - -- a Python file -- a package `__init__.py` -- a package directory that contains `__init__.py` - -```bash -ff apply --workspace --file examples/featureform/docs/resources.py --plan -``` - -### Loading order - -`ff apply` loads resources in this order: - -1. `resources = [...]` from the entry module, if present -2. the resource registry, if no explicit list exists - -### Preview with `--plan` - -```bash -ff apply \ - --workspace \ - --file examples/featureform/docs/resources.py \ - --plan -``` - -Use this before large changes or whenever the file might be incomplete relative to the workspace's full desired state. - -### Standard apply - -```bash -ff apply \ - --workspace \ - --file examples/featureform/docs/resources.py \ - --wait \ - --wait-for finished -``` - -### Apply modes - -- default apply: replacement-oriented desired state -- `--merge`: safer for intentionally partial definition sets -- `--update`: exposed in the CLI, but treat as provisional -- `--full-rematerialize`: also exposed, but treat as provisional - -Only one of `--merge`, `--update`, or `--full-rematerialize` can be used at a time. - -If neither an explicit `resources` list nor any auto-registered resources are present after the entrypoint executes, `ff apply` fails. - -## Update resources and rematerialize - -After the first successful apply, most Feature Form work is an iteration loop: edit the definitions file, preview the delta, apply the change, and inspect the resulting graph or catalog state. - -### Typical cycle - -1. Change a resource definition. -2. Run a plan. -3. Apply the change. -4. Verify the resulting graph or catalog state. 
- -```bash -ff apply \ - --workspace \ - --file examples/featureform/docs/resources.py \ - --plan -``` - -### When to use `--merge` - -Use `--merge` when the file you are applying is intentionally partial and omitted resources should not be treated as deletions. - -### Caution with `--update` and `--full-rematerialize` - -The CLI exposes both flags, but the current user-facing workflow is not as mature or as well documented as normal apply and merge. Use them only when your deployment has already validated that behavior. - -### Verify the outcome - -```bash -ff graph workspace overview --workspace -ff catalog list --workspace -``` - -## Inspect materialized locations - -The Feature Form catalog records where resources managed by Feature Form physically landed after apply and materialization. It is distinct from systems such as Unity Catalog, Glue, or an Iceberg catalog. - -### List catalog entries - -```bash -ff catalog list --workspace -``` - -### Inspect one resource - -```bash -ff catalog get demo_transactions --workspace -``` - -### The catalog shows - -- logical resource name -- owning provider -- status -- physical table, path, or namespace -- update timestamps - -Use the catalog together with graph views: graph explains why a resource exists; catalog shows where it landed. - -## Query datasets and training sets - -Use this command when the target dataset, training set, or feature view already exists and you want to inspect rows directly. - -### Query a dataset - -```bash -ff dataframe query demo_transactions \ - --workspace demo-workspace \ - --server localhost:9090 \ - --kind dataset \ - --limit 10 \ - --insecure -``` - -### Supported kinds - -- `dataset` -- `training_set` -- `feature_view` - -### Useful flags - -- `--columns` -- `--filter` -- `--limit` -- `--output table|json|csv` - -The dataframe command talks to the Flight endpoint on the gRPC side, so transport and endpoint mismatches are common troubleshooting points. 
- -## Serve feature view - -Use this page after a feature view already exists and the online store is ready. In the quickstart flow, that means `demo_customer_feature_view` has already been applied successfully. - -### Verify the feature view exists - -```bash -ff graph feature-view get demo_customer_feature_view --workspace demo-workspace -``` - -### Minimal Python workflow - -```python -import featureform as ff - -client = ff.Client(host="127.0.0.1:9090", insecure=True, workspace="demo-workspace") -features = client.serve("demo_customer_feature_view", entity="C1001") -print(features) -``` - -### Operational checks - -- if the feature view is not ready, serving fails -- if the online provider is unavailable or unsupported, serving fails -- serving-metadata permissions and serving-read permissions are separate RBAC checks diff --git a/content/develop/ai/featureform/streaming.md b/content/develop/ai/featureform/streaming.md deleted file mode 100644 index 41a6bd4852..0000000000 --- a/content/develop/ai/featureform/streaming.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: -description: -linkTitle: -weight: 1 ---- From 2905db88be603e35bcd43dddb74bf33b1d45bbcb Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 16 Jun 2026 11:15:55 -0500 Subject: [PATCH 15/18] define and deploy features --- .../featureform/define-and-deploy-features.md | 150 +++++++++++++++++- 1 file changed, 145 insertions(+), 5 deletions(-) diff --git a/content/develop/ai/featureform/define-and-deploy-features.md b/content/develop/ai/featureform/define-and-deploy-features.md index 71a7066a02..10d383f4c1 100644 --- a/content/develop/ai/featureform/define-and-deploy-features.md +++ b/content/develop/ai/featureform/define-and-deploy-features.md @@ -1,6 +1,146 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Define and deploy features +description: Author a Python definitions file and apply it to a Redis Feature Form workspace. 
+linkTitle: Define and deploy features +weight: 40 +aliases: +- /develop/ai/featureform/features-and-labels/ +- /develop/ai/featureform/providers/ +--- + +A feature workflow starts with a Python [definitions file]({{< relref "/develop/ai/featureform/concepts#definitions-files-and-ff-apply" >}}). This file declares the [entities, datasets, transformations, features, labels, training sets, and feature views]({{< relref "/develop/ai/featureform/concepts#resource-types" >}}) you want in a [workspace]({{< relref "/develop/ai/featureform/concepts#workspaces" >}}). Run `ff apply` to submit that file as the workspace's [desired state]({{< relref "/develop/ai/featureform/concepts#the-resource-graph" >}}). Feature Form compares the file with the workspace's current resource graph and applies only the differences. If Feature Form accepts the change, it commits a new graph version. The model is declarative — the file describes the end state, not the steps to get there. Re-applying the same file leaves the workspace unchanged. + +## Author a definitions file + +Redis Feature Form treats a Python definitions file as the source of a desired resource graph. The example below declares a single workflow end to end, from a Postgres dataset through to a Redis-backed feature view. 
+ +```python +import featureform as ff +from datetime import timedelta + +postgres = ff.get_postgres("demo_postgres") + +customer = ff.Entity(name="customer") + +transactions = postgres.dataset( + name="transactions_raw", + table="transactions", + timestamp_column="timestamp", +) + +@postgres.sql_transformation(name="customer_daily_rollups", inputs=[transactions]) +def customer_daily_rollups() -> str: + return """ + SELECT customer_id, + date_trunc('day', timestamp) AS event_day, + SUM(transaction_amount) AS total_amount + FROM {{transactions_raw}} + GROUP BY 1, 2 + """ + +customer_total_amount_7d = ( + ff.Feature(name="customer_total_amount_7d") + .from_dataset(customer_daily_rollups, entity="customer", + entity_column="customer_id", value="total_amount", + timestamp="event_day") + .aggregate(function=ff.AggregateFunction.SUM, window=timedelta(days=7)) +) + +customer_risk_view = ff.FeatureView( + name="customer_risk_feature_view", + entity="customer", + features=[customer_total_amount_7d], + inference_store="demo_redis", +) + +resources = [ + customer, + transactions, + customer_daily_rollups, + customer_total_amount_7d, + customer_risk_view, +] +``` + +### Typical file structure + +A definitions file typically declares resources in this order: + +1. **Import the module** with `import featureform as ff`, which exposes the resource builders and provider helpers. +2. **[Entities]({{< relref "/develop/ai/featureform/concepts#resource-types" >}})** — identify the real-world objects features describe, such as `customer` or `order`. Other resources join on the entity's key column. +3. **[Datasets]({{< relref "/develop/ai/featureform/concepts#resource-types" >}})** — point at an existing table, view, or file on an offline store. The data remains in its original location. +4. **[Transformations]({{< relref "/develop/ai/featureform/concepts#resource-types" >}})** — produce new datasets from existing ones, expressed as SQL or as a Spark job. +5. 
**[Features]({{< relref "/develop/ai/featureform/concepts#resource-types" >}}) and [labels]({{< relref "/develop/ai/featureform/concepts#resource-types" >}})** — entity-keyed values served at inference time (features) and used as the prediction target offline (labels). +6. **[Training sets]({{< relref "/develop/ai/featureform/concepts#resource-types" >}}) and [feature views]({{< relref "/develop/ai/featureform/concepts#feature-views-and-serving" >}})** — join features with a label on the entity key (training set) and expose features for online serving (feature view). +7. **Export a `resources = [...]` list** that names every resource above. See [Definitions files and `ff apply`]({{< relref "/develop/ai/featureform/concepts#definitions-files-and-ff-apply" >}}) for how the loader uses it. + +{{< note >}} +If your file doesn't export `resources = [...]`, `ff apply` falls back to its auto-registration registry. Prefer the explicit list during onboarding; it's easier to reason about and is what the [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}}) uses. +{{< /note >}} + +### The file should reference + +- Registered [provider names]({{< relref "/develop/ai/featureform/register-providers" >}}) such as `demo_postgres` and `demo_redis`. Providers must already exist in the workspace before they're referenced. +- [Secret references]({{< relref "/develop/ai/featureform/concepts#secrets-and-secret-references" >}}) such as `env:PG_PASSWORD`, resolved at runtime by a [secret provider]({{< relref "/develop/ai/featureform/register-providers#configure-secret-providers" >}}) registered in the workspace. +- Stable resource names that make sense across re-apply cycles, since the graph compares the file to current state by name. + +### The file should not do + +- Don't replace provider registration. +- Don't assume providers exist before the workspace registers them. +- Don't mix infrastructure provisioning into the definitions entrypoint. 
+ +## Apply a definitions file + +`ff apply` reads the file you pass with `--file` and submits its resources to a workspace. `--file` accepts a Python file, a package `__init__.py`, or a package directory containing one. + +Get the workspace ID with `ff workspace list`. See [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the full workspace lifecycle. + +### Preview with `--plan` + +Preview the change without applying it: + +```bash +ff apply \ + --workspace \ + --file examples/featureform/docs/resources.py \ + --plan +``` + +Use this before large changes or whenever the file might be incomplete relative to the workspace's full desired state. + +### Standard apply + +Apply the file: + +```bash +ff apply \ + --workspace \ + --file examples/featureform/docs/resources.py +``` + +### Apply modes + +- **Default apply** replaces the workspace's current resource graph with the file. +- **`--merge`** is safer for intentionally partial definition sets; resources omitted from the file aren't treated as deletions. + +## Verify the apply + +After apply finishes, confirm the change with: + +```bash +ff graph workspace stats --workspace +ff graph feature list --workspace +ff catalog list --workspace +``` + +The graph commands show the resources Feature Form recognizes; `ff catalog` shows where each materialized resource physically landed. See [Query data]({{< relref "/develop/ai/featureform/query-data" >}}) for more inspection options. + +## If apply fails + +Common reasons: + +- **Provider not registered.** A resource references a provider name the workspace doesn't know. Confirm with `ff provider list --workspace ` and register the missing provider per [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}). +- **Secret can't be resolved.** A provider config uses a reference such as `env:PG_PASSWORD`, but the Feature Form server's environment doesn't expose that variable. 
Check the secret provider with `ff secret-provider get env --workspace `. +- **No resources to apply.** The entrypoint produced no resources. Make sure your file exports a `resources = [...]` list, or that auto-registration finds the resources you declared. +- **Validation error.** The CLI prints the specific resource and field that failed; fix the file and re-run with `--plan`. From 4983779de666f210d6fe52f1e88194e55975fe4a Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Tue, 16 Jun 2026 11:16:25 -0500 Subject: [PATCH 16/18] aliases and links --- content/develop/ai/featureform/concepts.md | 2 ++ content/develop/ai/featureform/quickstart.md | 8 ++------ content/develop/ai/featureform/register-providers.md | 2 ++ 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/content/develop/ai/featureform/concepts.md b/content/develop/ai/featureform/concepts.md index c6ed73c56a..917e30b81a 100644 --- a/content/develop/ai/featureform/concepts.md +++ b/content/develop/ai/featureform/concepts.md @@ -3,6 +3,8 @@ title: Redis Feature Form concepts description: Learn the workspace, resource graph, provider, secret, and serving model behind Redis Feature Form. linkTitle: Concepts weight: 30 +aliases: +- /develop/ai/featureform/training-sets-and-feature-views/ --- Redis Feature Form is a feature platform. It turns raw data from your existing systems into the values your models read at inference time. This page introduces the core concepts behind that workflow. diff --git a/content/develop/ai/featureform/quickstart.md b/content/develop/ai/featureform/quickstart.md index 31beedbbc7..7e6be7cd76 100644 --- a/content/develop/ai/featureform/quickstart.md +++ b/content/develop/ai/featureform/quickstart.md @@ -32,7 +32,7 @@ If your Postgres provider uses `env:PG_PASSWORD`, make sure that variable exists ## 3. Register the demo providers -See the Providers and workpsaces page for steps to register the offline and online providers before applying resources. 
+See [Register providers]({{< relref "/develop/ai/featureform/register-providers" >}}) for steps to register the offline and online providers before applying resources. ## 4. Review the quickstart definitions file @@ -53,9 +53,7 @@ That file defines: ```bash ff apply \ --workspace \ - --file examples/featureform/docs/resources.py \ - --wait \ - --wait-for finished + --file examples/featureform/docs/resources.py ``` For a dry run first: @@ -87,5 +85,3 @@ features = client.serve("demo_customer_feature_view", entity="C1001") print(features) ``` - - diff --git a/content/develop/ai/featureform/register-providers.md b/content/develop/ai/featureform/register-providers.md index cdae9a1a12..8e7a150385 100644 --- a/content/develop/ai/featureform/register-providers.md +++ b/content/develop/ai/featureform/register-providers.md @@ -3,6 +3,8 @@ title: Register providers description: Register storage, compute, and catalog providers in a Redis Feature Form workspace, and configure secret backends. linkTitle: Register providers weight: 30 +aliases: +- /develop/ai/featureform/streaming/ --- Register the providers and secret backends Redis Feature Form needs before you author features or transformations. Providers connect the workspace to external systems for storage, compute, serving, or catalog-backed access, and definitions files reference them by name. 
From 8d13679b010bdaf99b77b865d973de3f41e91388 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 17 Jun 2026 14:12:41 -0500 Subject: [PATCH 17/18] ai drafts --- content/develop/ai/featureform/query-data.md | 60 +++++++- content/develop/ai/featureform/reference.md | 137 ++++++++++++++++++ .../develop/ai/featureform/serve-features.md | 38 ++++- .../develop/ai/featureform/update-features.md | 47 +++++- 4 files changed, 267 insertions(+), 15 deletions(-) diff --git a/content/develop/ai/featureform/query-data.md b/content/develop/ai/featureform/query-data.md index 71a7066a02..dd20f7783f 100644 --- a/content/develop/ai/featureform/query-data.md +++ b/content/develop/ai/featureform/query-data.md @@ -1,6 +1,56 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Query data +description: Inspect the Redis Feature Form catalog and query datasets, training sets, and feature views with the ff CLI. +linkTitle: Query data +weight: 70 +--- + +After you apply resources, two commands help you inspect the result: `ff catalog` reports where each resource was materialized, and `ff dataframe query` reads sampled rows from datasets, training sets, and feature views. + +## Inspect materialized locations + +The Feature Form catalog records where each resource is physically materialized. It is distinct from systems such as Unity Catalog, Glue, or an Iceberg catalog. Each catalog entry shows the logical resource name, owning provider, status, physical table/path/namespace, and update timestamps. + +### List catalog entries + +```bash +ff catalog list --workspace +``` + +### Inspect one resource + +```bash +ff catalog get demo_transactions --workspace +``` + +Use the catalog together with graph views: graph explains why a resource exists; catalog shows where it's materialized. + +## Query datasets, training sets, and feature views + +Use `ff dataframe query` to inspect rows from a dataset, training set, or feature view after it's been applied. 
+ +### Query a dataset + +```bash +ff dataframe query demo_transactions \ + --workspace \ + --server localhost:9090 \ + --kind dataset \ + --limit 10 \ + --insecure +``` + +### Supported kinds + +- `dataset` +- `training_set` +- `feature_view` + +### Useful flags + +- `--columns` +- `--filter` +- `--limit` +- `--output table|json|csv` + +The dataframe command uses the gRPC Flight endpoint. Connection failures usually indicate transport or endpoint mismatches with the Feature Form server. diff --git a/content/develop/ai/featureform/reference.md b/content/develop/ai/featureform/reference.md index 260f9e2ad4..5975405c5a 100644 --- a/content/develop/ai/featureform/reference.md +++ b/content/develop/ai/featureform/reference.md @@ -3,6 +3,8 @@ title: Redis Feature Form reference description: Reference data for the ff CLI, Python client, gRPC API, and RBAC permissions. linkTitle: Reference weight: 100 +aliases: +- /develop/ai/featureform/datasets-and-transformations/ --- This page collects raw reference data for Redis Feature Form. Use it as a lookup—conceptual material lives in the [Concepts]({{< relref "/develop/ai/featureform/concepts" >}}) page, and task-oriented procedures live in the other pages in this section. @@ -36,3 +38,138 @@ Each built-in RBAC role is a fixed set of permissions. The role table on [Config | `audit.read` | audit | workspace or deployment | Read audit logs. | | `machine_credential.read` | machine credentials | workspace | Read machine credentials. | | `machine_credential.write` | machine credentials | workspace | Create, rotate, and revoke machine credentials. | + +## gRPC services + +This section indexes the public gRPC API surface exposed by Feature Form. 
+ +### Service index + +| Service | Purpose | +| --- | --- | +| `WorkspaceService` | Workspace CRUD and lookup | +| `ProviderService` | Workspace-scoped provider CRUD | +| `SecretProviderService` | Workspace-scoped secret-provider CRUD | +| `ApplyService` | Declarative graph apply and dry-run planning | +| `ResourceService` | Graph browsing, lineage, search, versions, and workspace stats | +| `CatalogService` | Physical catalog inspection | +| `ServingService` | Online serving and serving metadata | +| `DataframeService` | Dataframe plan resolution | +| `RbacService` | Roles, permissions, access, and bindings | +| `MachineCredentialService` | Machine credential lifecycle | +| `AuditService` | Audit log listing | +| `VersionService` | Version compatibility and auth discovery metadata | + +### Notable service areas + +- `WorkspaceService`: create, get, list, update, delete +- `ProviderService`: register, get, list, update, delete +- `SecretProviderService`: register, get, list, update, delete +- `ApplyService`: apply and plan +- `ResourceService`: per-resource get and list plus lineage, impact, versions, and workspace stats + +### Important apply fields + +- `workspace_id` +- `resources` +- `dry_run` +- `apply_strategy` +- `execution_mode` + +### Common provider-related models + +- `PostgresConfig` +- `SnowflakeConfig` +- `S3Config` +- `SparkProviderConfig` +- `ProviderHealth` +- `SecretRef` + +## CLI + +This section documents the current public `ff` CLI surface. 
+ +### Global flags + +Connection and transport: + +- `--server`, `-s` +- `--grpc-server` +- `--transport rest|grpc` +- `--timeout`, `-t` +- `--no-tls` + +Authentication: + +- `--token` +- `--client-id` +- `--client-secret` +- `--issuer-url` + +CLI behavior: + +- `--output`, `-o` +- `--config` +- `--no-color` +- `--skip-version-check` + +### Top-level commands + +- `ff version` +- `ff ping` +- `ff workspace` +- `ff provider` +- `ff secret-provider` +- `ff apply` +- `ff auth` +- `ff rbac` +- `ff machine-credential` +- `ff audit` +- `ff catalog` +- `ff graph` +- `ff scheduler` +- `ff dataframe` +- `ff config` + +## Python client + +This section indexes the main public Python surface exported from `featureform`. + +### Client APIs + +- `ff.Client` +- `ff.WorkspaceClient` +- `ff.ProviderClient` +- `ff.SecretProviderClient` +- `ff.ApplyResult` +- `ff.ApplyWaitResult` + +### Common error types + +- `ff.FeatureformError` +- `ff.ConnectionError` +- `ff.InvalidArgumentError` +- `ff.NotFoundError` +- `ff.TimeoutError` +- `ff.ValidationError` + +### Secret-provider and provider helpers + +- `ff.EnvSecretProvider` +- `ff.VaultSecretProvider` +- `ff.K8sSecretProvider` +- `ff.PostgresProvider` +- `ff.RedisProvider` +- `ff.S3Provider` +- `ff.SparkProvider` +- `ff.get_postgres(name)` +- `ff.get_provider(name)` + +### Core resource types + +- `ff.Entity` +- `ff.Dataset` +- `ff.Feature` +- `ff.Label` +- `ff.TrainingSet` +- `ff.FeatureView` diff --git a/content/develop/ai/featureform/serve-features.md b/content/develop/ai/featureform/serve-features.md index 71a7066a02..f4a138020b 100644 --- a/content/develop/ai/featureform/serve-features.md +++ b/content/develop/ai/featureform/serve-features.md @@ -1,6 +1,34 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Serve features +description: Read materialized features from a Redis Feature Form feature view at inference time. 
+linkTitle: Serve features +weight: 60 +--- + +Read materialized features from a feature view by calling `ff.Client.serve(...)`. This page assumes the feature view exists and the online store is ready — the [Quickstart]({{< relref "/develop/ai/featureform/quickstart" >}})'s `demo_customer_feature_view` is the canonical example. + +The examples use `<workspace-id>` as the workspace UUID. Get yours with `ff workspace list`. See [Manage workspaces]({{< relref "/develop/ai/featureform/manage-workspace" >}}) for the full workspace lifecycle. + +## Verify the feature view exists + +```bash +ff graph feature-view get demo_customer_feature_view --workspace <workspace-id> +``` + +## Minimal Python workflow + +```python +import featureform as ff + +client = ff.Client(host="127.0.0.1:9090", insecure=True, workspace="<workspace-id>") +features = client.serve("demo_customer_feature_view", entity="C1001") +print(features) +``` + +The example connects to a local server with TLS off. For a deployed server, change `host` and set `insecure=False`. + +## If serving fails + +- The feature view isn't ready. Confirm with `ff graph feature-view get` and re-check after materialization completes. +- The online provider is unavailable or not registered. Confirm with `ff provider list --workspace <workspace-id>`. +- The caller lacks the required RBAC permission. Serving values and reading serving metadata are governed by separate permissions; see the [Permissions table]({{< relref "/develop/ai/featureform/reference#permissions" >}}). diff --git a/content/develop/ai/featureform/update-features.md b/content/develop/ai/featureform/update-features.md index 71a7066a02..05299174f9 100644 --- a/content/develop/ai/featureform/update-features.md +++ b/content/develop/ai/featureform/update-features.md @@ -1,6 +1,43 @@ --- -title: -description: -linkTitle: -weight: 1 ---- \ No newline at end of file +title: Update features +description: Iterate on a Redis Feature Form definitions file with plan, merge, and re-apply workflows. 
+linkTitle: Update features +weight: 50 +--- + +Most Feature Form work after the first apply is iterative: edit the definitions file, preview the delta, apply, and verify. For the full apply mechanics and failure modes, see [Define and deploy features]({{< relref "/develop/ai/featureform/define-and-deploy-features" >}}). + +## Typical cycle + +1. Change a resource definition. +2. Run a plan. +3. Apply the change. +4. Verify the resulting graph or catalog state. + +Preview the change first: + +```bash +ff apply \ + --workspace \ + --file examples/featureform/docs/resources.py \ + --plan +``` + +Then apply: + +```bash +ff apply \ + --workspace \ + --file examples/featureform/docs/resources.py +``` + +## When to use `--merge` + +Use `--merge` when your file is intentionally partial and omitted resources should not be treated as deletions. + +## Verify the outcome + +```bash +ff graph workspace stats --workspace +ff catalog list --workspace +``` From 90aedbd8eec412cd46a2f99d130bf97fed603342 Mon Sep 17 00:00:00 2001 From: Kaitlyn Michael Date: Wed, 17 Jun 2026 17:01:57 -0500 Subject: [PATCH 18/18] SME feedback --- .../featureform/define-and-deploy-features.md | 22 ++++++++++++++++--- content/develop/ai/featureform/quickstart.md | 5 +++-- content/develop/ai/featureform/reference.md | 1 + .../develop/ai/featureform/update-features.md | 4 +++- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/content/develop/ai/featureform/define-and-deploy-features.md b/content/develop/ai/featureform/define-and-deploy-features.md index 10d383f4c1..a63e461c61 100644 --- a/content/develop/ai/featureform/define-and-deploy-features.md +++ b/content/develop/ai/featureform/define-and-deploy-features.md @@ -111,18 +111,32 @@ Use this before large changes or whenever the file might be incomplete relative ### Standard apply -Apply the file: +Apply the file and wait for it to finish: ```bash ff apply \ --workspace \ - --file examples/featureform/docs/resources.py + --file 
examples/featureform/docs/resources.py \ + --wait \ + --wait-for finished +``` + +Without `--wait`, `ff apply` returns as soon as the server accepts the request and runs the job asynchronously. `--wait` blocks until the job reaches a target state: `--wait-for finished` waits for terminal success; `--wait-for running` returns as soon as the job is actively running. + +If you skip `--wait`, the response includes a job ID. Check the job's status and per-task progress with: + +```bash +ff scheduler job get ``` ### Apply modes - **Default apply** replaces the workspace's current resource graph with the file. -- **`--merge`** is safer for intentionally partial definition sets; resources omitted from the file aren't treated as deletions. +- **`--merge`** applies a partial definition file without treating omitted resources as deletions. +- **`--update`** is an advanced scheduler-backed mode that re-runs supported resources' normal update or incremental path, even when the graph definition is unchanged. +- **`--full-rematerialize`** is an advanced scheduler-backed mode that forces full-refresh behavior on supported materialized resources. + +Use only one of `--merge`, `--update`, or `--full-rematerialize` at a time. Support for `--update` and `--full-rematerialize` is resource-family dependent — run `--plan` first to inspect the planned job, and pair them with `--wait` to see the outcome. ## Verify the apply @@ -144,3 +158,5 @@ Common reasons: - **Secret can't be resolved.** A provider config uses a reference such as `env:PG_PASSWORD`, but the Feature Form server's environment doesn't expose that variable. Check the secret provider with `ff secret-provider get env --workspace `. - **No resources to apply.** The entrypoint produced no resources. Make sure your file exports a `resources = [...]` list, or that auto-registration finds the resources you declared. 
- **Validation error.** The CLI prints the specific resource and field that failed; fix the file and re-run with `--plan`. + +For more detail, re-run with `--verbose` to enable debug logging to stderr. For most failures, though, the apply job itself surfaces clearer errors than the debug log — let `--wait` finish, or run `ff scheduler job get <job-id>`, before reaching for `--verbose`. diff --git a/content/develop/ai/featureform/quickstart.md b/content/develop/ai/featureform/quickstart.md index 7e6be7cd76..ee91220521 100644 --- a/content/develop/ai/featureform/quickstart.md +++ b/content/develop/ai/featureform/quickstart.md @@ -53,7 +53,9 @@ That file defines: ```bash ff apply \ --workspace \ - --file examples/featureform/docs/resources.py + --file examples/featureform/docs/resources.py \ + --wait \ + --wait-for finished ``` For a dry run first: @@ -84,4 +86,3 @@ client = ff.Client(host="127.0.0.1:9090", insecure=True, workspace="" features = client.serve("demo_customer_feature_view", entity="C1001") print(features) ``` - diff --git a/content/develop/ai/featureform/reference.md b/content/develop/ai/featureform/reference.md index 5975405c5a..8a9ffdecd8 100644 --- a/content/develop/ai/featureform/reference.md +++ b/content/develop/ai/featureform/reference.md @@ -111,6 +111,7 @@ CLI behavior: - `--output`, `-o` - `--config` - `--no-color` +- `--verbose`, `-v` - `--skip-version-check` ### Top-level commands diff --git a/content/develop/ai/featureform/update-features.md b/content/develop/ai/featureform/update-features.md index 05299174f9..06dffc3d5e 100644 --- a/content/develop/ai/featureform/update-features.md +++ b/content/develop/ai/featureform/update-features.md @@ -28,7 +28,9 @@ Then apply: ```bash ff apply \ --workspace \ - --file examples/featureform/docs/resources.py + --file examples/featureform/docs/resources.py \ + --wait \ + --wait-for finished ``` ## When to use `--merge`