From dd22ca5a60e5824b61386c33f465d18d1f9e8dda Mon Sep 17 00:00:00 2001
From: Marko Lekic <mirrorsandmisdirections@gmail.com>
Date: Thu, 20 Nov 2025 20:23:52 +0100
Subject: [PATCH] Updated api.http caching + added snapshots + snapshot demo

---
 .gitignore                                    |   1 +
 Makefile                                      |   1 +
 Makefile.built                                |  35 ++
 docs/Profiles.md                              |   8 +-
 docs/Snapshots.md                             | 378 ++++++++++++++++++
 docs/Technical_Overview.md                    |  19 +-
 docs/examples/Snapshot_Demo.md                | 255 ++++++++++++
 docs/index.md                                 | 130 +++---
 .../.env.dev_bigquery_bigframes               |   0
 .../.env.dev_bigquery_pandas                  |   0
 examples/snapshot_demo/.env.dev_databricks    |  16 +
 .../snapshot_demo/.env.dev_databricks_delta   |  16 +
 .../snapshot_demo/.env.dev_databricks_iceberg |  13 +
 examples/snapshot_demo/.env.dev_duckdb        |   2 +
 examples/snapshot_demo/.env.dev_postgres      |   3 +
 examples/snapshot_demo/.env.dev_snowflake     |  18 +
 examples/snapshot_demo/Makefile               | 138 +++++++
 examples/snapshot_demo/README.md              |  36 ++
 examples/snapshot_demo/models/README.md       |  20 +
 .../models/marts/mart_users_by_domain.ff.sql  |  28 ++
 .../mart_users_by_domain_snapshot.ff.sql      |  24 ++
 .../snapshots/users_clean_snapshot.ff.sql     |  24 ++
 .../models/staging/users_clean.ff.sql         |  27 ++
 examples/snapshot_demo/profiles.yml           |  70 ++++
 examples/snapshot_demo/project.yml            | 119 ++++++
 examples/snapshot_demo/seeds/README.md        |   4 +
 examples/snapshot_demo/seeds/seed_users.csv   |   4 +
 examples/snapshot_demo/sources.yml            |   8 +
 examples/snapshot_demo/tests/dq/README.md     |   4 +
 examples/snapshot_demo/tests/unit/README.md   |  12 +
 examples_article/http_cache_demo/README.md    |   7 +
 .../http_cache_demo/docs/README.md            |   4 +
 .../http_cache_demo/models/README.md          |   4 +
 .../http_cache_demo/models/todo_ingest.ff.py  |  45 +++
 examples_article/http_cache_demo/profiles.yml |  13 +
 examples_article/http_cache_demo/project.yml  |  18 +
 .../http_cache_demo/seeds/README.md           |   4 +
 examples_article/http_cache_demo/sources.yml  |   9 +
 .../http_cache_demo/tests/dq/README.md        |   4 +
 .../http_cache_demo/tests/unit/README.md      |   4 +
 mkdocs.yml                                    |   4 +-
 src/fastflowtransform/api/context.py          |   2 +-
 src/fastflowtransform/api/http.py             | 248 +++++++-----
 src/fastflowtransform/cli/__init__.py         |   3 +
 src/fastflowtransform/cli/init_cmd.py         |  55 ++-
 src/fastflowtransform/cli/run.py              |  37 +-
 src/fastflowtransform/cli/snapshot_cmd.py     | 173 ++++++++
 src/fastflowtransform/cli/test_cmd.py         |  15 +-
 src/fastflowtransform/config/models.py        | 104 ++++-
 src/fastflowtransform/docs.py                 |   1 +
 .../executors/_spark_imports.py               |  57 +++
 src/fastflowtransform/executors/base.py       |  81 +++-
 .../executors/bigquery/base.py                | 225 +++++++++++
 .../executors/databricks_spark.py             | 299 +++++++++++++-
 src/fastflowtransform/executors/duckdb.py     | 220 ++++++++++
 src/fastflowtransform/executors/postgres.py   | 234 +++++++++++
 .../executors/snowflake_snowpark.py           | 222 +++++++++-
 src/fastflowtransform/snapshots.py            | 113 ++++++
 src/fastflowtransform/templates/index.html.j2 |   1 +
 tests/integration/examples/config.py          |  11 +
 .../api/http/test_http_offline_cache_unit.py  |  22 +
 .../api/http/test_http_pagination_df_unit.py  |  33 ++
 62 files changed, 3495 insertions(+), 190 deletions(-)
 create mode 100644 Makefile.built
 create mode 100644 docs/Snapshots.md
 create mode 100644 docs/examples/Snapshot_Demo.md
 rename examples/{incremental_demo => snapshot_demo}/.env.dev_bigquery_bigframes (100%)
 rename examples/{incremental_demo => snapshot_demo}/.env.dev_bigquery_pandas (100%)
 create mode 100644 examples/snapshot_demo/.env.dev_databricks
 create mode 100644 examples/snapshot_demo/.env.dev_databricks_delta
 create mode 100644 examples/snapshot_demo/.env.dev_databricks_iceberg
 create mode 100644 examples/snapshot_demo/.env.dev_duckdb
 create mode 100644 examples/snapshot_demo/.env.dev_postgres
 create mode 100644 examples/snapshot_demo/.env.dev_snowflake
 create mode 100644 examples/snapshot_demo/Makefile
 create mode 100644 examples/snapshot_demo/README.md
 create mode 100644 examples/snapshot_demo/models/README.md
 create mode 100644 examples/snapshot_demo/models/marts/mart_users_by_domain.ff.sql
 create mode 100644 examples/snapshot_demo/models/snapshots/mart_users_by_domain_snapshot.ff.sql
 create mode 100644 examples/snapshot_demo/models/snapshots/users_clean_snapshot.ff.sql
 create mode 100644 examples/snapshot_demo/models/staging/users_clean.ff.sql
 create mode 100644 examples/snapshot_demo/profiles.yml
 create mode 100644 examples/snapshot_demo/project.yml
 create mode 100644 examples/snapshot_demo/seeds/README.md
 create mode 100644 examples/snapshot_demo/seeds/seed_users.csv
 create mode 100644 examples/snapshot_demo/sources.yml
 create mode 100644 examples/snapshot_demo/tests/dq/README.md
 create mode 100644 examples/snapshot_demo/tests/unit/README.md
 create mode 100644 examples_article/http_cache_demo/README.md
 create mode 100644 examples_article/http_cache_demo/docs/README.md
 create mode 100644 examples_article/http_cache_demo/models/README.md
 create mode 100644 examples_article/http_cache_demo/models/todo_ingest.ff.py
 create mode 100644 examples_article/http_cache_demo/profiles.yml
 create mode 100644 examples_article/http_cache_demo/project.yml
 create mode 100644 examples_article/http_cache_demo/seeds/README.md
 create mode 100644 examples_article/http_cache_demo/sources.yml
 create mode 100644 examples_article/http_cache_demo/tests/dq/README.md
 create mode 100644 examples_article/http_cache_demo/tests/unit/README.md
 create mode 100644 src/fastflowtransform/cli/snapshot_cmd.py
 create mode 100644 src/fastflowtransform/executors/_spark_imports.py
 create mode 100644 src/fastflowtransform/snapshots.py

diff --git a/.gitignore b/.gitignore
index 7049ad9..4209e3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,4 @@ examples/**/docs/
 tickets/**
 site/dag/**
 cache/**
+articles/**
diff --git a/Makefile b/Makefile
index 7e9b8cb..0585b64 100644
--- a/Makefile
+++ b/Makefile
@@ -20,3 +20,4 @@ MAKEFILE_DIR := $(dir $(abspath $(firstword $(MAKEFILE_LIST))))
 
 include $(MAKEFILE_DIR)/Makefile.pipeline
 include $(MAKEFILE_DIR)/Makefile.dev
+include $(MAKEFILE_DIR)/Makefile.built
diff --git a/Makefile.built b/Makefile.built
new file mode 100644
index 0000000..ae209a8
--- /dev/null
+++ b/Makefile.built
@@ -0,0 +1,35 @@
+# Packaging/release targets for fastflowtransform (included from the top-level Makefile)
+
+# Change this if your venv has a different name/path
+VENV = .venv
+
+.PHONY: help venv build check upload-test upload clean
+
+help:
+	@echo "Useful commands:"
+	@echo "  make venv        - create virtualenv and install build"
+	@echo "  make build       - build wheel + sdist into dist/"
+	@echo "  make check       - run twine check on dist/*"
+	@echo "  make upload-test - upload dist/* to TestPyPI (using uvx + twine)"
+	@echo "  make upload      - upload dist/* to PyPI (using uvx + twine)"
+	@echo "  make clean       - remove build artifacts"
+
+# Use the venv's own pip: a venv freshly created by `python3 -m venv` has no bin/uv.
+venv:
+	python3 -m venv $(VENV)
+	$(VENV)/bin/python -m pip install --upgrade pip build
+
+build: venv
+	$(VENV)/bin/python -m build
+
+check:
+	uvx twine check dist/*
+
+upload-test: build check
+	uvx twine upload --repository testpypi dist/*
+
+upload: build check
+	uvx twine upload dist/*
+
+clean:
+	rm -rf build dist *.egg-info
diff --git a/docs/Profiles.md b/docs/Profiles.md
index 4ff4454..d4a2175 100644
--- a/docs/Profiles.md
+++ b/docs/Profiles.md
@@ -50,12 +50,14 @@ Supported engines and their expected sections:
 |----------------------|--------------------|---------------------------------------------------|
 | `duckdb`             | `duckdb`           | `path` (file path or `:memory:`)                  |
 | `postgres`           | `postgres`         | `dsn`, `db_schema`                                |
-| `bigquery`           | `bigquery`         | `project` (optional), `dataset`, `location`       |
-| `databricks_spark`   | `databricks_spark` | `master`, `app_name`, optional `extra_conf`, `warehouse_dir`, `use_hive_metastore`, `database`, `table_format`, `table_options` |
-| `snowflake_snowpark` | `snowflake_snowpark`| `account`, `user`, `password`, `warehouse`, `database`, `db_schema`, optional `role` |
+| `bigquery`           | `bigquery`         | `project` (optional), `dataset`, `location`, `use_bigframes`, `allow_create_dataset` |
+| `databricks_spark`   | `databricks_spark` | `master`, `app_name`, optional `extra_conf`, `warehouse_dir`, `use_hive_metastore`, `catalog`, `database`, `table_format`, `table_options` |
+| `snowflake_snowpark` | `snowflake_snowpark`| `account`, `user`, `password`, `warehouse`, `database`, `schema` (`db_schema` alias), optional `role`, `allow_create_schema` |
 
 Each profile can define its own `vars:` block (values exposed via `var('key')` inside templates).
 
+> Snowflake note: the CLI scaffold shows `schema:` while the docs often mention `db_schema:`. The configuration accepts either key because `schema` is an alias for `db_schema` in the settings model.
+
 ## Environment Variables
 
 `profiles.yml` supports Jinja expressions. The helper `env('FF_VAR', 'fallback')` reads process environment variables and substitutes the default if unset. Examples:
diff --git a/docs/Snapshots.md b/docs/Snapshots.md
new file mode 100644
index 0000000..b827634
--- /dev/null
+++ b/docs/Snapshots.md
@@ -0,0 +1,378 @@
+# Snapshots
+
+Snapshots are **history-aware tables** that track how a row changes over time.
+
+Unlike regular `table` / `view` / `incremental` models, which only ever expose the *current* state, a snapshot keeps **multiple versions** of each business key, with validity ranges and a “current” flag.
+
+FastFlowTransform implements snapshots as a dedicated materialization:
+
+```sql
+{{ config(
+    materialized='snapshot',
+    snapshot={
+        'strategy': 'timestamp',    -- or 'check'
+    },
+    unique_key='id',
+    updated_at='updated_at',
+) }}
+
+select
+    id,
+    ...
+from {{ ref('some_model.ff') }};
+```
+
+You run snapshot models via a **separate CLI entrypoint**:
+
+```bash
+fft snapshot run . --env dev_duckdb
+```
+
+Regular `fft run` does *not* execute snapshot models.
+
+---
+
+## When to use snapshots
+
+Use snapshots when you need to:
+
+* Answer **“what did we know back then?”** questions
+  e.g. “What was the user’s email on 2024-03-01?”
+* Implement **type-2 slowly changing dimensions (SCD2)** for dimensions like users, customers, products, or feature flags.
+* Preserve a **temporal audit trail** of important entities without hand-rolling history tables and merge logic.
+
+You typically place snapshot models near your **cleaned dimensions**, e.g.:
+
+* `staging/users_clean.ff.sql`
+* `snapshots/users_clean_snapshot.ff.sql`  ⟵ snapshot over the staging model
+* `marts/dim_users.ff.sql`  ⟵ reads from the snapshot’s “current” rows
+
+---
+
+## Conceptual model
+
+A snapshot is defined by:
+
+1. **Business key**:
+   `unique_key` / `primary_key`
+
+   > “Which column(s) identify a logical entity?”
+
+2. **Change detection strategy** (required for snapshots):
+
+   * `strategy='timestamp'`
+     Use a **monotonic timestamp column** to detect new versions, e.g. `updated_at`, `signup_date`.
+   * `strategy='check'`
+     Compare a set of **“interesting” columns** (`check_cols`) between runs and open a new version when any of them changes.
+
+3. **Source query**:
+   A normal `SELECT` that produces the *current* state of your entities.
+
+On disk, each snapshot table contains:
+
+* All columns produced by your `SELECT`
+  (e.g. `user_id`, `email`, `email_domain`, `signup_date`)
+* Plus a set of **snapshot metadata columns**, typically:
+
+  * `_ff_valid_from` – when this version became active
+  * `_ff_valid_to` – when this version stopped being active (`NULL` for open/current)
+  * `_ff_is_current` – boolean flag marking the current row for each key
+
+Exact column names may vary per implementation, but the pattern is always:
+
+> “Multiple rows per business key, each with a validity range, and exactly one current row.”
+
+---
+
+## Snapshot configuration
+
+Snapshot behavior is configured via `config(...)` at the top of a model.
+
+### Minimal timestamp snapshot
+
+```sql
+{{ config(
+    materialized='snapshot',
+    snapshot={
+        'strategy': 'timestamp',
+    },
+    unique_key='user_id',
+    updated_at='signup_date',
+) }}
+
+select
+    user_id,
+    email,
+    email_domain,
+    signup_date
+from {{ ref('users_clean.ff') }};
+```
+
+Key pieces:
+
+* `materialized='snapshot'`
+  Enables snapshot semantics for this model.
+
+* `snapshot.strategy='timestamp'`
+  Use a timestamp column to detect new versions.
+
+* `unique_key='user_id'`
+  Business key; you can also pass a list: `['user_id', 'country']`.
+
+* `updated_at='signup_date'`
+  Column used as the **freshness indicator**. When a new run sees a `signup_date` that is greater than the existing version’s, a new version is opened.
+
+> **Validation rules**
+>
+> * Snapshots require a `unique_key` (or `primary_key`).
+> * `strategy` must be `'timestamp'` or `'check'`.
+> * For `'timestamp'`, you must provide `updated_at` / `updated_at_column`.
+> * For `'check'`, you must provide `check_cols`.
+
+### Check strategy with `check_cols`
+
+Use this when you **don’t have** a reliable `updated_at` column and instead want to compare a list of columns:
+
+```sql
+{{ config(
+    materialized='snapshot',
+    snapshot={
+        'strategy': 'check',
+        'check_cols': ['email', 'email_domain', 'status'],
+    },
+    unique_key='user_id',
+) }}
+
+select
+    user_id,
+    email,
+    email_domain,
+    status,
+    signup_date
+from {{ ref('users_clean.ff') }};
+```
+
+Here:
+
+* The engine joins **current source rows** with **current snapshot rows** on `unique_key`.
+* It recomputes a hash over `check_cols`. When the hash changes, a new version is opened.
+
+This is convenient for:
+
+* Entities with **many changing attributes**.
+* Sources where `updated_at` is unreliable or missing.
+
+### Shorthands and normalization
+
+FastFlowTransform’s config layer normalizes snapshot config so you can:
+
+* Pass a single string or list for `unique_key`, `check_cols`, `updated_at_columns`, etc.
+* Use `updated_at` or `updated_at_column` interchangeably (they are validated to be consistent).
+* Optionally keep snapshot settings nested under `snapshot={...}` while still accessing the top-level shortcuts (`unique_key`, `updated_at`, `check_cols`) in executors.
+
+---
+
+## Runtime behavior
+
+### First snapshot run
+
+On the **first** `fft snapshot run`:
+
+* FFT executes the snapshot’s `SELECT`.
+* For each row, it writes:
+
+  * One row per `unique_key`.
+  * `valid_from = run_timestamp`
+  * `valid_to   = NULL`
+  * `is_current = TRUE`
+
+No comparison with previous data (there is none yet).
+
+### Subsequent runs (timestamp strategy)
+
+On each subsequent run (`strategy='timestamp'`):
+
+1. **Load current version** per `unique_key` from the snapshot table.
+2. **Load current source rows** from the snapshot model’s `SELECT`.
+3. For each key:
+
+   * If the key **did not exist** before → **insert** new open-ended version.
+   * If the key existed, and the source row’s `updated_at` is **greater** than the snapshot’s latest version:
+
+     * Compare row values (implementation detail; often just “trust” updated_at).
+     * If considered changed:
+
+       * **Close** the current version: set `valid_to = run_timestamp`, `is_current = FALSE`.
+       * **Open** a new version with `valid_from = run_timestamp`, `valid_to = NULL`, `is_current = TRUE`.
+   * If the key existed, and `updated_at` is **not greater** (or row unchanged) → no-op.
+
+> **Deletes**
+> By design, snapshots focus on changes in the **source-of-truth rows**. When a row disappears from the source, it is treated as **no change** for snapshot purposes (the last known version remains current). If you need delete tracking, model a soft-delete flag and include it in `check_cols`.
+
+### Subsequent runs (check strategy)
+
+For `strategy='check'`:
+
+1. **Load current version** per `unique_key`.
+2. **Load current source rows**.
+3. Compute a **hash** (or equivalent) over the configured `check_cols` for both.
+4. If the hash differs → treat it as a change:
+
+   * Close old version (`valid_to = run_timestamp`, `is_current = FALSE`).
+   * Insert new version (`valid_from = run_timestamp`, `valid_to = NULL`, `is_current = TRUE`).
+
+This strategy is usually more robust when:
+
+* Your source doesn’t maintain an updated timestamp.
+* You care about a specific subset of columns only.
+
+---
+
+## Snapshot table schema
+
+A snapshot table contains:
+
+* **Business columns**: whatever your `SELECT` produces.
+* **Snapshot columns** (typical pattern):
+
+  ```text
+  _ff_valid_from   TIMESTAMP  -- when this version became active
+  _ff_valid_to     TIMESTAMP  -- when this version ended (NULL = still active)
+  _ff_is_current   BOOLEAN    -- TRUE exactly for the current version
+  ```
+
+Common query patterns:
+
+### Current version per key
+
+```sql
+select *
+from users_clean_snapshot
+where _ff_is_current = true;
+```
+
+### History of a single key
+
+```sql
+select *
+from users_clean_snapshot
+where user_id = 42
+order by _ff_valid_from;
+```
+
+### Point-in-time view
+
+“What did we know on 2024-03-01?”
+
+```sql
+select *
+from users_clean_snapshot
+where
+  _ff_valid_from <= timestamp '2024-03-01'
+  and ( _ff_valid_to is null or _ff_valid_to > timestamp '2024-03-01' );
+```
+
+These patterns work uniformly across engines.
+
+---
+
+## CLI: `fft snapshot run`
+
+Snapshots are run via a dedicated CLI subcommand:
+
+```bash
+fft snapshot run <project> [options]
+```
+
+Key properties:
+
+* Only models with `materialized='snapshot'` are eligible.
+* If your selector (`--select/--exclude`) matches non-snapshot models, they are ignored or explicitly rejected with a clear error.
+* You can combine all the usual selection patterns: `tag:...`, `path:...`, `name:...`, etc.
+
+Example:
+
+```bash
+# Run only snapshot models that belong to the snapshot demo and the DuckDB engine
+fft snapshot run . \
+  --env dev_duckdb \
+  --select tag:example:snapshot_demo \
+  --select tag:engine:duckdb
+```
+
+### Retention & pruning
+
+To avoid unbounded growth, the snapshot CLI supports **retention** flags:
+
+* `--prune`
+  Enable pruning of old versions.
+* `--keep-last N`
+  Keep only the last `N` versions per `unique_key`.
+* `--dry-run`
+  Show what *would* be pruned without actually deleting anything.
+
+Examples:
+
+```bash
+# Show which rows would be removed, but do not delete
+fft snapshot run . \
+  --env dev_duckdb \
+  --select tag:example:snapshot_demo \
+  --prune --keep-last 3 --dry-run
+
+# Apply pruning for real
+fft snapshot run . \
+  --env dev_duckdb \
+  --select tag:example:snapshot_demo \
+  --prune --keep-last 3
+```
+
+Retention is applied **after** the snapshot update, so the most recent `N` versions are always preserved.
+
+---
+
+## Interaction with regular runs
+
+Snapshots are intentionally **decoupled** from `fft run`:
+
+* `fft run` builds your **current-state pipeline** (seeds, staging, marts, incremental models, etc.).
+* `fft snapshot run` builds and updates **history tables** based on the current state produced by `fft run`.
+
+Typical workflow:
+
+```bash
+# 1) Rebuild main pipeline
+fft run . --env dev_duckdb
+
+# 2) Update snapshots based on the new state
+fft snapshot run . --env dev_duckdb
+```
+
+If you accidentally try to run a snapshot via `fft run`, FFT will raise an error such as:
+
+> Snapshot models cannot be executed via 'fft run'. Use 'fft snapshot run' instead.
+
+This separation keeps regular DAG runs **predictable and stateless**, while giving you a powerful, focused tool for history tracking.
+
+---
+
+## Best practices
+
+* Put snapshots in a dedicated folder, e.g. `models/snapshots/`, and tag them:
+
+  ```sql
+  tags=['scope:snapshot', 'engine:duckdb', 'example:snapshot_demo']
+  ```
+
+* Always configure a **stable, business-level `unique_key`**. Avoid transient IDs that might be re-used.
+
+* Prefer `strategy='timestamp'` when you have a trustworthy monotonic timestamp.
+
+* Prefer `strategy='check'` when:
+
+  * timestamps are unreliable, or
+  * you care about specific columns only.
+
+* Build point-in-time marts by **reading from the snapshot** rather than the raw staging table when you need historical correctness.
+
+Snapshots give you a clean, structured way to get SCD2-style history without hand-writing merge logic for every table.
diff --git a/docs/Technical_Overview.md b/docs/Technical_Overview.md
index 2da99e8..0eba393 100644
--- a/docs/Technical_Overview.md
+++ b/docs/Technical_Overview.md
@@ -53,7 +53,24 @@
 
 ### Project Layout
 
-For an up-to-date view, browse the repository tree or run `find . -maxdepth 2` from the root; all examples live under `examples/` with their own READMEs.
+The repository is structured so you can jump straight to the area you need without spelunking:
+
+```
+fastflowtransform/
+├── src/fastflowtransform/        # core package (CLI, executors, docs, streaming, settings)
+│   ├── api/, cli/, executors/, testing/, templates/ and friends
+│   └── entry-points such as core.py, dag.py, seeding.py, validation.py
+├── docs/                         # user + developer docs (this file plus guides)
+├── examples/                     # canonical SQL/Python demo projects with seeds + READMEs
+├── examples_article/             # long-form tutorial artifacts referenced by articles
+├── articles/, tickets/, _exports/ # content pipelines + planning notes
+├── tests/                        # pytest coverage for internal modules
+├── _scripts/, rel_dir/, dist/, site/, htmlcov/  # tooling + build outputs
+├── pyproject.toml, uv.lock       # build system + dependency lock
+└── Makefile*, docker-compose.yml # dev workflow shortcuts and services
+```
+
+Treat `fastflowtransform/` as the project root when running commands from this doc.
 
 ### Example Projects and Seeds
 
diff --git a/docs/examples/Snapshot_Demo.md b/docs/examples/Snapshot_Demo.md
new file mode 100644
index 0000000..912f684
--- /dev/null
+++ b/docs/examples/Snapshot_Demo.md
@@ -0,0 +1,255 @@
+# Snapshot Demo Project
+
+The `examples/snapshot_demo` project shows how to build **history-aware tables** with FastFlowTransform snapshots. It reuses the small users pipeline from the basic demo and adds a `users_clean_snapshot` model that captures **row-versioned history** over time.
+
+## Why it exists
+
+* **Show snapshot semantics** – demonstrate `materialized='snapshot'` with `strategy='timestamp'` on a simple dataset.
+* **Separate runs** – illustrate why snapshots are executed via `fft snapshot run` instead of the regular `fft run`.
+* **Engine parity** – keep the snapshot demo portable across DuckDB, Postgres, Databricks Spark (parquet / Delta Lake / Iceberg), and BigQuery (once engines are implemented).
+* **Understand the shape of a snapshot table** – see how FFT adds validity columns on top of your source columns.
+
+Use it as a sandbox before adding snapshots to your own marts and dimensions.
+
+## Project layout
+
+The snapshot demo is intentionally tiny and mirrors the basic demo structure:
+
+| Path                                           | Purpose                                                                                             |
+| ---------------------------------------------- | --------------------------------------------------------------------------------------------------- |
+| `seeds/seed_users.csv`                         | Sample CRM-style user data. `fft seed` materializes it as a physical `seed_users` table.            |
+| `models/staging/users_clean.ff.sql`            | Same as in the basic demo: cleans emails, casts types, derives `email_domain`.                      |
+| `models/marts/mart_users_by_domain.ff.sql`     | Same as in the basic demo: aggregates users per email domain.                                       |
+| `models/snapshots/users_clean_snapshot.ff.sql` | **New:** snapshot model that captures slowly changing history of `users_clean.ff`.                  |
+| `profiles.yml`                                 | Reused from the basic demo: defines `dev_duckdb`, `dev_postgres`, `dev_databricks_delta`, `dev_databricks_iceberg`, `dev_bigquery`. |
+| `.env.dev_*`                                   | Engine-specific environment files (`.env.dev_duckdb`, `.env.dev_postgres`, `.env.dev_databricks_delta`, `.env.dev_databricks_iceberg`). |
+| `Makefile`                                     | Adds snapshot-aware targets on top of the usual `seed` / `run` / `test` / `dag`.                    |
+
+### The snapshot model
+
+The core of the demo is `models/snapshots/users_clean_snapshot.ff.sql`:
+
+```sql
+{{ config(
+    materialized='snapshot',
+    snapshot={
+        'strategy': 'timestamp',   -- or 'check' (not used in this demo)
+    },
+    unique_key='user_id',
+    updated_at='signup_date',
+    tags=[
+        'example:snapshot_demo',
+        'scope:snapshot',
+        'engine:duckdb',
+        'engine:postgres',
+        'engine:databricks_spark',
+        'engine:bigquery'
+    ],
+) }}
+
+select
+    user_id,
+    email,
+    email_domain,
+    signup_date
+from {{ ref('users_clean.ff') }};
+```
+
+Key points:
+
+* `materialized='snapshot'` marks this as a **snapshot model**.
+* `snapshot.strategy='timestamp'` means:
+
+  * FFT uses `updated_at='signup_date'` to detect changed rows.
+  * When a row changes, the old version is **closed** and a new version is **opened**.
+* `unique_key='user_id'` defines the **business key** used to match records between runs.
+* The *body* is a normal `SELECT` from the cleaned staging model; FFT takes care of the history logic.
+
+On physical storage, FFT keeps:
+
+* All columns from the select (`user_id`, `email`, `email_domain`, `signup_date`)
+* Plus engine-agnostic snapshot metadata columns (names depending on your implementation), typically:
+
+  * a **valid-from** timestamp
+  * a **valid-to** timestamp (nullable/open ended)
+  * an **is_current** flag
+
+So a given `user_id` may appear multiple times with different validity ranges.
+
+## Running the snapshot demo
+
+The demo ships with `examples/snapshot_demo/Makefile`, which mirrors the basic demo's Makefile and adds `snapshot` / `snapshot_demo` targets:
+
+1. Change into the project:
+
+   ```bash
+   cd examples/snapshot_demo
+   ```
+
+2. Choose an engine and export the environment (example: DuckDB):
+
+   ```bash
+   # DuckDB
+   set -a; source .env.dev_duckdb; set +a
+
+   # Or Postgres
+   # set -a; source .env.dev_postgres; set +a
+
+   # Or Databricks
+   # Delta/parquet: set -a; source .env.dev_databricks_delta; set +a
+   # Iceberg:      set -a; source .env.dev_databricks_iceberg; set +a
+   # (optionally export FF_DBR_TABLE_FORMAT=delta|iceberg to override the table format)
+
+   # Or BigQuery (requires GCP setup)
+   # set -a; source .env.dev_bigquery_pandas; set +a
+   # set -a; source .env.dev_bigquery_bigframes; set +a
+   ```
+
+3. Run the full snapshot demo for the selected engine:
+
+   ```bash
+   # One-shot: clean → seed → run (pipeline) → snapshot → dag → test
+   make snapshot_demo ENGINE=duckdb
+   # make snapshot_demo ENGINE=postgres
+   # make snapshot_demo ENGINE=databricks_spark DBR_TABLE_FORMAT=delta
+   # make snapshot_demo ENGINE=databricks_spark DBR_TABLE_FORMAT=iceberg
+   # make snapshot_demo ENGINE=bigquery BQ_FRAME=bigframes
+   ```
+
+   Under the hood this will typically do:
+
+   * `fft seed` – materialize `seed_users`
+   * `fft run` – build staging/mart views/tables (excluding snapshot models)
+   * `fft snapshot run` – apply snapshot logic to `users_clean_snapshot`
+   * `fft dag` – generate the DAG/site
+   * `fft test` – run any configured DQ tests
+
+### Databricks table formats (parquet / Delta / Iceberg)
+
+Just like the incremental demo, the snapshot project lets you flip Spark table formats without
+editing models. Pass `DBR_TABLE_FORMAT=parquet|delta|iceberg` to `make snapshot_demo` or export
+`FF_DBR_TABLE_FORMAT` when invoking `fft` directly. The `dev_databricks_delta` profile uses the same
+Hive-compatible metastore as before, while `dev_databricks_iceberg` wires in an Iceberg catalog
+(`spark.jars.packages` / `spark.sql.catalog.iceberg.*`). When running locally you still need the
+matching Python packages (for example `pip install delta-spark` for Delta Lake and the Iceberg
+runtime JARs bundled via the profile).
+
+Manual CLI examples:
+
+```bash
+# Delta Lake snapshots
+FF_DBR_TABLE_FORMAT=delta \
+  FFT_ACTIVE_ENV=dev_databricks_delta FF_ENGINE=databricks_spark \
+  fft snapshot run . --select tag:example:snapshot_demo --select tag:engine:databricks_spark
+
+# Iceberg snapshots
+FF_DBR_TABLE_FORMAT=iceberg \
+  FFT_ACTIVE_ENV=dev_databricks_iceberg FF_ENGINE=databricks_spark \
+  fft snapshot run . --select tag:example:snapshot_demo --select tag:engine:databricks_spark
+```
+
+4. Or run only the snapshot step (after a normal `fft run`):
+
+   ```bash
+   # DuckDB example
+   make run ENGINE=duckdb              # builds users_clean etc.
+   make snapshot ENGINE=duckdb         # runs only snapshot models
+   ```
+
+   Or directly with `fft`:
+
+   ```bash
+   # Only snapshot models (tagged example:snapshot_demo)
+   fft snapshot run . \
+     --env dev_duckdb \
+     --select tag:example:snapshot_demo --select tag:engine:duckdb
+   ```
+
+   If your selection includes non-snapshot models, FFT will ignore them for the snapshot run.
+
+## Inspecting the snapshot table
+
+After a couple of runs with changed data, use your engine to inspect `users_clean_snapshot`:
+
+* **DuckDB** (from the project root):
+
+  ```sql
+  select *
+  from users_clean_snapshot
+  order by user_id, _ff_valid_from;  -- adjust column names to what you implement
+  ```
+
+* **Postgres / BigQuery / Databricks**: the table name is the same; the schema/database/dataset follows the profile.
+
+Typical patterns to explore:
+
+* **Current records only** (one row per `user_id`):
+
+  ```sql
+  select *
+  from users_clean_snapshot
+  where _ff_is_current = true;
+  ```
+
+* **History of a single user**:
+
+  ```sql
+  select *
+  from users_clean_snapshot
+  where user_id = 42
+  order by _ff_valid_from;
+  ```
+
+This makes it easy to answer questions like “what did we know about this user on date X?”.
+
+## Snapshot CLI & retention
+
+The snapshot demo uses the dedicated entry point:
+
+```bash
+fft snapshot run . --env dev_duckdb --select tag:example:snapshot_demo
+```
+
+In addition, the CLI supports retention and pruning flags:
+
+* `--prune` – enables pruning of old snapshot rows.
+* `--keep-last N` – when used with `--prune`, keeps only the last `N` versions per key.
+* `--dry-run` – shows which rows would be pruned without actually deleting anything.
+
+Example:
+
+```bash
+# Keep only the last 3 versions per user_id; just show the plan
+fft snapshot run . \
+  --env dev_duckdb \
+  --select tag:example:snapshot_demo \
+  --prune --keep-last 3 --dry-run
+
+# Apply the pruning for real
+fft snapshot run . \
+  --env dev_duckdb \
+  --select tag:example:snapshot_demo \
+  --prune --keep-last 3
+```
+
+This is especially useful when snapshot tables grow large and you only care about a bounded history window for most use cases.
+
+## Interaction with regular runs
+
+Two important rules:
+
+1. **Snapshot models are not part of `fft run`**
+   They’re intentionally excluded to keep regular pipeline runs stateless and predictable. If a snapshot model is accidentally selected in `fft run`, FFT surfaces a clear error:
+
+   > Snapshot models cannot be executed via 'fft run'. Use 'fft snapshot run' instead.
+
+2. **Snapshots depend on upstream models**
+   In the demo, `users_clean_snapshot` depends on `users_clean.ff`. The typical flow is:
+
+   ```bash
+   fft run . --env dev_duckdb --select tag:example:basic_demo
+   fft snapshot run . --env dev_duckdb --select tag:example:snapshot_demo
+   ```
+
+   * `fft run` ensures `users_clean` is fresh.
+   * `fft snapshot run` compares the new `users_clean` rows with the existing snapshot table and writes history changes.
diff --git a/docs/index.md b/docs/index.md
index 57ab785..20034b2 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,79 +1,99 @@
 # FastFlowTransform Documentation Hub
 
-Welcome! This page is your starting point for FastFlowTransform docs. Pick the track that matches what you want to do and follow the links to the detailed guides.
+FastFlowTransform (FFT) is a SQL + Python data modeling engine with a deterministic DAG, parallel executor, optional caching, incremental builds, auto-generated docs, snapshots, and built-in data-quality tests. The `fft` CLI orchestrates compilation, execution, docs, validation, and history tracking across DuckDB, Postgres, BigQuery (pandas + BigFrames), Databricks/Spark, and Snowflake Snowpark.
 
----
+Use this page as the front door into the docs: start with the orientation section, then jump to the guide that matches the task you have at hand.
 
-## Docs Navigation
-- **Getting Started** — you are here (`docs/index.md`)
-- [User Guide](./Technical_Overview.md#part-i-operational-guide)
-- [Modeling Reference](./Config_and_Macros.md)
-- [Parallelism & Cache](./Cache_and_Parallelism.md)
-- [CLI Guide](./CLI_Guide.md)
-- [Logging & Verbosity](./Logging.md)
-- [API calls in Python models](./Api_Models.md)
-- [Incremental Models](./Incremental.md)
-- [YAML Tests (Schema-bound)](./YAML_Tests.md)
-- [Model Unit Tests](./Unit_Tests.md)
-- [Data Quality Tests Reference](./Data_Quality_Tests.md)
-- [Auto-Docs & Lineage](./Auto_Docs.md)
-- [Troubleshooting & Error Codes](./Troubleshooting.md)
-- [Profiles & Environments](./Profiles.md)
-- [Sources Declaration](./Sources.md)
-- [Project Configuration](./Project_Config.md)
-- [State Selection (changed & results)](./State_Selection.md)
-- [Basic Demo](./examples/Basic_Demo.md)
-- [Materializations Demo](./examples/Materializations_Demo.md)
-- [Data Quality Tests Demo](./examples/DQ_Demo.md)
-- [Macros Demo](./examples/Macros_Demo.md)
-- [Cache Demo](./examples/Cache_Demo.md)
-- [Environment Matrix Demo](./examples/Environment_Matrix.md)
-- [Incremental & Delta Demo](examples/Incremental_Demo.md)
-- [Local Engine Setup](./examples/Local_Engine_Setup.md)
-- [API Demo](./examples/API_Demo.md)
-- [Developer Guide](./Technical_Overview.md#part-ii-architecture-internals)
+---
 
 ## Table of Contents
 
-- [Docs Navigation](#docs-navigation)
-- [Choose Your Path](#choose-your-path)
-- [Reference Map](#reference-map)
+- [Quick Orientation](#quick-orientation)
+- [Build & Run Projects](#build--run-projects)
+- [Modeling & Configuration](#modeling--configuration)
+- [Execution & State Management](#execution--state-management)
+- [Testing & Data Quality](#testing--data-quality)
+- [Docs, Debugging & Operations](#docs-debugging--operations)
+- [Examples & Tutorials](#examples--tutorials)
+- [Reference & Contribution](#reference--contribution)
 - [Need Help?](#need-help)
 
 ---
 
-## Choose Your Path
+## Quick Orientation
+
+- **New to FFT?** Read the [Quickstart](Quickstart.md) for installation (venv + editable install), seeding, and the first `fft run`.
+- **Want the bigger picture?** The [Technical Overview](Technical_Overview.md) explains the project layout, DAG, scheduler, registry, executors, and the roadmap snapshot.
+- **Learning the CLI surface area?** Browse the [CLI Guide](CLI_Guide.md) for command groups such as `fft run`, `fft snapshot run`, `fft dag`, `fft docgen`, `fft test`, and `fft utest`.
+
+---
+
+## Build & Run Projects
+
+- **Project layout & CLI workflow:** Pair the “Project Layout” chapter of the [Technical Overview](Technical_Overview.md#project-layout) with the [CLI Guide](CLI_Guide.md) to understand how `fft run`, `fft test`, and `fft dag` fit together.
+- **Profiles & environments:** [Profiles & Environments](Profiles.md) covers executor profiles, environment overrides, credential handling, and engine-specific flags.
+- **Runtimes & observability flags:** [Logging & Verbosity](Logging.md) explains log levels, JSON logs, progress indicators, and metrics toggles during `fft run`.
+- **Local runtimes & engines:** [Local Engine Setup](examples/Local_Engine_Setup.md) walks through DuckDB, Postgres, Spark/Delta, BigQuery, and Snowflake Snowpark bootstrapping for the demos.
+
+---
+
+## Modeling & Configuration
+
+- **SQL + Python authoring model:** [API & Models](Api_Models.md) documents the Python node decorators, HTTP helper (`fastflowtransform.api.http`), and how `ref()` / `source()` bindings work in both SQL and Python models.
+- **Templates, macros, and config keys:** [Configuration & Macros](Config_and_Macros.md) lists the `config(...)` options, reusable macros, helper functions, and naming rules for `.ff.sql` / `.ff.py`.
+- **Project-level metadata:** [Project Configuration](Project_Config.md) describes `project.yml`, default materializations, tags, exposures, docs strings, and the `models/` hierarchy.
+- **Sources & seeds:** [Sources](Sources.md) shows how to register upstream tables/files, snapshots of raw data, and how state tracking interacts with sources.
+
+---
+
+## Execution & State Management
+
+- **Parallelism, caching & rebuilds:** [Cache & Parallelism](Cache_and_Parallelism.md) dives into the level-wise scheduler, fingerprint cache, and `--rebuild` / `--no-cache` behaviors.
+- **Incremental models:** [Incremental Processing](Incremental.md) explains merge vs append strategies, cleanup rules, and engine-specific hooks.
+- **Snapshots / history tables:** [Snapshots](Snapshots.md) documents the `materialized='snapshot'` config, timestamp vs check strategies, and the dedicated `fft snapshot run . --env <profile>` entrypoint.
+- **Selective runs:** [State Selection](State_Selection.md) covers `--selector`, `--select`, `--exclude`, `--changed`, and `--results` filters across DAGs.
+
+---
+
+## Testing & Data Quality
 
-### 1. Build & Operate Projects (Data Practitioners)
+- **Schema-bound YAML tests:** [YAML Tests](YAML_Tests.md) details how to define and run column-level constraints declared in `.yml`.
+- **Reusable data-quality suites:** [Data Quality Tests](Data_Quality_Tests.md) catalogs reconciliation, freshness, and anomaly rules that can attach to models or sources.
+- **Fast model unit tests:** [Unit Tests](Unit_Tests.md) shows how to author `.sql` / `.py` assertions, seed fixtures, and run them via `fft utest`.
+
+---
+
+## Docs, Debugging & Operations
+
+- **Auto-generated docs & lineage:** [Auto Docs](Auto_Docs.md) explains `fft dag --html`, `fft docgen`, JSON exports, and optional `sync-db-comments` for Postgres/Snowflake.
+- **Visibility & logging:** [Logging & Verbosity](Logging.md) lists CLI flags for structured logs, progress bars, and verbose executor info.
+- **Troubleshooting:** [Troubleshooting & Error Codes](Troubleshooting.md) enumerates the most common failures, retry strategies, and diagnostic commands.
+
+---
 
-- **Get set up quickly:** follow the dedicated [Quickstart](Quickstart.md) guide for installation, seeding, and a first run.
-- **Need local runtimes?** The [API demo local engine setup](examples/Local_Engine_Setup.md) walks through DuckDB, Postgres, Databricks Spark, BigQuery, and Snowflake Snowpark.
-- **Understand the project layout & CLI workflow:** start with *Project Layout* in the [Technical Overview](Technical_Overview.md#project-layout) and pair it with the [CLI Guide](CLI_Guide.md) for command patterns.
-- **Configure runtimes & profiles:** review executor profiles and environment overrides in the dedicated [Profiles guide](Profiles.md) plus [Logging & Verbosity](Logging.md) for observability flags.
-- **Model data quality & troubleshoot runs:** combine the [Model Unit Tests guide](Unit_Tests.md) with [Troubleshooting & Error Codes](Troubleshooting.md) to keep runs deterministic and easy to debug.
-- **Explore runnable demos:** start with the [Basic Demo Overview](examples/Basic_Demo.md) or browse the `examples/` directory; each subproject ships with its own README.
+## Examples & Tutorials
 
-### 2. Extend FastFlowTransform (Developers & Contributors)
+- **Core walkthroughs:** [Basic Demo](examples/Basic_Demo.md) and [Materializations Demo](examples/Materializations_Demo.md) cover the standard table/view/incremental builds and DAG navigation.
+- **Testing-focused:** [Data Quality Tests Demo](examples/DQ_Demo.md) and [Macros Demo](examples/Macros_Demo.md) showcase advanced assertions and templating.
+- **Performance & state:** [Cache Demo](examples/Cache_Demo.md), [Environment Matrix Demo](examples/Environment_Matrix.md), and [Incremental Demo](examples/Incremental_Demo.md) highlight rebuilds and selective runs.
+- **API & integrations:** [API Demo](examples/API_Demo.md) illustrates Python HTTP models; [Local Engine Setup](examples/Local_Engine_Setup.md) provides engine-specific Makefiles.
+- **History tracking:** [Snapshot Demo](examples/Snapshot_Demo.md) demonstrates the snapshot materialization end-to-end with timestamp/check strategies.
 
-- **Dive into architecture & core modules:** start with [Architecture Overview](Technical_Overview.md#architecture-overview) and [Core Modules](Technical_Overview.md#core-modules) for registry, DAG, executors, validation, and more.
-- **Add tests & seeds:** reuse the curated demos under `docs/examples/` for seeds/Makefiles and follow the [Model Unit Tests guide](Unit_Tests.md) for deterministic fixtures.
-- **Contribute code:** follow the workflow described in [`./Contributing.md`](./Contributing.md) and consult the module-level docs for internal APIs.
-- **Plan ahead:** check the roadmap snapshot in the [Technical Overview](Technical_Overview.md#roadmap-snapshot) to understand upcoming work.
+All demos live in the top-level `examples/` directory and ship with Makefiles plus runnable seeds.
 
 ---
 
-## Reference Map
+## Reference & Contribution
 
-- **Modeling reference** — Jinja configuration, macros, helper functions: [`Config_and_Macros.md`](Config_and_Macros.md)
-- **CLI entry point & commands** — `src/fastflowtransform/cli.py`
-- **Registry & node loading** — `src/fastflowtransform/core.py`
-- **Unit test runner** — `src/fastflowtransform/utest.py`
-- **Rendered DAG templates** — `src/fastflowtransform/docs/templates/`
+- **API reference:** Browse the generated [API Reference](reference/) (MkDocStrings) for public functions, classes, and executors under `src/fastflowtransform`.
+- **Architecture internals:** The [Technical Overview](Technical_Overview.md#part-ii-architecture-internals) dives into registries, DAG building, validation, and engine abstractions.
+- **Contributing:** Follow [Contributing.md](Contributing.md) for dev environment setup (`uv`, `pyproject.toml`), coding standards, tests, and PR expectations.
+- **License:** Apache 2.0 — see [License.md](License.md).
 
 ---
 
 ## Need Help?
 
-- Open an issue or PR — see [`./Contributing.md`](./Contributing.md) for guidelines.
-- Join the discussion (planning doc / roadmap highlights) — see the roadmap section in the [Technical Overview](Technical_Overview.md#roadmap-snapshot).
-- If you spot gaps in the docs, file an issue with the context and links to the relevant section.
+- Open an issue or PR with context — start with [Contributing.md](Contributing.md) if you want to propose changes.
+- Surface documentation gaps, bugs, or missing examples via GitHub issues in [MirrorsAndMisdirections/FastFlowTransform](https://github.com/MirrorsAndMisdirections/FastFlowTransform).
+- For roadmap highlights or planning threads, check the final section of the [Technical Overview](Technical_Overview.md#roadmap-snapshot).
diff --git a/examples/incremental_demo/.env.dev_bigquery_bigframes b/examples/snapshot_demo/.env.dev_bigquery_bigframes
similarity index 100%
rename from examples/incremental_demo/.env.dev_bigquery_bigframes
rename to examples/snapshot_demo/.env.dev_bigquery_bigframes
diff --git a/examples/incremental_demo/.env.dev_bigquery_pandas b/examples/snapshot_demo/.env.dev_bigquery_pandas
similarity index 100%
rename from examples/incremental_demo/.env.dev_bigquery_pandas
rename to examples/snapshot_demo/.env.dev_bigquery_pandas
diff --git a/examples/snapshot_demo/.env.dev_databricks b/examples/snapshot_demo/.env.dev_databricks
new file mode 100644
index 0000000..a8d5dc9
--- /dev/null
+++ b/examples/snapshot_demo/.env.dev_databricks
@@ -0,0 +1,16 @@
+# Databricks (or local Spark) defaults for the snapshot demo
+FF_SPARK_MASTER=local[*]
+FF_SPARK_APP_NAME=snapshot_demo
+
+# Optional overrides for Databricks SQL warehouses or Unity Catalog
+FF_DBR_DATABASE=snapshot_demo
+# FF_DBR_CATALOG=hive_metastore
+
+# Enable a local Hive-compatible metastore (required for snapshots when running Spark standalone)
+FF_DBR_ENABLE_HIVE=1
+
+# Configure Java for local Spark sessions when needed
+# JAVA_HOME=/opt/homebrew/opt/openjdk@17
+
+# To target Delta Lake tables explicitly, set:
+# FF_DBR_TABLE_FORMAT=delta
diff --git a/examples/snapshot_demo/.env.dev_databricks_delta b/examples/snapshot_demo/.env.dev_databricks_delta
new file mode 100644
index 0000000..0cfccaf
--- /dev/null
+++ b/examples/snapshot_demo/.env.dev_databricks_delta
@@ -0,0 +1,16 @@
+# Databricks Spark (Delta) profile defaults for the snapshot demo
+FF_SPARK_MASTER=local[*]
+FF_SPARK_APP_NAME=snapshot_demo
+
+# Managed metastore/database when running Spark locally
+FF_DBR_ENABLE_HIVE=1
+FF_DBR_DATABASE=snapshot_demo
+# Optional: Unity Catalog
+# FF_DBR_CATALOG=hive_metastore
+
+# Switch the managed table format (parquet|delta|iceberg)
+# Defaults to parquet unless Makefile/CLI overrides FF_DBR_TABLE_FORMAT
+# FF_DBR_TABLE_FORMAT=delta
+
+# Configure Java for local Spark sessions when needed
+# JAVA_HOME=/opt/homebrew/opt/openjdk@17
diff --git a/examples/snapshot_demo/.env.dev_databricks_iceberg b/examples/snapshot_demo/.env.dev_databricks_iceberg
new file mode 100644
index 0000000..d82e3c4
--- /dev/null
+++ b/examples/snapshot_demo/.env.dev_databricks_iceberg
@@ -0,0 +1,13 @@
+# Databricks Spark (Iceberg) profile defaults for the snapshot demo
+FF_SPARK_MASTER=local[*]
+FF_SPARK_APP_NAME=snapshot_demo
+
+# Managed Iceberg catalog metadata lives under .local/iceberg_warehouse
+# The profile wires the Iceberg catalog; just ensure the directory is writable.
+FF_DBR_DATABASE=snapshot_demo
+
+# Tell FFT/Spark to request Iceberg tables (Makefile also injects this)
+FF_DBR_TABLE_FORMAT=iceberg
+
+# Configure Java for local Spark sessions when needed
+# JAVA_HOME=/opt/homebrew/opt/openjdk@17
diff --git a/examples/snapshot_demo/.env.dev_duckdb b/examples/snapshot_demo/.env.dev_duckdb
new file mode 100644
index 0000000..00a0889
--- /dev/null
+++ b/examples/snapshot_demo/.env.dev_duckdb
@@ -0,0 +1,2 @@
+# DuckDB profile for the snapshot demo
+FF_DUCKDB_PATH=.local/snapshot_demo.duckdb
diff --git a/examples/snapshot_demo/.env.dev_postgres b/examples/snapshot_demo/.env.dev_postgres
new file mode 100644
index 0000000..3f80dd1
--- /dev/null
+++ b/examples/snapshot_demo/.env.dev_postgres
@@ -0,0 +1,3 @@
+# Postgres profile for the snapshot demo (replace with your own connection string)
+FF_PG_DSN=postgresql+psycopg://postgres:postgres@localhost:5432
+FF_PG_SCHEMA=snapshot_demo
diff --git a/examples/snapshot_demo/.env.dev_snowflake b/examples/snapshot_demo/.env.dev_snowflake
new file mode 100644
index 0000000..b6410cb
--- /dev/null
+++ b/examples/snapshot_demo/.env.dev_snowflake
@@ -0,0 +1,18 @@
+# Snowflake Snowpark profile for the snapshot demo
+
+# Your Snowflake account identifier, e.g. xy12345.eu-central-1
+FF_SF_ACCOUNT=your_account_id
+
+# Username & password (or extend for keypair auth)
+FF_SF_USER=your_username
+FF_SF_PASSWORD=your_password
+
+# Compute warehouse
+FF_SF_WAREHOUSE=COMPUTE_WH
+
+# Database & schema for the demo
+FF_SF_DATABASE=EXAMPLE_DEMO
+FF_SF_SCHEMA=SNAPSHOT_DEMO
+
+# Optional role (can be left blank)
+FF_SF_ROLE=ANALYST
diff --git a/examples/snapshot_demo/Makefile b/examples/snapshot_demo/Makefile
new file mode 100644
index 0000000..7389e16
--- /dev/null
+++ b/examples/snapshot_demo/Makefile
@@ -0,0 +1,138 @@
+.PHONY: seed run snapshot test dag show artifacts clean demo help
+
+# --- Configuration -----------------------------------------------------------
+
+DB ?= .local/snapshot_demo.duckdb
+PROJECT ?= .
+UV ?= uv
+
+# Engine selector (duckdb|postgres|databricks_spark|bigquery|snowflake_snowpark)
+ENGINE ?= duckdb
+
+# BigQuery frame type selector (pandas | bigframes)
+BQ_FRAME ?= bigframes
+
+# Databricks table format (parquet | delta | iceberg)
+DBR_TABLE_FORMAT ?= parquet
+
+# Resolve profile and tags per engine
+ifeq ($(ENGINE),duckdb)
+  PROFILE_ENV = dev_duckdb
+  ENGINE_TAG = engine:duckdb
+endif
+ifeq ($(ENGINE),postgres)
+  PROFILE_ENV = dev_postgres
+  ENGINE_TAG = engine:postgres
+endif
+ifeq ($(ENGINE),databricks_spark)
+  ENGINE_TAG = engine:databricks_spark
+  ifeq ($(DBR_TABLE_FORMAT),delta)
+    PROFILE_ENV = dev_databricks_delta
+  else ifeq ($(DBR_TABLE_FORMAT),iceberg)
+    PROFILE_ENV = dev_databricks_iceberg
+  else
+    PROFILE_ENV = dev_databricks_delta
+  endif
+endif
+ifeq ($(ENGINE),bigquery)
+  ENGINE_TAG = engine:bigquery
+  ifeq ($(BQ_FRAME),pandas)
+    PROFILE_ENV = dev_bigquery_pandas
+  else
+    PROFILE_ENV = dev_bigquery_bigframes
+  endif
+endif
+ifeq ($(ENGINE),snowflake_snowpark)
+  PROFILE_ENV = dev_snowflake
+  ENGINE_TAG = engine:snowflake_snowpark
+endif
+
+BASE_ENV = FFT_ACTIVE_ENV=$(PROFILE_ENV) FF_ENGINE=$(ENGINE)
+
+ifeq ($(ENGINE),bigquery)
+  BASE_ENV := $(BASE_ENV) FF_ENGINE_VARIANT=$(BQ_FRAME)
+endif
+ifeq ($(ENGINE),databricks_spark)
+  BASE_ENV := $(BASE_ENV) FF_DBR_TABLE_FORMAT=$(DBR_TABLE_FORMAT)
+endif
+
+RUN_ENV = $(BASE_ENV)
+
+# Select only models belonging to this example + the active engine
+SELECT_FLAGS = --select tag:example:snapshot_demo --select tag:$(ENGINE_TAG)
+
+SHOW_MODEL ?= mart_users_by_domain
+
+CLEAN_SCRIPT = ../_scripts/cleanup_env.py
+# Engine-specific cleanup command; an unrecognised ENGINE aborts make while parsing this block.
+ifeq ($(ENGINE),duckdb)
+  CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine duckdb --env "$(PROFILE_ENV)" --project "$(PROJECT)" --duckdb-path "$(DB)"
+else ifeq ($(ENGINE),postgres)
+  CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine postgres --env "$(PROFILE_ENV)" --project "$(PROJECT)"
+else ifeq ($(ENGINE),databricks_spark)
+  CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine databricks_spark --env "$(PROFILE_ENV)" --project "$(PROJECT)"
+else ifeq ($(ENGINE),bigquery)
+  CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine bigquery --env "$(PROFILE_ENV)" --project "$(PROJECT)"
+else ifeq ($(ENGINE),snowflake_snowpark)
+  CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine snowflake_snowpark --env "$(PROFILE_ENV)" --project "$(PROJECT)"
+else
+  $(error Unsupported ENGINE=$(ENGINE) - pick duckdb|postgres|databricks_spark|bigquery|snowflake_snowpark)
+endif
+
+# --- Targets ----------------------------------------------------------------
+
+help:
+	@echo "FastFlowTransform Snapshot Demo"
+	@echo "Targets:"
+	@echo "  make seed       ENGINE=$(ENGINE)"
+	@echo "  make run        ENGINE=$(ENGINE)    # staging + marts"
+	@echo "  make snapshot   ENGINE=$(ENGINE)    # snapshot models (materialized='snapshot')"
+	@echo "  make dag        ENGINE=$(ENGINE)"
+	@echo "  make test       ENGINE=$(ENGINE)"
+	@echo "  make show       ENGINE=$(ENGINE) SHOW_MODEL=$(SHOW_MODEL)"
+	@echo "  make demo       ENGINE=$(ENGINE)"
+	@echo "  make clean      ENGINE=$(ENGINE)"
+	@echo
+	@echo "Variables: DB=$(DB) PROJECT=$(PROJECT) UV=$(UV) ENGINE=$(ENGINE) BQ_FRAME=$(BQ_FRAME) DBR_TABLE_FORMAT=$(DBR_TABLE_FORMAT)"
+
+seed:
+	env $(BASE_ENV) $(UV) run fft seed "$(PROJECT)" --env $(PROFILE_ENV)
+
+run:
+	env $(RUN_ENV) $(UV) run fft run "$(PROJECT)" --env $(PROFILE_ENV) $(SELECT_FLAGS)
+
+snapshot:
+	env $(RUN_ENV) $(UV) run fft snapshot run "$(PROJECT)" --env $(PROFILE_ENV) $(SELECT_FLAGS)
+
+test:
+	env $(BASE_ENV) $(UV) run fft test "$(PROJECT)" --env $(PROFILE_ENV) $(SELECT_FLAGS)
+
+dag:
+	env $(RUN_ENV) $(UV) run fft dag "$(PROJECT)" --env $(PROFILE_ENV) $(SELECT_FLAGS) --html
+# Open the rendered DAG page; set OPENER to override the auto-detected xdg-open/open.
+show:
+	@if [ -f "$(PROJECT)/site/dag/index.html" ]; then \
+		opener="$(OPENER)"; [ -n "$$opener" ] || opener="$$(command -v xdg-open || command -v open)"; [ -n "$$opener" ] && $$opener "$(PROJECT)/site/dag/index.html" 2>/dev/null || echo "Open manually at: $(PROJECT)/site/dag/index.html"; \
+	else \
+		echo "No HTML found: $(PROJECT)/site/dag/index.html"; \
+	fi
+
+artifacts:
+	@echo
+	@echo "== 📦 Artifacts =="
+	@echo "  $(PROJECT)/.fastflowtransform/target/{manifest.json,run_results.json,catalog.json}"
+	@echo "  DAG HTML: $(PROJECT)/site/dag/index.html"
+
+clean:
+	$(CLEAN_CMD)
+
+demo: clean
+	@echo "== 🚀 Snapshot Demo ($(ENGINE)) =="
+	@echo "Profile=$(PROFILE_ENV)  PROJECT=$(PROJECT)  DBR_TABLE_FORMAT=$(DBR_TABLE_FORMAT)"
+	+$(MAKE) seed ENGINE=$(ENGINE)
+	+$(MAKE) run ENGINE=$(ENGINE)
+	+$(MAKE) snapshot ENGINE=$(ENGINE)
+	+$(MAKE) dag ENGINE=$(ENGINE)
+	+$(MAKE) test ENGINE=$(ENGINE)
+	+$(MAKE) artifacts
+	@echo "✅ Demo complete."
diff --git a/examples/snapshot_demo/README.md b/examples/snapshot_demo/README.md
new file mode 100644
index 0000000..61a7ab6
--- /dev/null
+++ b/examples/snapshot_demo/README.md
@@ -0,0 +1,36 @@
+# Snapshot demo
+
+`examples/snapshot_demo` reuses the basic users pipeline and adds snapshot models that keep
+slowly-changing history tables. It now mirrors the incremental demo by supporting Spark parquet,
+Delta Lake, and Iceberg targets through the Databricks/Spark executor.
+
+## Environment files
+
+Copy one of the `.env.dev_*` files and export it before running `make`:
+
+| File | Purpose |
+| --- | --- |
+| `.env.dev_duckdb` | Local DuckDB file for the demo |
+| `.env.dev_postgres` | Postgres DSN/schema |
+| `.env.dev_databricks_delta` | Local Spark or Databricks defaults for parquet/Delta tables |
+| `.env.dev_databricks_iceberg` | Spark 4+/Databricks configuration with the Iceberg catalog wired in |
+
+`FF_DBR_TABLE_FORMAT` can always override the physical Spark table format (`parquet`, `delta`,
+`iceberg`) even if the profile defaults differ.
+
+## Running the demo
+
+```bash
+# DuckDB / Postgres
+make demo ENGINE=duckdb
+make demo ENGINE=postgres
+
+# Databricks / Spark: switch table format via DBR_TABLE_FORMAT
+make demo ENGINE=databricks_spark DBR_TABLE_FORMAT=parquet
+make demo ENGINE=databricks_spark DBR_TABLE_FORMAT=delta
+make demo ENGINE=databricks_spark DBR_TABLE_FORMAT=iceberg
+```
+
+Under the hood `make demo` executes `fft seed`, `fft run`, `fft snapshot run`, `fft dag`,
+and `fft test` for the models tagged with `example:snapshot_demo`. Use `make snapshot ENGINE=...`
+if you only want to update the snapshot materializations after a regular `fft run`.
diff --git a/examples/snapshot_demo/models/README.md b/examples/snapshot_demo/models/README.md
new file mode 100644
index 0000000..cb87b64
--- /dev/null
+++ b/examples/snapshot_demo/models/README.md
@@ -0,0 +1,20 @@
+# Models directory (snapshot_demo)
+
+This demo shows how to:
+
+- Clean seed data into a staging view.
+- Build a small mart (`mart_users_by_domain`).
+- Maintain two slowly-changing snapshot tables:
+
+  - `users_clean_snapshot` – timestamp-based snapshot of the staging view.
+  - `mart_users_by_domain_snapshot` – check-based snapshot of the mart.
+
+The snapshot models use:
+
+```jinja
+materialized='snapshot'
+strategy='timestamp' | 'check'
+unique_key='...'
+updated_at='...'
+check_cols=['...']
+```
\ No newline at end of file
diff --git a/examples/snapshot_demo/models/marts/mart_users_by_domain.ff.sql b/examples/snapshot_demo/models/marts/mart_users_by_domain.ff.sql
new file mode 100644
index 0000000..bbeab40
--- /dev/null
+++ b/examples/snapshot_demo/models/marts/mart_users_by_domain.ff.sql
@@ -0,0 +1,28 @@
+{{ config(
+    materialized='table',
+    tags=[
+        'example:snapshot_demo',
+        'scope:mart',
+        'engine:duckdb',
+        'engine:postgres',
+        'engine:databricks_spark',
+        'engine:bigquery',
+        'engine:snowflake_snowpark',
+    ],
+) }}
+
+with base as (
+    select
+        email_domain,
+        signup_date
+    from {{ ref('users_clean.ff') }}
+)
+
+select
+    email_domain,  /* grouping key: one output row per domain */
+    count(*) as user_count,  /* number of users in the domain */
+    min(signup_date) as first_signup,  /* earliest signup in the domain */
+    max(signup_date) as last_signup  /* latest signup in the domain */
+from base
+group by email_domain
+order by email_domain;
diff --git a/examples/snapshot_demo/models/snapshots/mart_users_by_domain_snapshot.ff.sql b/examples/snapshot_demo/models/snapshots/mart_users_by_domain_snapshot.ff.sql
new file mode 100644
index 0000000..19c9fc3
--- /dev/null
+++ b/examples/snapshot_demo/models/snapshots/mart_users_by_domain_snapshot.ff.sql
@@ -0,0 +1,24 @@
+{{ config(
+    materialized='snapshot',
+    snapshot={
+        'strategy': 'check',
+        'check_cols': ['user_count', 'first_signup', 'last_signup'],
+    },
+    unique_key='email_domain',
+    tags=[
+        'example:snapshot_demo',
+        'scope:snapshot',
+        'engine:duckdb',
+        'engine:postgres',
+        'engine:databricks_spark',
+        'engine:bigquery',
+        'engine:snowflake_snowpark',
+    ],
+) }}
+
+select
+    email_domain,  /* configured as unique_key */
+    user_count,  /* listed in check_cols */
+    first_signup,  /* listed in check_cols */
+    last_signup  /* listed in check_cols */
+from {{ ref('mart_users_by_domain.ff') }};
diff --git a/examples/snapshot_demo/models/snapshots/users_clean_snapshot.ff.sql b/examples/snapshot_demo/models/snapshots/users_clean_snapshot.ff.sql
new file mode 100644
index 0000000..c068eec
--- /dev/null
+++ b/examples/snapshot_demo/models/snapshots/users_clean_snapshot.ff.sql
@@ -0,0 +1,24 @@
+{{ config(
+    materialized='snapshot',
+    snapshot={
+        'strategy': 'timestamp',
+        'updated_at': 'signup_date',
+    },
+    unique_key='user_id',
+    tags=[
+        'example:snapshot_demo',
+        'scope:snapshot',
+        'engine:duckdb',
+        'engine:postgres',
+        'engine:databricks_spark',
+        'engine:bigquery',
+        'engine:snowflake_snowpark',
+    ],
+) }}
+
+select
+    user_id,  /* configured as unique_key */
+    email,
+    email_domain,
+    signup_date  /* configured as updated_at for the timestamp strategy */
+from {{ ref('users_clean.ff') }};
diff --git a/examples/snapshot_demo/models/staging/users_clean.ff.sql b/examples/snapshot_demo/models/staging/users_clean.ff.sql
new file mode 100644
index 0000000..7d8a674
--- /dev/null
+++ b/examples/snapshot_demo/models/staging/users_clean.ff.sql
@@ -0,0 +1,27 @@
+{{ config(
+    materialized='table',
+    tags=[
+        'example:snapshot_demo',
+        'scope:staging',
+        'engine:duckdb',
+        'engine:postgres',
+        'engine:databricks_spark',
+        'engine:bigquery',
+        'engine:snowflake_snowpark',
+    ],
+) }}
+
+with raw_users as (
+    select
+        cast(id as integer) as user_id,  /* source column id exposed as user_id */
+        lower(email) as email,  /* normalised to lower case */
+        cast(signup_date as date) as signup_date
+    from {{ source('crm', 'users') }}
+)
+
+select
+    user_id,
+    email,
+    regexp_replace(email, '^.*@', '') as email_domain,  /* drops everything up to the final @ */
+    signup_date
+from raw_users;
diff --git a/examples/snapshot_demo/profiles.yml b/examples/snapshot_demo/profiles.yml
new file mode 100644
index 0000000..c23c3bd
--- /dev/null
+++ b/examples/snapshot_demo/profiles.yml
@@ -0,0 +1,70 @@
+# Connection profiles for the snapshot demo.
+
+dev_duckdb:
+  engine: duckdb
+  duckdb:
+    path: "{{ env('FF_DUCKDB_PATH', '.local/snapshot_demo.duckdb') }}"
+
+dev_postgres:
+  engine: postgres
+  postgres:
+    dsn: "{{ env('FF_PG_DSN') }}"
+    db_schema: "{{ env('FF_PG_SCHEMA', 'snapshot_demo') }}"
+
+dev_databricks_delta:
+  engine: databricks_spark
+  databricks_spark:
+    master: "{{ env('FF_SPARK_MASTER', 'local[*]') }}"
+    app_name: "{{ env('FF_SPARK_APP_NAME', 'snapshot_demo') }}"
+    warehouse_dir: "{{ project_dir() }}/.local/spark_warehouse"
+    extra_conf:
+      spark.hadoop.javax.jdo.option.ConnectionURL: "jdbc:derby:{{ project_dir() }}/.local/metastore_db;create=true"
+      spark.hadoop.datanucleus.rdbms.datastoreAdapterClassName: "org.datanucleus.store.rdbms.adapter.DerbyAdapter"
+      spark.hadoop.datanucleus.schema.autoCreateAll: "true"
+      spark.hadoop.javax.jdo.option.ConnectionDriverName: "org.apache.derby.jdbc.EmbeddedDriver"
+      spark.driver.extraJavaOptions: "-Dderby.stream.error.file={{ project_dir() }}/.local/derby.log"
+
+dev_databricks_iceberg:
+  engine: databricks_spark
+  databricks_spark:
+    master: "{{ env('FF_SPARK_MASTER', 'local[*]') }}"
+    app_name: "{{ env('FF_SPARK_APP_NAME', 'snapshot_demo') }}"
+    warehouse_dir: "{{ project_dir() }}/.local/spark_warehouse"
+    table_format: "iceberg"
+    extra_conf:
+      spark.jars.packages: "org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0"
+      spark.sql.catalog.iceberg: "org.apache.iceberg.spark.SparkCatalog"
+      spark.sql.catalog.iceberg.type: "hadoop"
+      spark.sql.catalog.iceberg.warehouse: "file://{{ project_dir() }}/.local/iceberg_warehouse"
+      spark.sql.catalog.iceberg.write.metadata.version-hint.enabled: "false"
+      spark.sql.catalog.iceberg.read.metadata.version-hint.enabled: "false"
+
+dev_bigquery_bigframes:
+  engine: bigquery
+  bigquery:
+    project: "{{ env('FF_BQ_PROJECT') }}"
+    dataset: "{{ env('FF_BQ_DATASET', 'snapshot_demo') }}"
+    location: "{{ env('FF_BQ_LOCATION', 'EU') }}"
+    use_bigframes: true
+    # allow_create_dataset: true   # uncomment to auto-create dataset on first run
+
+dev_bigquery_pandas:
+  engine: bigquery
+  bigquery:
+    project: "{{ env('FF_BQ_PROJECT') }}"
+    dataset: "{{ env('FF_BQ_DATASET', 'snapshot_demo') }}"
+    location: "{{ env('FF_BQ_LOCATION', 'EU') }}"
+    use_bigframes: false
+    # allow_create_dataset: true   # uncomment to auto-create dataset on first run
+
+dev_snowflake:
+  engine: snowflake_snowpark
+  snowflake_snowpark:
+    account: "{{ env('FF_SF_ACCOUNT') }}"
+    user: "{{ env('FF_SF_USER') }}"
+    password: "{{ env('FF_SF_PASSWORD') }}"
+    warehouse: "{{ env('FF_SF_WAREHOUSE', 'COMPUTE_WH') }}"
+    database: "{{ env('FF_SF_DATABASE', 'EXAMPLE_DEMO') }}"
+    schema: "{{ env('FF_SF_SCHEMA', 'SNAPSHOT_DEMO') }}"
+    role: "{{ env('FF_SF_ROLE', '') }}"
+    allow_create_schema: true
diff --git a/examples/snapshot_demo/project.yml b/examples/snapshot_demo/project.yml
new file mode 100644
index 0000000..6ce1031
--- /dev/null
+++ b/examples/snapshot_demo/project.yml
@@ -0,0 +1,119 @@
+name: snapshot_demo
+version: "0.1"
+models_dir: models
+
+docs:
+  dag_dir: site/dag
+
+vars: {}
+# Per-model storage paths under .local/spark; each model gets a directory named after itself.
+models:
+  storage:
+    mart_users_by_domain.ff:
+      path: ".local/spark/mart_users_by_domain"
+      # format: parquet
+    users_clean.ff:
+      path: ".local/spark/users_clean"
+      # format: parquet
+    users_clean_snapshot.ff:
+      path: ".local/spark/users_clean_snapshot"
+      # format: parquet
+    mart_users_by_domain_snapshot.ff:
+      path: ".local/spark/mart_users_by_domain_snapshot"
+      # format: parquet
+
+tests:
+  # --------------------------------------------------------------------------
+  # Base tests (similar to basic_demo, slightly simplified)
+  # --------------------------------------------------------------------------
+  - type: not_null
+    table: users_clean
+    column: email_domain
+    tags: [example_snapshot_demo]
+
+  - type: unique
+    table: users_clean
+    column: user_id
+    tags: [example_snapshot_demo]
+
+  - type: unique
+    table: mart_users_by_domain
+    column: email_domain
+    tags: [example_snapshot_demo]
+
+  # --------------------------------------------------------------------------
+  # Snapshot tables – basic sanity on business keys
+  # --------------------------------------------------------------------------
+  - type: not_null
+    table: users_clean_snapshot
+    column: user_id
+    tags: [example_snapshot_demo]
+
+  - type: not_null
+    table: mart_users_by_domain_snapshot
+    column: email_domain
+    tags: [example_snapshot_demo]
+
+  # --------------------------------------------------------------------------
+  # Snapshot tables – snapshot metadata columns
+  #
+  # Assumes snapshot columns:
+  #   _ff_valid_from   – version start
+  #   _ff_valid_to     – version end (nullable for current rows)
+  #   _ff_is_current   – boolean flag for open row
+  #   _ff_updated_at   – source updated_at used for snapshotting
+  #
+  # Adjust the column names if your BaseExecutor constants differ.
+  # --------------------------------------------------------------------------
+
+  # users_clean_snapshot: every row must have a valid_from timestamp
+  - type: not_null
+    table: users_clean_snapshot
+    column: _ff_valid_from
+    tags: [example_snapshot_demo]
+
+  # users_clean_snapshot: is_current flag should always be set (true/false)
+  - type: not_null
+    table: users_clean_snapshot
+    column: _ff_is_current
+    tags: [example_snapshot_demo]
+
+  # users_clean_snapshot: updated_at metadata should be populated
+  - type: not_null
+    table: users_clean_snapshot
+    column: _ff_updated_at
+    tags: [example_snapshot_demo]
+
+  # mart_users_by_domain_snapshot: every row must have a valid_from timestamp
+  - type: not_null
+    table: mart_users_by_domain_snapshot
+    column: _ff_valid_from
+    tags: [example_snapshot_demo]
+
+  # mart_users_by_domain_snapshot: is_current flag should always be set (true/false)
+  - type: not_null
+    table: mart_users_by_domain_snapshot
+    column: _ff_is_current
+    tags: [example_snapshot_demo]
+
+  # mart_users_by_domain_snapshot: updated_at metadata should be populated
+  - type: not_null
+    table: mart_users_by_domain_snapshot
+    column: _ff_updated_at
+    tags: [example_snapshot_demo]
+
+  # --------------------------------------------------------------------------
+  # Optional: row-count sanity for snapshots
+  # (for the demo data – tweak or remove if you change the seed)
+  # --------------------------------------------------------------------------
+  - type: row_count_between
+    table: users_clean_snapshot
+    min_rows: 3
+    max_rows: 50
+    tags: [example_snapshot_demo]
+
+  - type: row_count_between
+    table: mart_users_by_domain_snapshot
+    min_rows: 1
+    max_rows: 50
+    tags: [example_snapshot_demo]
diff --git a/examples/snapshot_demo/seeds/README.md b/examples/snapshot_demo/seeds/README.md
new file mode 100644
index 0000000..2e553ed
--- /dev/null
+++ b/examples/snapshot_demo/seeds/README.md
@@ -0,0 +1,4 @@
+# Seeds directory
+
+Add CSV or Parquet files for reproducible seeds.
+Usage examples are covered in docs/Quickstart.md and docs/Config_and_Macros.md#13-seeds-sources-and-dependencies.
diff --git a/examples/snapshot_demo/seeds/seed_users.csv b/examples/snapshot_demo/seeds/seed_users.csv
new file mode 100644
index 0000000..e890383
--- /dev/null
+++ b/examples/snapshot_demo/seeds/seed_users.csv
@@ -0,0 +1,4 @@
+id,email,signup_date
+1,anna@example.com,2024-01-05
+2,bob@example.net,2024-02-11
+3,cara@example.org,2024-02-27
diff --git a/examples/snapshot_demo/sources.yml b/examples/snapshot_demo/sources.yml
new file mode 100644
index 0000000..d48deca
--- /dev/null
+++ b/examples/snapshot_demo/sources.yml
@@ -0,0 +1,8 @@
+version: 2
+
+sources:
+  - name: crm
+    tables:
+      - name: users
+        identifier: seed_users
+        description: Three sample users that populate the seed table.
diff --git a/examples/snapshot_demo/tests/dq/README.md b/examples/snapshot_demo/tests/dq/README.md
new file mode 100644
index 0000000..1acd01d
--- /dev/null
+++ b/examples/snapshot_demo/tests/dq/README.md
@@ -0,0 +1,4 @@
+# Data quality tests
+
+Store custom data-quality tests that run via `fft test` (docs/Data_Quality_Tests.md).
+Use this directory for schema-bound tests separate from unit specs.
diff --git a/examples/snapshot_demo/tests/unit/README.md b/examples/snapshot_demo/tests/unit/README.md
new file mode 100644
index 0000000..74f4550
--- /dev/null
+++ b/examples/snapshot_demo/tests/unit/README.md
@@ -0,0 +1,12 @@
+# Unit tests (snapshot_demo)
+
+Add YAML unit specs for fine-grained expectations, for example:
+
+- that `users_clean_snapshot` has at least N rows
+- that each `user_id` has at most one open (`_ff_valid_to IS NULL`) row
+
+Invoke via:
+
+```bash
+fft utest . --env dev_duckdb
+```
\ No newline at end of file
diff --git a/examples_article/http_cache_demo/README.md b/examples_article/http_cache_demo/README.md
new file mode 100644
index 0000000..5e977f7
--- /dev/null
+++ b/examples_article/http_cache_demo/README.md
@@ -0,0 +1,7 @@
+# FastFlowTransform project scaffold
+
+This project was created with `fft init`.
+Next steps:
+1. Update `profiles.yml` with real connection details (docs/Profiles.md).
+2. Add sources in `sources.yml` and author models under `models/` (docs/Config_and_Macros.md).
+3. Seed sample data with `fft seed` and execute models with `fft run` (docs/Quickstart.md).
diff --git a/examples_article/http_cache_demo/docs/README.md b/examples_article/http_cache_demo/docs/README.md
new file mode 100644
index 0000000..69e73e7
--- /dev/null
+++ b/examples_article/http_cache_demo/docs/README.md
@@ -0,0 +1,4 @@
+# Project documentation
+
+Write operator or contributor notes here and keep them in sync with generated docs.
+See docs/Technical_Overview.md#auto-docs-and-lineage for `fft dag` / `fft docgen` guidance.
diff --git a/examples_article/http_cache_demo/models/README.md b/examples_article/http_cache_demo/models/README.md
new file mode 100644
index 0000000..32818bb
--- /dev/null
+++ b/examples_article/http_cache_demo/models/README.md
@@ -0,0 +1,4 @@
+# Models directory
+
+Place SQL (`*.ff.sql`) and Python (`*.ff.py`) models here.
+See docs/Config_and_Macros.md for modeling guidance and config options.
diff --git a/examples_article/http_cache_demo/models/todo_ingest.ff.py b/examples_article/http_cache_demo/models/todo_ingest.ff.py
new file mode 100644
index 0000000..3c5dcc2
--- /dev/null
+++ b/examples_article/http_cache_demo/models/todo_ingest.ff.py
@@ -0,0 +1,45 @@
+import pandas as pd
+from fastflowtransform import model
+from fastflowtransform.api.http import get_df
+
+
+# 1. Define the Paginator
+# This function runs after every request to determine what to do next.
+def offset_paginator(url, params, response_json):
+    """Return the next-page request for `get_df`, or None after an empty page or page 2."""
+    if not response_json:
+        return None
+
+    # The paginator contract allows `params` to be None, so normalise before reading it.
+    next_params = dict(params or {})
+    current_page = next_params.get("_page", 1)
+    if current_page >= 2:
+        return None
+    next_params["_page"] = current_page + 1
+    return {"next_request": {"params": next_params}}
+
+
+@model(name="todos_ingest")
+def fetch_todos() -> pd.DataFrame:
+    """Load todos through `get_df` + `offset_paginator`, then tag, filter and upper-case them."""
+    # 2. get_df handles the HTTP calls, caching, and conversion
+    df = get_df(
+        url="https://jsonplaceholder.typicode.com/todos",
+        params={"_page": 1, "_limit": 10},  # Start at page 1
+        paginator=offset_paginator,
+        # record_path is None because the root of the JSON is the list itself
+        record_path=None,
+    )
+
+    # 3. Apply transformation logic
+    # If we change THIS logic later, FFT won't re-fetch the API!
+
+    # Example: Mark high-priority items locally
+    df["priority"] = df["title"].apply(lambda x: "HIGH" if "delectus" in x else "NORMAL")
+
+    # Keep open todos only; .copy() avoids SettingWithCopyWarning on the assignment below.
+    df = df[df["completed"].eq(False)].copy()
+
+    # New Logic: Uppercase titles
+    df["title"] = df["title"].str.upper()
+    return df
diff --git a/examples_article/http_cache_demo/profiles.yml b/examples_article/http_cache_demo/profiles.yml
new file mode 100644
index 0000000..d2a5bf7
--- /dev/null
+++ b/examples_article/http_cache_demo/profiles.yml
@@ -0,0 +1,13 @@
+# Profiles generated by `fft init`.
+# Update these placeholders as described in docs/Profiles.md.
+dev:
+  engine: duckdb
+  # DuckDB profile example. See docs/Profiles.md#engines-and-sections for details.
+  duckdb:
+    path: "{{ env('FF_DUCKDB_PATH', '.local/dev.duckdb') }}"  # Path to your DuckDB database file.
+
+# Default in-memory profile for quick experiments.
+default:
+  engine: duckdb
+  duckdb:
+    path: ":memory:"
diff --git a/examples_article/http_cache_demo/project.yml b/examples_article/http_cache_demo/project.yml
new file mode 100644
index 0000000..16a77d0
--- /dev/null
+++ b/examples_article/http_cache_demo/project.yml
@@ -0,0 +1,18 @@
+# Project configuration generated by `fft init`.
+# Read docs/Project_Config.md for the complete reference.
+name: http_cache_demo
+version: "0.1"
+models_dir: models
+
+docs:
+  # Adjust `dag_dir` to change where `fft dag --html` writes documentation (docs/Technical_Overview.md#auto-docs-and-lineage).
+  dag_dir: site/dag
+
+# Project-level variables accessible via {{ var('key') }} inside models.
+# Example:
+#   vars:
+#     run_date: "2024-01-01"
+vars: {}
+
+# Declare project-wide data quality checks under `tests`. See docs/Data_Quality_Tests.md.
+tests: []
diff --git a/examples_article/http_cache_demo/seeds/README.md b/examples_article/http_cache_demo/seeds/README.md
new file mode 100644
index 0000000..2e553ed
--- /dev/null
+++ b/examples_article/http_cache_demo/seeds/README.md
@@ -0,0 +1,4 @@
+# Seeds directory
+
+Add CSV or Parquet files for reproducible seeds.
+Usage examples are covered in docs/Quickstart.md and docs/Config_and_Macros.md#13-seeds-sources-and-dependencies.
diff --git a/examples_article/http_cache_demo/sources.yml b/examples_article/http_cache_demo/sources.yml
new file mode 100644
index 0000000..83436dc
--- /dev/null
+++ b/examples_article/http_cache_demo/sources.yml
@@ -0,0 +1,9 @@
+# Source declarations describe external tables. See docs/Sources.md for details.
+version: 2
+# sources:
+  # Example:
+  # - name: raw
+  #   schema: staging
+  #   tables:
+  #     - name: users
+  #       identifier: seed_users
diff --git a/examples_article/http_cache_demo/tests/dq/README.md b/examples_article/http_cache_demo/tests/dq/README.md
new file mode 100644
index 0000000..1acd01d
--- /dev/null
+++ b/examples_article/http_cache_demo/tests/dq/README.md
@@ -0,0 +1,4 @@
+# Data quality tests
+
+Store custom data-quality tests that run via `fft test` (docs/Data_Quality_Tests.md).
+Use this directory for schema-bound tests separate from unit specs.
diff --git a/examples_article/http_cache_demo/tests/unit/README.md b/examples_article/http_cache_demo/tests/unit/README.md
new file mode 100644
index 0000000..b3c3c8d
--- /dev/null
+++ b/examples_article/http_cache_demo/tests/unit/README.md
@@ -0,0 +1,4 @@
+# Unit tests
+
+Define YAML unit specs as described in docs/Config_and_Macros.md#73-model-unit-tests-fft-utest.
+Invoke them with `fft utest <project> --env <profile>`.
diff --git a/mkdocs.yml b/mkdocs.yml
index 1d17581..cfc4781 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -32,11 +32,11 @@ nav:
   - State Selection: State_Selection.md
   - YAML Tests: YAML_Tests.md
   - Data Quality Tests: Data_Quality_Tests.md
-  - API Reference: reference/
   - CLI Guide: CLI_Guide.md
   - Auto Docs: Auto_Docs.md
   - Logging: Logging.md
   - Unit Tests: Unit_Tests.md
+  - Snapshots: Snapshots.md
   - Troubleshooting: Troubleshooting.md
   - Examples:
       - Basic Demo: examples/Basic_Demo.md
@@ -48,6 +48,8 @@ nav:
       - Incremental Demo: examples/Incremental_Demo.md
       - API Demo: examples/API_Demo.md
       - Local Engine Setup: examples/Local_Engine_Setup.md
+      - Snapshot Demo: examples/Snapshot_Demo.md
+  - API Reference: reference/
   - Contributing: Contributing.md
   - License: License.md
 
diff --git a/src/fastflowtransform/api/context.py b/src/fastflowtransform/api/context.py
index 08937da..ff267e2 100644
--- a/src/fastflowtransform/api/context.py
+++ b/src/fastflowtransform/api/context.py
@@ -136,7 +136,7 @@ def record(
     if content_hash and (not stats.hashes or stats.hashes[-1] != content_hash):
         stats.hashes.append(content_hash)
 
-    stats.used_offline = bool(used_offline)
+    stats.used_offline = bool(stats.used_offline or used_offline)
 
 
 def snapshot() -> dict[str, Any]:
diff --git a/src/fastflowtransform/api/http.py b/src/fastflowtransform/api/http.py
index 75cab55..0abae81 100644
--- a/src/fastflowtransform/api/http.py
+++ b/src/fastflowtransform/api/http.py
@@ -133,6 +133,21 @@ def _write_cache(key: str, status: int, headers: dict, body: bytes, url: str) ->
     return meta
 
 
+def _maybe_json_payload(body: bytes) -> Any:
+    """Best-effort JSON parse: UTF-8 decoded text first, then raw `body`; else `body` as-is."""
+    for decode in (lambda raw: raw.decode("utf-8"), lambda raw: raw):
+        with suppress(Exception):
+            return json.loads(decode(body))
+    return body
+
+
+def _json_payload(body: bytes) -> Any:
+    """Parse `body` as JSON: UTF-8 decoded text first, the raw value as a fallback."""
+    with suppress(Exception):
+        return json.loads(body.decode("utf-8"))
+    return json.loads(body)
+
+
 def _http_request(
     method: str,
     url: str,
@@ -153,6 +168,115 @@ def _backoff_sleep(i: int) -> None:
     time.sleep(base + random.random() * 0.3 * base)
 
 
+def _request_with_cache(
+    method: str,
+    url: str,
+    params: dict | None,
+    headers: dict | None,
+    ttl: int | None,
+    timeout: float | None,
+) -> tuple[bytes, dict[str, Any]]:
+    """
+    Issue one HTTP request, answering from the filesystem cache when possible.
+
+    Returns ``(body, meta)``. Cache hits and fresh 2xx/304 responses are reported via
+    ``_ctx.record``; fresh ones are also written to the cache. Timeouts, request errors
+    and 429/500/502/503/504 statuses are retried (at least one attempt is always made).
+    Raises RuntimeError on an offline cache miss or when no attempt succeeds.
+    """
+    req_headers = dict(headers or {})
+    key = _cache_key(method, url, params, req_headers)
+    meta, body, hit = _read_cache(key, ttl)
+    if hit:
+        cached_meta: dict[str, Any] = meta or {}
+        cached_body = body or b""
+        content_hash = cached_meta.get("content_hash", "")
+        _ctx.record(key, content_hash, True, len(cached_body), used_offline=_OFFLINE)
+        return cached_body, cached_meta
+    # Offline mode: a cache miss cannot be served, so fail instead of going to the network.
+    if _OFFLINE:
+        raise RuntimeError(f"HTTP offline mode - cache miss for {url}")
+
+    attempts = max(_DEF_MAX_RETRIES, 1)
+    for attempt in range(attempts):
+        can_retry = attempt < attempts - 1
+        try:
+            status, resp_headers, resp_body = _http_request(
+                method, url, params=params, headers=req_headers, timeout=timeout
+            )
+        except _HTTP.TimeoutException as exc:
+            if can_retry:
+                _backoff_sleep(attempt)
+                continue
+            raise RuntimeError(f"HTTP timeout after {timeout or _DEF_TIMEOUT}s for {url}") from exc
+        except _HTTP.RequestError as exc:
+            if can_retry:
+                _backoff_sleep(attempt)
+                continue
+            raise RuntimeError(f"HTTP request error for {url}: {exc}") from exc
+        # Transient statuses: wait (Retry-After if usable, else backoff) and try again.
+        if status in (429, 500, 502, 503, 504) and can_retry:
+            retry_after = resp_headers.get("Retry-After")
+            if not retry_after:
+                _backoff_sleep(attempt)
+                continue
+            try:
+                time.sleep(float(retry_after))
+            except Exception:
+                _backoff_sleep(attempt)
+            continue
+        success_floor, success_ceiling, not_modified = 200, 300, 304
+        if success_floor <= status < success_ceiling or status == not_modified:
+            meta_out = _write_cache(key, status, resp_headers, resp_body, url)
+            fresh_hash = meta_out.get("content_hash", "")
+            _ctx.record(key, fresh_hash, False, len(resp_body), used_offline=False)
+            return resp_body, meta_out
+        raise RuntimeError(f"HTTP {status} for {url}")
+    # Not reached: the final attempt always returns or raises inside the loop.
+    raise RuntimeError(f"HTTP error after retries for {url}")
+
+
+def _collect_pages(
+    method: str,
+    url: str,
+    params: dict | None,
+    headers: dict[str, Any],
+    ttl: int | None,
+    timeout: float | None,
+    paginator: Callable[[str, dict | None, Any], dict | None] | None,
+    *,
+    keep_payload: bool,
+    payload_factory: Callable[[bytes], Any] | None,
+) -> list[tuple[bytes, dict[str, Any], Any]]:
+    """
+    Fetch `url` plus every follow-up request the `paginator` asks for.
+
+    Each page yields ``(body, meta, payload)``; `payload` is None unless `keep_payload`.
+    The paginator is always handed ``payload_factory(body)`` (None without a factory).
+    """
+    page_url, page_params, page_headers = url, params, dict(headers or {})
+    wants_payload = keep_payload or paginator is not None
+    pages: list[tuple[bytes, dict[str, Any], Any]] = []
+    while True:
+        body, meta = _request_with_cache(method, page_url, page_params, page_headers, ttl, timeout)
+        payload = payload_factory(body) if payload_factory and wants_payload else None
+        pages.append((body, meta, payload if keep_payload else None))
+        if paginator is None:
+            return pages
+        step = paginator(page_url, page_params, payload)
+        request = step.get("next_request") if step and isinstance(step, dict) else None
+        if not request:
+            return pages
+        # A missing/empty "url" keeps the current URL; params and headers change only when
+        # their key is present (an explicit None resets params to None and headers to {}).
+        page_url = request.get("url") or page_url
+        if "params" in request:
+            page_params = request.get("params")
+        if "headers" in request:
+            override = request.get("headers")
+            page_headers = {} if override is None else dict(override)
+
+
 # ---- Public API ---------------------------------------------------------
 def get(
     url: str,
@@ -160,13 +284,15 @@ def get(
     params: dict | None = None,
     headers: dict | None = None,
     ttl: int | None = None,
-    paginator: Callable[[str, dict | None, dict], dict | None] | None = None,
+    paginator: Callable[[str, dict | None, Any], dict | None] | None = None,
     timeout: float | None = None,
-) -> bytes:
+) -> bytes | list[bytes]:
     """
     Raw GET with optional FS cache and simple pagination.
     If paginator is provided, it should return
     {"next_request": {"url": "...", "params": {...}}} or None.
+    When pagination is active the result is a list of response bodies; otherwise
+    a single bytes object is returned.
     """
     if not _domain_ok(url):
         raise RuntimeError(f"HTTP domain not allowed by FF_HTTP_ALLOWED_DOMAINS: {url}")
@@ -174,70 +300,22 @@ def get(
     ttl = _DEF_TTL if ttl is None else ttl
     headers = dict(headers or {})
 
-    def _one(method: str, url_: str, params_: dict | None) -> tuple[bytes, dict]:
-        key = _cache_key(method, url_, params_, headers)
-        meta, body, hit = _read_cache(key, ttl)
-        if hit:
-            # meta can be None -> normalize to empty dict before accessing .get
-            meta_dict = meta or {}
-            _ctx.record(
-                key, meta_dict.get("content_hash", ""), True, len(body or b""), used_offline=True
-            )
-            return body or b"", meta_dict
-        if _OFFLINE:
-            raise RuntimeError(f"HTTP offline mode - cache miss for {url_}")
-
-        tries = max(_DEF_MAX_RETRIES, 1)
-        for i in range(tries):
-            try:
-                status, resp_headers, resp_body = _http_request(
-                    method, url_, params=params_, headers=headers, timeout=timeout
-                )
-            except _HTTP.TimeoutException as exc:
-                if i < tries - 1:
-                    _backoff_sleep(i)
-                    continue
-                raise RuntimeError(
-                    f"HTTP timeout after {timeout or _DEF_TIMEOUT}s for {url_}"
-                ) from exc
-            except _HTTP.RequestError as exc:
-                if i < tries - 1:
-                    _backoff_sleep(i)
-                    continue
-                raise RuntimeError(f"HTTP request error for {url_}: {exc}") from exc
-            if status in (429, 500, 502, 503, 504) and i < tries - 1:
-                # honor Retry-After (seconds) if present
-                ra = resp_headers.get("Retry-After")
-                if ra:
-                    try:
-                        time.sleep(float(ra))
-                    except Exception:
-                        _backoff_sleep(i)
-                else:
-                    _backoff_sleep(i)
-                continue
-            # write cache for any success or 304
-            http_status_200 = 200
-            http_status_300 = 300
-            http_status_304 = 304
-            if http_status_200 <= status < http_status_300 or status == http_status_304:
-                meta = _write_cache(key, status, resp_headers, resp_body, url_)
-                _ctx.record(
-                    key, meta.get("content_hash", ""), False, len(resp_body), used_offline=False
-                )
-                return resp_body, meta
-            raise RuntimeError(f"HTTP {status} for {url_}")
-        # should not reach
-        raise RuntimeError(f"HTTP error after retries for {url_}")
-
-    body, _ = _one("GET", url, params)
-    if not paginator:
+    if paginator is None:
+        body, _ = _request_with_cache("GET", url, params, headers, ttl, timeout)
         return body
 
-    # paginate: concatenated bytes are not helpful
-    # → collect JSON pages and join later in get_json/get_df
-    # Here we just return the first page; get_json/get_df implement paging across JSON.
-    return body
+    pages = _collect_pages(
+        "GET",
+        url,
+        params,
+        headers,
+        ttl,
+        timeout,
+        paginator,
+        keep_payload=False,
+        payload_factory=_maybe_json_payload,
+    )
+    return [body for body, _, _ in pages]
 
 
 def get_json(
@@ -246,37 +324,25 @@ def get_json(
     params: dict | None = None,
     headers: dict | None = None,
     ttl: int | None = None,
-    paginator: Callable[[str, dict | None, dict], dict | None] | None = None,
+    paginator: Callable[[str, dict | None, Any], dict | None] | None = None,
     timeout: float | None = None,
 ) -> Any:
     """GET returning parsed JSON. If paginator is provided, it follows pages via callback."""
     ttl = _DEF_TTL if ttl is None else ttl
     headers = dict(headers or {})
-
-    def _load_one(u: str, p: dict | None) -> tuple[Any, dict]:
-        raw = get(u, params=p, headers=headers, ttl=ttl, paginator=None, timeout=timeout)
-        try:
-            js = json.loads(raw.decode("utf-8"))
-        except Exception:
-            js = json.loads(raw)  # if already str
-        return js, {}
-
-    pages: list[Any] = []
-    u, p = url, params
-    while True:
-        js, _ = _load_one(u, p)
-        pages.append(js)
-        if paginator is None:
-            break
-        nxt = paginator(u, p, js)
-        if not nxt:
-            break
-        req = nxt.get("next_request")
-        if not req:
-            break
-        u = req.get("url") or u
-        p = req.get("params")
-    return pages[0] if paginator is None else pages
+    pages = _collect_pages(
+        "GET",
+        url,
+        params,
+        headers,
+        ttl,
+        timeout,
+        paginator,
+        keep_payload=True,
+        payload_factory=_json_payload,
+    )
+    payloads = [payload for _, _, payload in pages]
+    return payloads[0] if paginator is None else payloads
 
 
 MetaEntry = str | list[str]
@@ -293,7 +359,7 @@ def get_df(
     params: dict | None = None,
     headers: dict | None = None,
     ttl: int | None = None,
-    paginator: Callable[[str, dict | None, dict], dict | None] | None = None,
+    paginator: Callable[[str, dict | None, Any], dict | None] | None = None,
     json_path: list[str] | None = None,
     record_path: Sequence[str] | None = None,
     meta: MetaArgIn | None = None,
diff --git a/src/fastflowtransform/cli/__init__.py b/src/fastflowtransform/cli/__init__.py
index f3c8a2c..e67044c 100644
--- a/src/fastflowtransform/cli/__init__.py
+++ b/src/fastflowtransform/cli/__init__.py
@@ -62,6 +62,7 @@
     _selected_subgraph_names,
     _selector,
 )
+from fastflowtransform.cli.snapshot_cmd import register as _register_snapshot, snapshot
 from fastflowtransform.cli.sync_db_comments_cmd import (
     _pg_fq_table,
     _pg_quote_ident,
@@ -128,6 +129,7 @@ def main(
 _register_docgen(app)
 _register_sync_db_comments(app)
 _register_init(app)
+_register_snapshot(app)
 
 
 __all__ = [
@@ -191,6 +193,7 @@ def main(
     "run",
     "schedule",
     "seed",
+    "snapshot",
     "sync_db_comments",
     "test",
     "topo_sort",
diff --git a/src/fastflowtransform/cli/init_cmd.py b/src/fastflowtransform/cli/init_cmd.py
index 194ff4a..b6c42dd 100644
--- a/src/fastflowtransform/cli/init_cmd.py
+++ b/src/fastflowtransform/cli/init_cmd.py
@@ -11,7 +11,6 @@
     "duckdb",
     "postgres",
     "bigquery",
-    "bigquery_bf",
     "databricks_spark",
     "snowflake_snowpark",
 }
@@ -28,47 +27,51 @@ class _InitContext:
 def _build_profiles_yaml(ctx: _InitContext) -> str:
     engine_block = {
         "duckdb": [
-            "  # DuckDB profile example. See docs/Profiles.md#duckdb for details.",
+            "  # DuckDB profile example. See docs/Profiles.md#engines-and-sections for details.",
             "  duckdb:",
             "    path: \"{{ env('FF_DUCKDB_PATH', '.local/dev.duckdb') }}\"  # Path to your DuckDB database file.",  # Noqa E501
         ],
         "postgres": [
-            "  # Postgres profile example. See docs/Profiles.md#postgres for required keys.",
+            "  # Postgres profile example. See docs/Profiles.md#engines-and-sections "
+            "    for required keys.",
             "  postgres:",
             "    dsn: \"{{ env('FF_PG_DSN') }}\"  # Full Postgres DSN, e.g. postgresql://user:pass@host/db",
             "    db_schema: \"{{ env('FF_PG_SCHEMA', 'analytics') }}\"",
         ],
         "bigquery": [
-            "  # BigQuery profile example. See docs/Profiles.md#bigquery.",
+            "  # BigQuery profile example. See docs/Profiles.md#engines-and-sections.",
             "  bigquery:",
-            "    project: \"{{ env('FF_BQ_PROJECT') }}\"  # GCP project id.",
+            "    project: \"{{ env('FF_BQ_PROJECT') }}\"  # Optional if your ADC "
+            "    default project is set.",
             "    dataset: \"{{ env('FF_BQ_DATASET') }}\"  # Target dataset for models.",
-            "    location: US  # Update to match your dataset location.",
-        ],
-        "bigquery_bf": [
-            "  # BigQuery BigFrames profile example. See docs/Profiles.md#bigquery.",
-            "  bigquery_bf:",
-            "    project: \"{{ env('FF_BQ_PROJECT') }}\"",
-            "    dataset: \"{{ env('FF_BQ_DATASET') }}\"",
-            "    location: US",
+            "    location: \"{{ env('FF_BQ_LOCATION', 'US') }}\"  # Must match dataset location.",
+            "    use_bigframes: true  # Run Python models through BigQuery DataFrames (BigFrames).",
+            "    allow_create_dataset: false  # Set true to auto-create the dataset on first run.",
         ],
         "databricks_spark": [
-            "  # Databricks Spark profile example. See docs/Profiles.md#databricks-spark.",
+            "  # Databricks Spark profile example. See docs/Profiles.md#engines-and-sections.",
             "  databricks_spark:",
             "    master: \"{{ env('FF_SPARK_MASTER') }}\"  # e.g. spark://host:7077 or a Databricks cluster URL.",  # Noqa E501
             "    app_name: \"{{ env('FF_SPARK_APP_NAME', 'fft-project') }}\"",
             "    warehouse_dir: \"{{ env('FF_SPARK_WAREHOUSE', '/tmp/fft-warehouse') }}\"",
             "    use_hive_metastore: false",
+            "    extra_conf: {}  # Provide Spark conf overrides here.",
+            "    catalog: \"{{ env('FF_SPARK_CATALOG', '') }}\"  # Unity catalog (optional).",
+            "    database: \"{{ env('FF_SPARK_DATABASE', 'default') }}\"",
+            "    table_format: \"{{ env('FF_SPARK_TABLE_FORMAT', 'parquet') }}\"",
+            "    table_options: {}",
         ],
         "snowflake_snowpark": [
-            "  # Snowflake Snowpark profile example. See docs/Profiles.md#snowflake-snowpark.",
+            "  # Snowflake Snowpark profile example. See docs/Profiles.md#engines-and-sections.",
             "  snowflake_snowpark:",
             "    account: \"{{ env('FF_SF_ACCOUNT') }}\"",
             "    user: \"{{ env('FF_SF_USER') }}\"",
             "    password: \"{{ env('FF_SF_PASSWORD') }}\"",
             "    warehouse: \"{{ env('FF_SF_WAREHOUSE') }}\"",
             "    database: \"{{ env('FF_SF_DATABASE') }}\"",
-            "    db_schema: \"{{ env('FF_SF_SCHEMA', 'PUBLIC') }}\"",
+            "    schema: \"{{ env('FF_SF_SCHEMA', 'PUBLIC') }}\"",
+            "    role: \"{{ env('FF_SF_ROLE') }}\"",
+            "    allow_create_schema: true",
         ],
     }[ctx.engine]
 
@@ -100,7 +103,7 @@ def _build_project_yaml(ctx: _InitContext) -> str:
             "",
             "docs:",
             "  # Adjust `dag_dir` to change where `fft dag --html` writes documentation "
-            "(docs/Technical_Overview.md#documentation).",
+            "(docs/Technical_Overview.md#auto-docs-and-lineage).",
             "  dag_dir: site/dag",
             "",
             "# Project-level variables accessible via {{ var('key') }} inside models.",
@@ -122,7 +125,7 @@ def _build_sources_yaml() -> str:
         [
             "# Source declarations describe external tables. See docs/Sources.md for details.",
             "version: 2",
-            "sources:",
+            "# sources:",
             "  # Example:",
             "  # - name: raw",
             "  #   schema: staging",
@@ -170,13 +173,23 @@ def _create_directory_notes(target: Path) -> None:
                 "",
             ]
         ),
+        "tests/dq/README.md": "\n".join(
+            [
+                "# Data quality tests",
+                "",
+                "Store custom data-quality tests that run via `fft test` "
+                "(docs/Data_Quality_Tests.md).",
+                "Use this directory for schema-bound tests separate from unit specs.",
+                "",
+            ]
+        ),
         "docs/README.md": "\n".join(
             [
                 "# Project documentation",
                 "",
                 "Write operator or contributor notes here and keep "
                 "them in sync with generated docs.",
-                "See docs/Technical_Overview.md#documentation "
+                "See docs/Technical_Overview.md#auto-docs-and-lineage "
                 "for `fft dag` / `fft docgen` guidance.",
                 "",
             ]
@@ -220,7 +233,7 @@ def init(
             "--engine",
             help=(
                 "Executor engine for the default profile. "
-                "Supported values: duckdb, postgres, bigquery, bigquery_bf, "
+                "Supported values: duckdb, postgres, bigquery, "
                 "databricks_spark, snowflake_snowpark."
             ),
         ),
@@ -261,7 +274,7 @@ def init(
         engine=resolved_engine,
     )
 
-    for sub in ("models", "seeds", "tests/unit", "docs"):
+    for sub in ("models", "seeds", "tests/unit", "tests/dq", "docs"):
         (project_dir / sub).mkdir(parents=True, exist_ok=True)
 
     _write_file(project_dir / "project.yml", _build_project_yaml(ctx))
diff --git a/src/fastflowtransform/cli/run.py b/src/fastflowtransform/cli/run.py
index 0429d49..3930ffa 100644
--- a/src/fastflowtransform/cli/run.py
+++ b/src/fastflowtransform/cli/run.py
@@ -327,9 +327,21 @@ def _normalize_node_names_or_warn(names: list[str] | None) -> set[str]:
     out: set[str] = set()
     for tok in _parse_select(names or []):
         try:
-            out.add(REGISTRY.get_node(tok).name)
+            node = REGISTRY.get_node(tok)
         except KeyError:
             warn(f"Unknown model in --rebuild: {tok}")
+            continue
+
+        if _is_snapshot_model(node):
+            warn(
+                f"Ignoring snapshot model in --rebuild: {tok} "
+                "(snapshot models are not executed via 'fft run'; "
+                "use 'fft snapshot run' instead)."
+            )
+            continue
+
+        out.add(node.name)
+
     return out
 
 
@@ -344,6 +356,15 @@ def _abbr(e: str) -> str:
     return mapping.get(e, e.upper()[:4])
 
 
+def _is_snapshot_model(node: Any) -> bool:
+    """
+    Report whether `node` is a snapshot model, i.e. its ``meta["materialized"]``
+    equals "snapshot" case-insensitively. Nodes without `meta` are not snapshots.
+    """
+    materialized = (getattr(node, "meta", {}) or {}).get("materialized")
+    return str(materialized or "").lower() == "snapshot"
+
+
 # ----------------- helpers (run function) -----------------
 
 
@@ -357,7 +378,11 @@ def _build_engine_ctx(project, env_name, engine, vars, cache, no_cache):
 
 
 def _select_predicate_and_raw(
-    executor_engine: _RunEngine, ctx: CLIContext, select: SelectOpt
+    executor_engine: _RunEngine,
+    ctx: CLIContext,
+    select: SelectOpt,
+    *,
+    include_snapshots: bool = False,
 ) -> tuple[list[str], Callable[[Any], bool], list[str]]:
     select_tokens = _parse_select(select or [])
     base_tokens = [t for t in select_tokens if not t.startswith("state:modified")]
@@ -369,7 +394,13 @@ def _select_predicate_and_raw(
         modified_set = executor_engine.cache.modified_set(ctx.jinja_env, executor)
         select_pred = augment_with_state_modified(select_tokens, base_pred, modified_set)
 
-    raw_selected = [k for k, v in REGISTRY.nodes.items() if select_pred(v)]
+    raw_selected = []
+    for k, v in REGISTRY.nodes.items():
+        if not select_pred(v):
+            continue
+        if not include_snapshots and _is_snapshot_model(v):
+            continue
+        raw_selected.append(k)
     return select_tokens, select_pred, raw_selected
 
 
diff --git a/src/fastflowtransform/cli/snapshot_cmd.py b/src/fastflowtransform/cli/snapshot_cmd.py
new file mode 100644
index 0000000..d09e499
--- /dev/null
+++ b/src/fastflowtransform/cli/snapshot_cmd.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+import typer
+
+from fastflowtransform.cli.bootstrap import CLIContext, _prepare_context
+from fastflowtransform.cli.options import (
+    EngineOpt,
+    EnvOpt,
+    ExcludeOpt,
+    JobsOpt,
+    KeepOpt,
+    ProjectArg,
+    SelectOpt,
+    VarsOpt,
+)
+from fastflowtransform.cli.run import (
+    CacheMode,
+    _attempt_catalog,
+    _emit_logs_and_errors,
+    _levels_for_run,
+    _run_schedule,
+    _RunEngine,
+    _select_predicate_and_raw,
+    _wanted_names,
+    _write_artifacts,
+)
+from fastflowtransform.core import REGISTRY, relation_for
+from fastflowtransform.executors.base import BaseExecutor
+from fastflowtransform.logging import bind_context, clear_context, echo
+
+snapshot = typer.Typer(help="Snapshot materialization commands.")
+
+
+class _SnapshotRunEngine(_RunEngine):
+    """
+    _RunEngine specialisation whose run_node() dispatches SQL nodes to
+    executor.run_snapshot_sql(...) and rejects every other node kind.
+    """
+
+    def run_node(self, name: str) -> None:
+        """Run one snapshot node; there is no fingerprint/cache skip on this path."""
+        target = REGISTRY.nodes[name]
+        executor, _sql_runner, _py_runner = self._get_runner()
+        if target.kind == "sql":
+            executor.run_snapshot_sql(target, self.ctx.jinja_env)
+            return
+        msg = f"Snapshot run only supports SQL models, but node '{name}' is kind={target.kind!r}."
+        raise TypeError(msg)
+
+
+def _prune_snapshots(
+    executor: BaseExecutor, snapshot_names: set[str], keep_last: int, dry_run: bool
+) -> None:
+    """
+    Prune every snapshot model via executor.snapshot_prune(...). Models without a
+    unique_key/primary_key and engines raising NotImplementedError are skipped.
+    """
+    prefix = "[DRY-RUN] " if dry_run else ""
+    for name in sorted(snapshot_names):
+        meta = getattr(REGISTRY.nodes[name], "meta", {}) or {}
+
+        unique_key = meta.get("unique_key") or meta.get("primary_key") or []
+        unique_key_list = [unique_key] if isinstance(unique_key, str) else list(unique_key)
+
+        if not unique_key_list:
+            echo(f"Skipping prune for {name}: missing unique_key/primary_key.")
+            continue
+
+        rel = relation_for(name)
+        echo(f"{prefix}Pruning snapshot {name} (relation={rel}, keep_last={keep_last})")
+        try:
+            executor.snapshot_prune(rel, unique_key_list, keep_last=keep_last, dry_run=dry_run)
+        except NotImplementedError:
+            # hasattr() is always true here: BaseExecutor ships a raising default.
+            eng = getattr(executor, "engine_name", "unknown")
+            echo(f"Skipping prune for {name}: snapshot_prune not implemented for engine '{eng}'.")
+
+
+@snapshot.command("run")
+def snapshot_run(
+    project: ProjectArg = ".",
+    env_name: EnvOpt = "dev",
+    engine: EngineOpt = None,
+    vars: VarsOpt = None,
+    select: SelectOpt = None,
+    exclude: ExcludeOpt = None,
+    jobs: JobsOpt = 1,
+    keep_going: KeepOpt = False,
+    prune: bool = typer.Option(
+        False,
+        "--prune",
+        help="Prune historical snapshot rows after a successful run.",
+    ),
+    keep_last: int = typer.Option(
+        3,
+        "--keep-last",
+        min=1,
+        help="Number of latest versions per key to keep when pruning.",
+    ),
+    dry_run: bool = typer.Option(
+        False,
+        "--dry-run",
+        help="Show pruning actions without modifying any data.",
+    ),
+) -> None:
+    """
+    Execute only snapshot models (materialized='snapshot').
+
+    Selection works like `fft run` but the final set is restricted to snapshot
+    models. Use --prune/--keep-last/--dry-run for retention; --dry-run only
+    affects pruning, the snapshot models themselves are still executed.
+    """
+    ctx: CLIContext = _prepare_context(project, env_name, engine, vars)
+    bind_context(engine=ctx.profile.engine, env=env_name)
+    try:
+        engine_ = _SnapshotRunEngine(
+            ctx=ctx,
+            pred=None,
+            env_name=env_name,
+            cache_mode=CacheMode.OFF,  # snapshots always run; no cache skipping
+            force_rebuild=set(),
+        )
+
+        # Selection identical to run(), but we filter to snapshots afterwards.
+        select_tokens, _, raw_selected = _select_predicate_and_raw(
+            engine_, ctx, select, include_snapshots=True
+        )
+        wanted_all = _wanted_names(
+            select_tokens=select_tokens, exclude=exclude, raw_selected=raw_selected
+        )
+
+        # Restrict to snapshot models only. Compare case-insensitively, like the
+        # _is_snapshot_model() filter that excludes these models from `fft run`.
+        snapshot_names: set[str] = set()
+        for name in wanted_all:
+            meta = getattr(REGISTRY.nodes[name], "meta", {}) or {}
+            if str(meta.get("materialized") or "").lower() == "snapshot":
+                snapshot_names.add(name)
+
+        if not snapshot_names:
+            typer.secho("Nothing to run (no snapshot models in selection).", fg="yellow")
+            raise typer.Exit(0)
+
+        # Build DAG levels for the full wanted set so dependency validation still runs.
+        lvls_all = _levels_for_run([], wanted_all)
+        # Only execute snapshot nodes while preserving their relative order.
+        filtered = ([n for n in level if n in snapshot_names] for level in lvls_all)
+        lvls = [lvl for lvl in filtered if lvl]
+
+        result, logq, started_at, finished_at = _run_schedule(engine_, lvls, jobs, keep_going, ctx)
+
+        _write_artifacts(ctx, result, started_at, finished_at, engine_)
+        _attempt_catalog(ctx)
+        _emit_logs_and_errors(logq, result, engine_)
+
+        if result.failed:
+            raise typer.Exit(1)
+
+        # Optional retention
+        if prune:
+            executor = engine_.shared[0]
+            _prune_snapshots(executor, snapshot_names, keep_last, dry_run)
+
+        engine_.persist_on_success(result)
+        engine_.print_timings(result)
+        echo("✓ Snapshot run done")
+    finally:
+        # Always release the logging context, including on typer.Exit and errors.
+        clear_context()
+
+
+def register(app: typer.Typer) -> None:
+    app.add_typer(snapshot, name="snapshot")  # mount the sub-app under the "snapshot" name
diff --git a/src/fastflowtransform/cli/test_cmd.py b/src/fastflowtransform/cli/test_cmd.py
index 8701e9d..382d0ea 100644
--- a/src/fastflowtransform/cli/test_cmd.py
+++ b/src/fastflowtransform/cli/test_cmd.py
@@ -49,6 +49,15 @@ class DQResult:
     example_sql: str | None = None
 
 
+def _is_snapshot_model(node: Any) -> bool:
+    """
+    Report whether the node's meta marks it as a snapshot (materialized='snapshot').
+    """
+    materialized = (getattr(node, "meta", {}) or {}).get("materialized")
+    normalized = str(materialized or "").lower()
+    return normalized == "snapshot"
+
+
 def _print_model_error_block(node_name: str, relation: str, message: str, sql: str | None) -> None:
     header = "┌" + "─" * 70
     footer = "└" + "─" * 70
@@ -99,7 +108,11 @@ def _run_models(
     before: Callable[[str, Any], None] | None = None,
     on_error: Callable[[str, Any, Exception], None] | None = None,
 ) -> None:
-    order = [n for n in topo_sort(REGISTRY.nodes) if pred(REGISTRY.nodes[n])]
+    order = [
+        n
+        for n in topo_sort(REGISTRY.nodes)
+        if pred(REGISTRY.nodes[n]) and not _is_snapshot_model(REGISTRY.nodes[n])
+    ]
     _execute_models(order, run_sql, run_py, before=before, on_error=on_error)
 
 
diff --git a/src/fastflowtransform/config/models.py b/src/fastflowtransform/config/models.py
index 7c3989e..f71ab6c 100644
--- a/src/fastflowtransform/config/models.py
+++ b/src/fastflowtransform/config/models.py
@@ -1,3 +1,4 @@
+# fastflowtransform/config/models.py
 from __future__ import annotations
 
 from collections.abc import Mapping, Sequence
@@ -108,6 +109,40 @@ def _normalize_str_or_seq(cls, v: Any) -> list[str] | None:
         raise TypeError("must be a string or a sequence of strings")
 
 
+class SnapshotConfig(BaseModel):
+    """
+    Structured ``snapshot={...}`` block of a model's config(...) call, e.g.:
+
+        {{ config(
+            materialized='snapshot',
+            snapshot={
+                "strategy": "timestamp",   # or "check"
+                "updated_at": "updated_at",
+                "check_cols": ["col1", "col2"],  # required for strategy='check'
+            },
+            unique_key=["id"],
+        ) }}
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    strategy: Literal["timestamp", "check"]
+    updated_at: str | None = None
+    updated_at_column: str | None = None
+    check_cols: list[str] | None = None
+
+    @field_validator("check_cols", mode="before")
+    @classmethod
+    def _normalize_check_cols(cls, v: Any) -> list[str] | None:
+        """Coerce None / a single name / a non-string sequence into list[str] | None."""
+        if v is None or isinstance(v, str):
+            return v if v is None else [v]
+        # bytes is a Sequence too, but is rejected like any other unsupported type
+        if not isinstance(v, Sequence) or isinstance(v, bytes):
+            raise TypeError("check_cols must be a string or a sequence of strings")
+        return [str(x) for x in v]
+
+
 # ---------------------------------------------------------------------------
 # ModelConfig - canonical form of config(...) / decorator meta
 # ---------------------------------------------------------------------------
@@ -132,7 +167,7 @@ class ModelConfig(BaseModel):
 
     # --- Core materialization & classification -----------------------------
 
-    materialized: Literal["table", "view", "incremental", "ephemeral"] | None = None
+    materialized: Literal["table", "view", "incremental", "ephemeral", "snapshot"] | None = None
 
     # Optional logical kind; useful for selectors (kind:python / kind:sql / etc.)
     kind: str | None = None
@@ -157,6 +192,9 @@ class ModelConfig(BaseModel):
     #   - { ... IncrementalConfig fields ... }
     incremental: IncrementalConfig | None = None
 
+    # --- Snapshot configuration (structured) ---------------------------------
+    snapshot: SnapshotConfig | None = None
+
     # Top-level shortcuts (backwards-compatible)
     # These are used by existing executor logic.
     unique_key: list[str] | None = None
@@ -273,6 +311,14 @@ def _merge_incremental_overlays(self) -> ModelConfig:
         if self.delta and not self.delta_sql:
             self.delta_sql = self.delta.sql
 
+        # Mirror snapshot hints onto top-level shortcuts for backwards compatibility.
+        snap = self.snapshot
+        if snap:
+            if self.updated_at is None and snap.updated_at is not None:
+                self.updated_at = snap.updated_at
+            if self.updated_at_column is None and snap.updated_at_column is not None:
+                self.updated_at_column = snap.updated_at_column
+
         return self
 
     # ----------------------------------------------------------------------
@@ -291,19 +337,26 @@ def is_incremental_enabled(self) -> bool:
     # Cross-field guardrails (fail fast with clear messages)
     # ----------------------------------------------------------------------
     @model_validator(mode="after")
-    def _validate_incremental_requirements(self) -> ModelConfig:
+    def _validate_model_requirements(self) -> ModelConfig:
         """
-        Enforce combinations that must hold for incremental materializations.
+        Enforce combinations that must hold for incremental and snapshot models.
 
-        Rules:
+        Incremental rules:
           1) If materialized == 'incremental', incremental must be effectively enabled.
           2) If incremental is enabled, at least one freshness/delta hint must exist:
              - updated_at / updated_at_column / updated_at_columns / timestamp_columns
                OR delta_sql OR delta_python.
           3) If both updated_at and updated_at_column are provided, they must match.
-          4) (Opinionated) Require unique_key when incremental is enabled
-             to avoid accidental cartesian merges. Relax if your executor permits.
+          4) Require unique_key when incremental is enabled.
+
+        Snapshot rules:
+          1) If materialized == 'snapshot', a snapshot config must be provided.
+          2) Snapshot models require unique_key (or primary_key).
+          3) strategy must be 'timestamp' or 'check'.
+          4) For 'timestamp', require updated_at / updated_at_column.
+          5) For 'check', require check_cols.
         """
+        # --- Incremental ---------------------------------------------------
         is_mat_inc = self.materialized == "incremental"
         is_inc_enabled = self.is_incremental_enabled()
 
@@ -348,6 +401,45 @@ def _validate_incremental_requirements(self) -> ModelConfig:
                 "for safe merges. Example: unique_key: ['id']"
             )
 
+        # --- Snapshot-specific rules --------------------------------------
+        if self.materialized == "snapshot":
+            snap = self.snapshot
+            if snap is None:
+                raise ValueError(
+                    "materialized='snapshot' requires a snapshot config block. "
+                    "Example:\n"
+                    "  snapshot: { strategy: 'timestamp' }"
+                )
+
+            # business key
+            if not (self.unique_key or self.primary_key):
+                raise ValueError(
+                    "materialized='snapshot' requires a unique_key (or primary_key). "
+                    "Example: unique_key: ['id']"
+                )
+
+            # strategy is validated by SnapshotConfig (Literal), but we keep a guardrail here
+            if snap.strategy not in ("timestamp", "check"):
+                raise ValueError(
+                    "Snapshot models require strategy='timestamp' or 'check'. "
+                    "Example: snapshot: { strategy: 'timestamp' }"
+                )
+
+            # timestamp strategy: needs updated_at
+            snap_updated = snap.updated_at or snap.updated_at_column
+            if snap.strategy == "timestamp" and not snap_updated:
+                raise ValueError(
+                    "strategy='timestamp' snapshots require snapshot.updated_at or "
+                    "snapshot.updated_at_column."
+                )
+
+            # check strategy: needs check_cols
+            if snap.strategy == "check" and not snap.check_cols:
+                raise ValueError(
+                    "strategy='check' snapshots require snapshot.check_cols "
+                    "(string or list of column names)."
+                )
+
         return self
 
 
diff --git a/src/fastflowtransform/docs.py b/src/fastflowtransform/docs.py
index c8fdc16..d84b97a 100644
--- a/src/fastflowtransform/docs.py
+++ b/src/fastflowtransform/docs.py
@@ -160,6 +160,7 @@ def _materialization_legend() -> dict[str, dict[str, str]]:
         "view": {"label": "view", "class": "badge-view"},
         "ephemeral": {"label": "ephemeral", "class": "badge-ephemeral"},
         "incremental": {"label": "incremental", "class": "badge-incremental"},
+        "snapshot": {"label": "snapshot", "class": "badge-snapshot"},
     }
 
 
diff --git a/src/fastflowtransform/executors/_spark_imports.py b/src/fastflowtransform/executors/_spark_imports.py
new file mode 100644
index 0000000..e68bf16
--- /dev/null
+++ b/src/fastflowtransform/executors/_spark_imports.py
@@ -0,0 +1,57 @@
+# fastflowtransform/executors/_spark_imports.py
+from __future__ import annotations
+
+from types import ModuleType
+from typing import TYPE_CHECKING
+
+__all__ = ["get_spark_functions", "get_spark_window"]
+
+
+def _spark_missing_error(exc: Exception) -> RuntimeError:
+    """Build the RuntimeError raised when pyspark cannot be imported, citing the cause."""
+    install_hint = "Install the extra: fastflowtransform[spark]."
+    base = f"pyspark is required for Spark/Databricks executors. {install_hint}"
+    return RuntimeError(f"{base} (import error: {exc!r})")
+
+
+if TYPE_CHECKING:  # pragma: no cover - typing only
+    # Imported for static analysis only; at runtime the `else` branch below is used.
+    from pyspark.sql import (
+        Window,
+    )
+
+    # `Window` itself is a class with static constructors (partitionBy, orderBy, ...),
+    # so using it directly as the return type is fine.
+    def get_spark_window() -> type[Window]:  # Window is a class
+        ...
+
+    # `functions` is a module; for typing purposes we just expose it as ModuleType.
+    def get_spark_functions() -> ModuleType: ...
+
+else:
+    # Runtime implementations - no need to annotate; type-checkers use the stubs above.
+    def get_spark_window():
+        """
+        Import pyspark.sql.Window at call time and return the class.
+
+        Raises:
+            RuntimeError: built by _spark_missing_error when the import raises.
+        """
+        try:
+            from pyspark.sql import Window  # noqa: PLC0415
+        except Exception as exc:  # pragma: no cover
+            raise _spark_missing_error(exc) from exc
+        return Window
+
+    def get_spark_functions():
+        """
+        Import pyspark.sql.functions at call time and return the module.
+
+        Raises:
+            RuntimeError: built by _spark_missing_error when the import raises.
+        """
+        try:
+            from pyspark.sql import functions as F  # noqa: PLC0415
+        except Exception as exc:  # pragma: no cover
+            raise _spark_missing_error(exc) from exc
+        return F
diff --git a/src/fastflowtransform/executors/base.py b/src/fastflowtransform/executors/base.py
index 7ef7672..a6a43e8 100644
--- a/src/fastflowtransform/executors/base.py
+++ b/src/fastflowtransform/executors/base.py
@@ -19,7 +19,7 @@
 from fastflowtransform.core import REGISTRY, Node, relation_for, resolve_source_entry
 from fastflowtransform.errors import ModelExecutionError
 from fastflowtransform.incremental import _normalize_unique_key
-from fastflowtransform.logging import echo_debug
+from fastflowtransform.logging import echo, echo_debug
 from fastflowtransform.validation import validate_required_columns
 
 
@@ -117,6 +117,13 @@ class BaseExecutor[TFrame](ABC):
       - (optional) _frame_name
     """
 
+    # Standard meta columns used by snapshot materialization.
+    SNAPSHOT_VALID_FROM_COL = "_ff_valid_from"
+    SNAPSHOT_VALID_TO_COL = "_ff_valid_to"
+    SNAPSHOT_IS_CURRENT_COL = "_ff_is_current"
+    SNAPSHOT_HASH_COL = "_ff_snapshot_hash"
+    SNAPSHOT_UPDATED_AT_COL = "_ff_updated_at"
+
     # ---------- SQL ----------
     def render_sql(
         self,
@@ -236,6 +243,18 @@ def run_sql(self, node: Node, env: Environment) -> None:
             # Delegates to incremental engine: render, schema sync, merge/insert, etc.
             return _ff_incremental.run_or_dispatch(self, node, env)
 
+        if self._meta_is_snapshot(meta):
+            # Snapshots are executed via the dedicated CLI: `fft snapshot run`.
+            raise ModelExecutionError(
+                node_name=node.name,
+                relation=relation_for(node.name),
+                message=(
+                    "Snapshot models cannot be executed via 'fft run'. "
+                    "Use 'fft snapshot run' instead."
+                ),
+                sql_snippet="",
+            )
+
         sql_rendered = self.render_sql(
             node,
             env,
@@ -289,6 +308,17 @@ def run_sql(self, node: Node, env: Environment) -> None:
                 sql_snippet=preview,
             ) from e
 
+    def run_snapshot_sql(self, node: Node, env: Environment) -> None:
+        """
+        Run a SQL model whose materialization is 'snapshot'.
+
+        This base version only raises NotImplementedError naming the engine;
+        executors that support snapshots override it.
+        """
+        engine = self.engine_name
+        message = f"Snapshot materialization is not implemented for engine '{engine}'."
+        raise NotImplementedError(message)
+
     # --- Helpers for materialization & ephemeral inlining (instance methods) ---
     def _first_select_body(self, sql: str) -> str:
         """
@@ -593,7 +623,22 @@ def _snapshot_http_ctx(self, node: Node) -> None:
             return
 
         with suppress(Exception):
-            (node.meta or {}).update({"_http_snapshot": snap})
+            if not isinstance(node.meta, dict) or not node.meta:
+                node.meta = {}
+            node.meta["_http_snapshot"] = snap
+
+        requests = int(snap.get("requests") or 0)
+        if requests <= 0:
+            return
+        cache_hits = int(snap.get("cache_hits") or 0)
+        bytes_read = int(snap.get("bytes") or 0)
+        offline = bool(snap.get("used_offline"))
+        echo(
+            f"HTTP stats for {node.name}: requests={requests} cache_hits={cache_hits} "
+            f"bytes={bytes_read} offline={offline}"
+        )
+        if offline:
+            echo(f"Node {node.name} served responses from offline cache")
 
     # -------- Python model view helpers (shared) --------
     def _py_view_backing_name(self, relation: str) -> str:
@@ -711,7 +756,7 @@ def _resolve_source(self, source_name: str, table_name: str) -> str:
         cfg.setdefault("options", {})
         return self._format_source_reference(cfg, source_name, table_name)
 
-    # ---------- Abstrakte Frame-Hooks ----------
+    # ---------- Abstract Frame-Hooks ----------
     @abstractmethod
     def _read_relation(self, relation: str, node: Node, deps: Iterable[str]) -> TFrame: ...
 
@@ -816,6 +861,36 @@ def _meta_is_incremental(meta: Mapping[str, Any] | None) -> bool:
         # Fallback: any non-empty incremental value is treated as "enabled".
         return bool(incremental_cfg)
 
+    # ── Snapshot API ──────────────────────────────────────────────────
+    def snapshot_prune(
+        self,
+        relation: str,
+        unique_key: list[str],
+        keep_last: int,
+        *,
+        dry_run: bool = False,
+    ) -> None:  # pragma: no cover - abstract
+        """
+        Remove superseded snapshot versions of ``relation`` (engine hook).
+
+        The base class has no implementation and always raises NotImplementedError.
+        """
+        engine = self.engine_name
+        message = f"Snapshot pruning is not implemented for engine '{engine}'."
+        raise NotImplementedError(message)
+
+    @staticmethod
+    def _meta_is_snapshot(meta: Mapping[str, Any] | None) -> bool:
+        """
+        Tell whether ``meta`` marks a model as a snapshot.
+
+        The only signal is materialized='snapshot', compared case-insensitively.
+        """
+        declared = meta.get("materialized") if meta else None
+        if not declared:
+            return False
+        return str(declared).lower() == "snapshot"
+
     ENGINE_NAME: str = "generic"
 
     @property
diff --git a/src/fastflowtransform/executors/bigquery/base.py b/src/fastflowtransform/executors/bigquery/base.py
index bae74a9..394dab2 100644
--- a/src/fastflowtransform/executors/bigquery/base.py
+++ b/src/fastflowtransform/executors/bigquery/base.py
@@ -7,7 +7,9 @@
 from fastflowtransform.executors._shims import BigQueryConnShim
 from fastflowtransform.executors.base import BaseExecutor
 from fastflowtransform.executors.bigquery._bigquery_mixin import BigQueryIdentifierMixin
+from fastflowtransform.logging import echo
 from fastflowtransform.meta import ensure_meta_table, upsert_meta
+from fastflowtransform.snapshots import resolve_snapshot_config
 from fastflowtransform.typing import BadRequest, Client, NotFound, bigquery
 
 TFrame = TypeVar("TFrame")
@@ -275,3 +277,226 @@ def alter_table_sync_schema(
                 f"ALTER TABLE {target} ADD COLUMN {col} {typ}",
                 location=self.location,
             ).result()
+
+    # ── Snapshots API (shared for pandas + BigFrames) ─────────────────────
+    def run_snapshot_sql(self, node: Node, env: Any) -> None:
+        """
+        Materialize a SQL snapshot model in BigQuery.
+
+        - Relation missing: CREATE TABLE AS SELECT of the model body plus the
+          _ff_* snapshot meta columns (BaseExecutor.SNAPSHOT_*_COL), then return.
+        - Relation exists: one UPDATE closes current rows matched by a changed
+          source row, then one INSERT adds current rows for new/changed keys.
+        Raises TypeError for non-SQL nodes; ValueError if not a snapshot or settings are missing.
+        """
+        if node.kind != "sql":
+            raise TypeError(
+                f"Snapshot materialization is only supported for SQL models, "
+                f"got kind={node.kind!r} for {node.name}."
+            )
+
+        meta = getattr(node, "meta", {}) or {}
+        if not self._meta_is_snapshot(meta):
+            raise ValueError(f"Node {node.name} is not configured with materialized='snapshot'.")
+
+        cfg = resolve_snapshot_config(node, meta)
+        strategy = cfg.strategy  # "timestamp" | "check"
+        unique_key = cfg.unique_key  # list[str]
+        updated_at = cfg.updated_at  # str | None
+        check_cols = cfg.check_cols  # list[str]
+
+        if not unique_key:
+            raise ValueError(f"{node.path}: snapshot models require a non-empty unique_key list.")
+
+        # ---- Render SQL and extract SELECT body (reused as subquery `s` below) ----
+        sql_rendered = self.render_sql(
+            node,
+            env,
+            ref_resolver=lambda name: self._resolve_ref(name, env),
+            source_resolver=self._resolve_source,
+        )
+        sql_clean = self._strip_leading_config(sql_rendered).strip()
+        body = self._selectable_body(sql_clean).rstrip(" ;\n\t")
+
+        rel_name = relation_for(node.name)
+        target = self._qualified_identifier(rel_name)
+
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+        vt = BaseExecutor.SNAPSHOT_VALID_TO_COL
+        is_cur = BaseExecutor.SNAPSHOT_IS_CURRENT_COL
+        hash_col = BaseExecutor.SNAPSHOT_HASH_COL
+        upd_meta = BaseExecutor.SNAPSHOT_UPDATED_AT_COL
+
+        self._ensure_dataset()
+
+        # ---- First run: create snapshot table, every row current and open-ended ----
+        if not self.exists_relation(rel_name):
+            if strategy == "timestamp":
+                if not updated_at:
+                    raise ValueError(
+                        f"{node.path}: strategy='timestamp' snapshots require an updated_at column."
+                    )
+                create_sql = f"""
+CREATE TABLE {target} AS
+SELECT
+  s.*,
+  s.{updated_at} AS {upd_meta},
+  s.{updated_at} AS {vf},
+  CAST(NULL AS TIMESTAMP) AS {vt},
+  TRUE AS {is_cur},
+  CAST(NULL AS STRING) AS {hash_col}
+FROM ({body}) AS s
+"""
+            else:  # strategy == "check": hash of check_cols is stored for later comparison
+                if not check_cols:
+                    raise ValueError(
+                        f"{node.path}: strategy='check' snapshots require non-empty check_cols."
+                    )
+                col_exprs = [f"COALESCE(CAST(s.{col} AS STRING), '')" for col in check_cols]
+                concat_expr = " || '||' || ".join(col_exprs)
+                hash_expr = f"TO_HEX(MD5({concat_expr}))"
+                upd_expr = f"s.{updated_at}" if updated_at else "CURRENT_TIMESTAMP()"
+                create_sql = f"""
+CREATE TABLE {target} AS
+SELECT
+  s.*,
+  {upd_expr} AS {upd_meta},
+  CURRENT_TIMESTAMP() AS {vf},
+  CAST(NULL AS TIMESTAMP) AS {vt},
+  TRUE AS {is_cur},
+  {hash_expr} AS {hash_col}
+FROM ({body}) AS s
+"""
+            self.client.query(create_sql, location=self.location).result()
+            return
+
+        # ---- Relation exists: build the change test, then close + insert ----
+        keys_pred = " AND ".join([f"t.{k} = s.{k}" for k in unique_key])
+
+        if strategy == "timestamp":
+            if not updated_at:
+                raise ValueError(
+                    f"{node.path}: strategy='timestamp' snapshots require an updated_at column."
+                )
+            change_condition = f"s.{updated_at} > t.{upd_meta}"
+            new_upd_expr = f"s.{updated_at}"
+            new_valid_from_expr = f"s.{updated_at}"
+            new_hash_expr = "NULL"
+        else:  # "check"; assumes resolve_snapshot_config ensured check_cols - TODO confirm
+            col_exprs_s = [f"COALESCE(CAST(s.{col} AS STRING), '')" for col in check_cols]
+            concat_expr_s = " || '||' || ".join(col_exprs_s)
+            hash_expr_s = f"TO_HEX(MD5({concat_expr_s}))"
+            change_condition = f"COALESCE({hash_expr_s}, '') <> COALESCE(t.{hash_col}, '')"
+            new_upd_expr = f"s.{updated_at}" if updated_at else "CURRENT_TIMESTAMP()"
+            new_valid_from_expr = "CURRENT_TIMESTAMP()"
+            new_hash_expr = hash_expr_s
+
+        # 1) Close current rows whose key matches a changed source row
+        close_sql = f"""
+UPDATE {target} AS t
+SET
+  {vt} = CURRENT_TIMESTAMP(),
+  {is_cur} = FALSE
+FROM ({body}) AS s
+WHERE
+  {keys_pred}
+  AND t.{is_cur} = TRUE
+  AND {change_condition}
+"""
+        self.client.query(close_sql, location=self.location).result()
+
+        # 2) Insert new current versions (new keys, or keys whose current row was closed in 1)
+        first_key = unique_key[0]
+        insert_sql = f"""
+INSERT INTO {target}
+SELECT
+  s.*,
+  {new_upd_expr} AS {upd_meta},
+  {new_valid_from_expr} AS {vf},
+  CAST(NULL AS TIMESTAMP) AS {vt},
+  TRUE AS {is_cur},
+  {new_hash_expr} AS {hash_col}
+FROM ({body}) AS s
+LEFT JOIN {target} AS t
+  ON {keys_pred}
+  AND t.{is_cur} = TRUE
+WHERE
+  t.{first_key} IS NULL
+  OR {change_condition}
+"""
+        self.client.query(insert_sql, location=self.location).result()
+
+    def snapshot_prune(
+        self,
+        relation: str,
+        unique_key: list[str],
+        keep_last: int,
+        *,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Delete all but the `keep_last` newest rows (by _ff_valid_from) per business key;
+        with dry_run=True only the number of rows that would be deleted is echoed.
+        """
+        if keep_last <= 0:  # non-positive keep_last: nothing is pruned
+            return
+
+        keys = [k for k in unique_key if k]
+        if not keys:
+            return
+
+        target = self._qualified_identifier(
+            relation,
+            project=self.project,
+            dataset=self.dataset,
+        )
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+        key_select = ", ".join(keys)
+        part_by = ", ".join(keys)
+        # Rank each key's versions newest-first; rn > keep_last marks rows to drop.
+        ranked_sql = f"""
+SELECT
+  {key_select},
+  {vf},
+  ROW_NUMBER() OVER (
+    PARTITION BY {part_by}
+    ORDER BY {vf} DESC
+  ) AS rn
+FROM {target}
+"""
+        # Dry run: report the count only; no DELETE is issued.
+        if dry_run:
+            sql = f"""
+WITH ranked AS (
+  {ranked_sql}
+)
+SELECT COUNT(*) AS rows_to_delete
+FROM ranked
+WHERE rn > {int(keep_last)}
+"""
+            job = self.client.query(sql, location=self.location)
+            rows = list(job.result())
+            count = int(rows[0][0]) if rows else 0
+
+            echo(
+                f"[DRY-RUN] snapshot_prune({relation}): would delete {count} row(s) "
+                f"(keep_last={keep_last})"
+            )
+            return
+        # NOTE(review): match is key + _ff_valid_from; tied timestamps per key delete together.
+        join_pred = " AND ".join([f"t.{k} = r.{k}" for k in keys])
+        delete_sql = f"""
+DELETE FROM {target} AS t
+WHERE EXISTS (
+  WITH ranked AS (
+    {ranked_sql}
+  )
+  SELECT 1
+  FROM ranked AS r
+  WHERE
+    r.rn > {int(keep_last)}
+    AND {join_pred}
+    AND t.{vf} = r.{vf}
+)
+"""
+        self.client.query(delete_sql, location=self.location).result()
diff --git a/src/fastflowtransform/executors/databricks_spark.py b/src/fastflowtransform/executors/databricks_spark.py
index 73a188d..0bede06 100644
--- a/src/fastflowtransform/executors/databricks_spark.py
+++ b/src/fastflowtransform/executors/databricks_spark.py
@@ -3,16 +3,24 @@
 
 from collections.abc import Callable, Iterable
 from contextlib import suppress
+from functools import reduce
 from pathlib import Path
 from typing import Any
 from urllib.parse import unquote, urlparse
 
+from jinja2 import Environment
+
 from fastflowtransform import storage
 from fastflowtransform.core import REGISTRY, Node, relation_for
 from fastflowtransform.errors import ModelExecutionError
+from fastflowtransform.executors._spark_imports import (
+    get_spark_functions,
+    get_spark_window,
+)
 from fastflowtransform.executors.base import BaseExecutor
-from fastflowtransform.logging import echo_debug
+from fastflowtransform.logging import echo, echo_debug
 from fastflowtransform.meta import ensure_meta_table, upsert_meta
+from fastflowtransform.snapshots import resolve_snapshot_config
 from fastflowtransform.table_formats import get_spark_format_handler
 from fastflowtransform.table_formats.base import SparkFormatHandler
 from fastflowtransform.typing import SDF, DataType, SparkSession
@@ -28,7 +36,7 @@
 
 _DELTA_EXTENSION = "io.delta.sql.DeltaSparkSessionExtension"
 _DELTA_CATALOG = "org.apache.spark.sql.delta.catalog.DeltaCatalog"
-_SPARK_DEFAULT_CATALOG = "org.apache.spark.sql.internal.CatalogImpl"  # Spark's built-in
+# _SPARK_DEFAULT_CATALOG = "org.apache.spark.sql.internal.CatalogImpl"  # Spark's built-in
 
 
 def _has_delta(spark: SparkSession) -> bool:
@@ -584,7 +592,7 @@ def on_node_built(self, node: Node, relation: str, fingerprint: str) -> None:
         ensure_meta_table(self)
         upsert_meta(self, node.name, relation, fingerprint, "databricks_spark")
 
-    # ── Incremental API (parity) ─────────────────────────────────────────
+    # ── Incremental API ─────────────────────────────────────────
     def exists_relation(self, relation: str) -> bool:
         """Check whether a table/view exists (optionally qualified with database)."""
         return self._format_handler.relation_exists(relation, database=self.database)
@@ -712,6 +720,291 @@ def _spark_sql_type(dt: DataType) -> str:
         table_sql = self._sql_identifier(relation)
         self.spark.sql(f"ALTER TABLE {table_sql} ADD COLUMNS ({cols_sql})")
 
+    # ── Snapshot API ─────────────────────────────────────────────────────
+
+    def run_snapshot_sql(self, node: Node, env: Environment) -> None:
+        """
+        Materialize a snapshot model on Spark/Databricks.
+
+        The node is validated, its snapshot configuration resolved and its SQL
+        rendered. When the target relation does not exist yet the snapshot table
+        is created from the source rows; otherwise the existing snapshot is
+        rebuilt against the current source rows.
+        """
+        spark_functions = get_spark_functions()
+
+        cfg = resolve_snapshot_config(node, self._validate_snapshot_node(node))
+        keys = cfg.unique_key
+
+        # Keyword arguments common to the first-run and the incremental code path.
+        shared: dict[str, Any] = {
+            "node": node,
+            "strategy": cfg.strategy,
+            "updated_at": cfg.updated_at,
+            "check_cols": cfg.check_cols,
+            "F": spark_functions,
+        }
+
+        body, rel_name, physical = self._snapshot_sql_body(node, env)
+        shared.update(
+            body=body,
+            rel_name=rel_name,
+            vf=BaseExecutor.SNAPSHOT_VALID_FROM_COL,
+            vt=BaseExecutor.SNAPSHOT_VALID_TO_COL,
+            is_cur=BaseExecutor.SNAPSHOT_IS_CURRENT_COL,
+            hash_col=BaseExecutor.SNAPSHOT_HASH_COL,
+            upd_meta=BaseExecutor.SNAPSHOT_UPDATED_AT_COL,
+        )
+
+        if self.exists_relation(rel_name):
+            # Only the incremental path needs the physical identifier and the keys.
+            self._snapshot_incremental_run(physical=physical, unique_key=keys, **shared)
+        else:
+            self._snapshot_first_run(**shared)
+
+    def _validate_snapshot_node(self, node: Node) -> dict[str, Any]:
+        """
+        Check that ``node`` is a SQL model configured as a snapshot.
+
+        Returns:
+            The node's ``meta`` mapping.
+
+        Raises:
+            TypeError: If ``node.kind`` is not ``"sql"``.
+            ValueError: If the meta does not mark the node as a snapshot.
+        """
+        kind = node.kind
+        if kind != "sql":
+            raise TypeError(
+                "Snapshot materialization is only supported for SQL models, "
+                f"got kind={kind!r} for {node.name}."
+            )
+
+        # A missing or falsy `meta` attribute is normalised to an empty dict.
+        node_meta = getattr(node, "meta", {}) or {}
+        if self._meta_is_snapshot(node_meta):
+            return node_meta
+        raise ValueError(f"Node {node.name} is not configured with materialized='snapshot'.")
+
+    def _snapshot_sql_body(
+        self,
+        node: Node,
+        env: Environment,
+    ) -> tuple[str, str, str]:
+        """
+        Render the model SQL and derive the snapshot target names.
+
+        Returns:
+            A ``(body, rel_name, physical)`` tuple: the rendered SQL after
+            ``_strip_leading_config`` and ``_selectable_body`` with trailing
+            ``;`` and whitespace removed, the relation name for the node, and
+            the physical identifier of that relation.
+        """
+
+        def _ref(name: str) -> Any:
+            return self._resolve_ref(name, env)
+
+        rendered = self.render_sql(
+            node,
+            env,
+            ref_resolver=_ref,
+            source_resolver=self._resolve_source,
+        )
+        stripped = self._strip_leading_config(rendered).strip()
+        select_body = self._selectable_body(stripped).rstrip(" ;\n\t")
+
+        relation = relation_for(node.name)
+        return select_body, relation, self._physical_identifier(relation)
+
+    def _snapshot_first_run(
+        self,
+        *,
+        node: Node,
+        rel_name: str,
+        body: str,
+        strategy: str,
+        updated_at: str | None,
+        check_cols: list[str],
+        F: Any,
+        vf: str,
+        vt: str,
+        is_cur: str,
+        hash_col: str,
+        upd_meta: str,
+    ) -> None:
+        """
+        Create the snapshot table on the first run.
+
+        Runs ``body`` through Spark, appends the snapshot bookkeeping columns so
+        that every source row becomes an open, current version, and saves the
+        result as ``rel_name``.
+
+        Args:
+            node: Model being snapshotted; used to look up storage settings.
+            rel_name: Relation name the snapshot is saved under.
+            body: SELECT statement producing the source rows.
+            strategy: ``"timestamp"``; any other value takes the hash-based
+                ("check") path.
+            updated_at: Source column carrying the row's update timestamp.
+                Required for the timestamp strategy, optional otherwise.
+            check_cols: Columns hashed to detect changes (check path).
+            F: Spark SQL functions namespace (as returned by
+                ``get_spark_functions()`` in ``run_snapshot_sql``).
+            vf: Name of the valid-from column.
+            vt: Name of the valid-to column.
+            is_cur: Name of the is-current flag column.
+            hash_col: Name of the row-hash column.
+            upd_meta: Name of the stored updated-at column.
+
+        Raises:
+            ValueError: If the timestamp strategy is used without ``updated_at``.
+        """
+        src_df = self.spark.sql(body)
+
+        echo_debug(f"[snapshot] first run for {rel_name} (strategy={strategy})")
+
+        if strategy == "timestamp":
+            if updated_at is None:
+                # Raised explicitly: an `assert` would be stripped under `python -O`.
+                raise ValueError("timestamp snapshots require a non-null updated_at column")
+            # valid_from and the stored updated-at both come from the source column;
+            # no row hash is kept for timestamp snapshots.
+            upd_expr = F.col(updated_at)
+            valid_from_expr = F.col(updated_at)
+            hash_expr = F.lit(None).cast("string")
+        else:
+            # md5 over the "||"-joined check columns; NULL values are hashed as ''.
+            cols_expr = [F.coalesce(F.col(c).cast("string"), F.lit("")) for c in check_cols]
+            hash_expr = F.md5(F.concat_ws("||", *cols_expr)).cast("string")
+            upd_expr = F.col(updated_at) if updated_at else F.current_timestamp()
+            valid_from_expr = F.current_timestamp()
+
+        df_snap = (
+            src_df.withColumn(upd_meta, upd_expr)
+            .withColumn(vf, valid_from_expr)
+            .withColumn(vt, F.lit(None).cast("timestamp"))
+            .withColumn(is_cur, F.lit(True))
+            .withColumn(hash_col, hash_expr)
+        )
+
+        storage_meta = self._storage_meta(node, rel_name)
+        self._save_df_as_table(rel_name, df_snap, storage=storage_meta)
+
+    def _snapshot_incremental_run(
+        self,
+        *,
+        node: Node,
+        body: str,
+        rel_name: str,
+        physical: str,
+        strategy: str,
+        unique_key: list[str],
+        updated_at: str | None,
+        check_cols: list[str],
+        F: Any,
+        vf: str,
+        vt: str,
+        is_cur: str,
+        hash_col: str,
+        upd_meta: str,
+    ) -> None:
+        """
+        Rebuild an existing snapshot table from its content plus the source rows.
+
+        The saved result is the union of four parts:
+          - rows that were already non-current (history, untouched),
+          - current rows whose key is neither new nor changed (untouched),
+          - previously current rows of changed keys, closed with
+            ``valid_to = current_timestamp`` and ``is_current = False``,
+          - new current versions built from the source rows of new/changed keys.
+
+        A key counts as *new* when the left join finds no current snapshot value
+        (stored updated-at / hash is NULL) and as *changed* when the source
+        ``updated_at`` is greater than the stored one (timestamp strategy) or
+        the md5 hash over ``check_cols`` differs (otherwise).
+
+        Args:
+            node: Model being snapshotted; used for error messages and storage.
+            body: SELECT statement producing the source rows.
+            rel_name: Relation name the snapshot is saved under.
+            physical: Identifier used to read the existing snapshot table.
+            strategy: ``"timestamp"`` or the hash-based ("check") strategy.
+            unique_key: Business-key columns; must exist on source and snapshot.
+            updated_at: Source update-timestamp column (required for timestamp).
+            check_cols: Columns hashed to detect changes (check strategy).
+            F: Spark SQL functions namespace.
+            vf: Name of the valid-from column.
+            vt: Name of the valid-to column.
+            is_cur: Name of the is-current flag column.
+            hash_col: Name of the row-hash column.
+            upd_meta: Name of the stored updated-at column.
+
+        Raises:
+            ValueError: If a ``unique_key`` column is missing on either side, or
+                the timestamp strategy is used without ``updated_at``.
+        """
+        echo_debug(f"[snapshot] incremental run for {rel_name} (strategy={strategy})")
+
+        existing = self.spark.table(physical)
+        src_df = self.spark.sql(body)
+
+        missing_keys_src = [k for k in unique_key if k not in src_df.columns]
+        missing_keys_snap = [k for k in unique_key if k not in existing.columns]
+        if missing_keys_src or missing_keys_snap:
+            raise ValueError(
+                f"{node.path}: snapshot unique_key columns must exist on both source and "
+                f"snapshot table. Missing on source={missing_keys_src}, "
+                f"on snapshot={missing_keys_snap}."
+            )
+
+        if strategy == "check":
+            # Helper column with the hash of the incoming row; dropped again before saving.
+            cols_expr = [F.coalesce(F.col(c).cast("string"), F.lit("")) for c in check_cols]
+            concat_expr = F.concat_ws("||", *cols_expr)
+            src_df = src_df.withColumn("__ff_new_hash", F.md5(concat_expr).cast("string"))
+
+        current_df = existing.filter(F.col(is_cur) == True)  # noqa: E712
+
+        # Left join keeps every source row; unmatched rows carry NULLs on the "t" side.
+        s_alias = src_df.alias("s")
+        t_alias = current_df.alias("t")
+        joined = s_alias.join(t_alias, on=unique_key, how="left")
+
+        if strategy == "timestamp":
+            if updated_at is None:
+                # Raised explicitly: an `assert` would be stripped under `python -O`.
+                raise ValueError("timestamp snapshots require a non-null updated_at column")
+            s_upd = F.col(f"s.{updated_at}")
+            t_upd = F.col(f"t.{upd_meta}")
+            cond_new = t_upd.isNull()
+            cond_changed = t_upd.isNotNull() & (s_upd > t_upd)
+            changed_or_new = cond_new | cond_changed
+        else:
+            s_hash = F.col("s.__ff_new_hash")
+            t_hash = F.col(f"t.{hash_col}")
+            cond_new = t_hash.isNull()
+            cond_changed = t_hash.isNotNull() & (s_hash != F.coalesce(t_hash, F.lit("")))
+            changed_or_new = cond_new | cond_changed
+
+        # Distinct business keys that need a new current version.
+        changed_keys = (
+            joined.filter(changed_or_new)
+            .select(*[F.col(f"s.{k}").alias(k) for k in unique_key])
+            .dropDuplicates()
+        )
+
+        prev_noncurrent = existing.filter(F.col(is_cur) == False)  # noqa: E712
+        preserved_current = current_df.join(changed_keys, on=unique_key, how="left_anti")
+
+        closed_prev = (
+            current_df.join(changed_keys, on=unique_key, how="inner")
+            .withColumn(vt, F.current_timestamp())
+            .withColumn(is_cur, F.lit(False))
+        )
+
+        new_src = src_df.join(changed_keys, on=unique_key, how="inner")
+        if strategy == "timestamp":
+            # `updated_at` was validated as non-None in the timestamp branch above.
+            new_versions = (
+                new_src.withColumn(upd_meta, F.col(updated_at))
+                .withColumn(vf, F.col(updated_at))
+                .withColumn(vt, F.lit(None).cast("timestamp"))
+                .withColumn(is_cur, F.lit(True))
+                .withColumn(hash_col, F.lit(None).cast("string"))
+            )
+        else:
+            upd_expr = F.col(updated_at) if updated_at else F.current_timestamp()
+            new_versions = (
+                new_src.withColumn(upd_meta, upd_expr)
+                .withColumn(vf, F.current_timestamp())
+                .withColumn(vt, F.lit(None).cast("timestamp"))
+                .withColumn(is_cur, F.lit(True))
+                .withColumn(hash_col, F.col("__ff_new_hash"))
+            )
+
+        # Columns present in only some parts are filled with NULL by unionByName.
+        parts = [prev_noncurrent, preserved_current, closed_prev, new_versions]
+        snapshot_df = reduce(lambda a, b: a.unionByName(b, allowMissingColumns=True), parts)
+        if "__ff_new_hash" in snapshot_df.columns:
+            snapshot_df = snapshot_df.drop("__ff_new_hash")
+
+        # NOTE(review): snapshot_df is derived from `existing`, i.e. from the very table
+        # that is saved here — presumably _save_df_as_table copes with overwriting a
+        # relation that is still being read; confirm for every table format.
+        storage_meta = self._storage_meta(node, rel_name)
+        self._save_df_as_table(rel_name, snapshot_df, storage=storage_meta)
+
+    def snapshot_prune(
+        self,
+        relation: str,
+        unique_key: list[str],
+        keep_last: int,
+        *,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Delete older snapshot versions while keeping the most recent `keep_last`
+        rows per business key (including the current row), implemented as a
+        DataFrame overwrite (no in-place DELETE).
+
+        Args:
+            relation: Snapshot relation to prune.
+            unique_key: Business-key columns; empty entries are ignored.
+            keep_last: Number of versions to keep per key, ranked by valid-from
+                descending. Values <= 0 make the call a no-op.
+            dry_run: When true, only report how many rows would be removed.
+
+        The call is also a no-op when no key column is given or when the
+        relation cannot be read (the latter is logged at debug level).
+        """
+        if keep_last <= 0:
+            return
+
+        # Same key normalisation as the SQL-based executors: drop empty entries.
+        keys = [k for k in unique_key or [] if k]
+        if not keys:
+            return
+
+        Window = get_spark_window()
+        F = get_spark_functions()
+
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+
+        try:
+            physical = self._physical_identifier(relation)
+            df = self.spark.table(physical)
+        except Exception as exc:
+            # Best effort: pruning a relation that cannot be read is skipped,
+            # but leave a trace instead of swallowing the error silently.
+            echo_debug(f"[snapshot] snapshot_prune({relation}): skipped, cannot read table: {exc}")
+            return
+
+        # Rank versions per business key, newest valid-from first.
+        w = Window.partitionBy(*[F.col(k) for k in keys]).orderBy(F.col(vf).desc())
+        ranked = df.withColumn("__ff_rn", F.row_number().over(w))
+
+        if dry_run:
+            cnt = ranked.filter(F.col("__ff_rn") > int(keep_last)).count()
+
+            echo(
+                f"[DRY-RUN] snapshot_prune({relation}): would delete {cnt} row(s) "
+                f"(keep_last={keep_last})"
+            )
+            return
+
+        # NOTE(review): `pruned` is computed from the table it then replaces —
+        # presumably _save_df_as_table handles that; confirm per table format.
+        pruned = ranked.filter(F.col("__ff_rn") <= int(keep_last)).drop("__ff_rn")
+        self._save_df_as_table(relation, pruned)
+
 
 # ────────────────────────── local helpers / shim ──────────────────────────
 class _SparkResult:
diff --git a/src/fastflowtransform/executors/duckdb.py b/src/fastflowtransform/executors/duckdb.py
index 536ccff..44121c0 100644
--- a/src/fastflowtransform/executors/duckdb.py
+++ b/src/fastflowtransform/executors/duckdb.py
@@ -9,10 +9,13 @@
 import duckdb
 import pandas as pd
 from duckdb import CatalogException
+from jinja2 import Environment
 
 from fastflowtransform.core import Node, relation_for
 from fastflowtransform.executors.base import BaseExecutor
+from fastflowtransform.logging import echo
 from fastflowtransform.meta import ensure_meta_table, upsert_meta
+from fastflowtransform.snapshots import resolve_snapshot_config
 
 
 def _q(ident: str) -> str:
@@ -284,3 +287,220 @@ def alter_table_sync_schema(
                 self.con.execute(f"alter table {target} add column {col} varchar")
             except Exception:
                 self.con.execute(f"alter table {target} add column {col} varchar")
+
+    def run_snapshot_sql(self, node: Node, env: Environment) -> None:
+        """
+        Snapshot materialization for DuckDB.
+
+        Config (node.meta):
+          - materialized='snapshot'
+          - snapshot: { ... }  # strategy + per-strategy hints
+          - unique_key: str | list[str]
+
+        Behaviour:
+          - First run: create the table with every source row as a current row.
+          - Subsequent runs:
+              * close changed current rows (set valid_to, is_current=false)
+              * insert new current rows for new/changed keys.
+
+        Raises:
+            TypeError: If the node is not a SQL model.
+            ValueError: If the node is not configured as a snapshot.
+        """
+        if node.kind != "sql":
+            raise TypeError(
+                f"Snapshot materialization is only supported for SQL models, "
+                f"got kind={node.kind!r} for {node.name}."
+            )
+
+        meta = getattr(node, "meta", {}) or {}
+        if not self._meta_is_snapshot(meta):
+            raise ValueError(f"Node {node.name} is not configured with materialized='snapshot'.")
+
+        # ---- Extract & normalise snapshot config (shared helper) ----
+        cfg = resolve_snapshot_config(node, meta)
+        strategy = cfg.strategy
+        unique_key = cfg.unique_key
+        updated_at = cfg.updated_at
+        check_cols = cfg.check_cols
+
+        # ---- Render SQL and extract SELECT body ----
+        sql_rendered = self.render_sql(
+            node,
+            env,
+            ref_resolver=lambda name: self._resolve_ref(name, env),
+            source_resolver=self._resolve_source,
+        )
+        sql = self._strip_leading_config(sql_rendered).strip()
+        body = self._selectable_body(sql).rstrip(" ;\n\t")
+
+        rel_name = relation_for(node.name)
+        target = self._qualified(rel_name)
+
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+        vt = BaseExecutor.SNAPSHOT_VALID_TO_COL
+        is_cur = BaseExecutor.SNAPSHOT_IS_CURRENT_COL
+        hash_col = BaseExecutor.SNAPSHOT_HASH_COL
+        upd_meta = BaseExecutor.SNAPSHOT_UPDATED_AT_COL
+
+        # ---- First run: create snapshot table ----
+        if not self.exists_relation(rel_name):
+            if strategy == "timestamp":
+                # valid_from + updated_at come from the source updated_at column
+                create_sql = f"""
+create table {target} as
+select
+    s.*,
+    s.{updated_at} as {upd_meta},
+    s.{updated_at} as {vf},
+    cast(null as timestamp) as {vt},
+    true as {is_cur},
+    cast(null as varchar) as {hash_col}
+from ({body}) as s
+"""
+            else:  # strategy == "check"
+                # Hash over check_cols to detect changes (NULL values hash as '')
+                col_exprs = [f"coalesce(cast(s.{col} as varchar), '')" for col in check_cols]
+                concat_expr = " || '||' || ".join(col_exprs)
+                hash_expr = f"cast(md5({concat_expr}) as varchar)"
+                upd_expr = f"s.{updated_at}" if updated_at else "current_timestamp"
+                create_sql = f"""
+create table {target} as
+select
+    s.*,
+    {upd_expr} as {upd_meta},
+    current_timestamp as {vf},
+    cast(null as timestamp) as {vt},
+    true as {is_cur},
+    {hash_expr} as {hash_col}
+from ({body}) as s
+"""
+            self.con.execute(create_sql)
+            return
+
+        # ---- Incremental snapshot update ----
+
+        # Stage current source rows in a temp view for reuse
+        src_view_name = f"__ff_snapshot_src_{rel_name}".replace(".", "_")
+        src_quoted = _q(src_view_name)
+        self.con.execute(f"create or replace temp view {src_quoted} as {body}")
+
+        try:
+            # Join predicate on unique keys
+            keys_pred = " AND ".join([f"t.{k} = s.{k}" for k in unique_key])
+
+            # Change condition & expressions for the new versions
+            if strategy == "timestamp":
+                # No row hash is kept for timestamp snapshots; the hash column stays NULL.
+                change_condition = f"s.{updated_at} > t.{upd_meta}"
+                new_upd_expr = f"s.{updated_at}"
+                new_valid_from_expr = f"s.{updated_at}"
+                new_hash_expr = "NULL"
+            else:
+                col_exprs_s = [f"coalesce(cast(s.{col} as varchar), '')" for col in check_cols]
+                concat_expr_s = " || '||' || ".join(col_exprs_s)
+                hash_expr_s = f"cast(md5({concat_expr_s}) as varchar)"
+                change_condition = f"coalesce({hash_expr_s}, '') <> coalesce(t.{hash_col}, '')"
+                new_upd_expr = f"s.{updated_at}" if updated_at else "current_timestamp"
+                new_valid_from_expr = "current_timestamp"
+                new_hash_expr = hash_expr_s
+
+            # NOTE(review): the UPDATE and INSERT below run as two separate statements and
+            # no explicit transaction is opened in this method — confirm whether the caller
+            # provides one if the two steps must be atomic.
+
+            # 1) Close changed current rows
+            close_sql = f"""
+update {target} as t
+set
+    {vt} = current_timestamp,
+    {is_cur} = false
+from {src_quoted} as s
+where
+    {keys_pred}
+    and t.{is_cur} = true
+    and {change_condition};
+"""
+            self.con.execute(close_sql)
+
+            # 2) Insert new current versions (new keys or changed rows).
+            #    Keys closed in step 1 no longer have a current row, so the left
+            #    join leaves their `t` side NULL and they are picked up here.
+            first_key = unique_key[0]
+            insert_sql = f"""
+insert into {target}
+select
+    s.*,
+    {new_upd_expr} as {upd_meta},
+    {new_valid_from_expr} as {vf},
+    cast(null as timestamp) as {vt},
+    true as {is_cur},
+    {new_hash_expr} as {hash_col}
+from {src_quoted} as s
+left join {target} as t
+  on {keys_pred}
+  and t.{is_cur} = true
+where
+    t.{first_key} is null
+    or {change_condition};
+"""
+            self.con.execute(insert_sql)
+        finally:
+            # Best-effort cleanup of the staging view, also on failure.
+            with suppress(Exception):
+                self.con.execute(f"drop view if exists {src_quoted}")
+
+    def snapshot_prune(
+        self,
+        relation: str,
+        unique_key: list[str],
+        keep_last: int,
+        *,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Delete older snapshot versions while keeping the most recent `keep_last`
+        rows per business key (including the current row).
+
+        Args:
+            relation: Snapshot relation to prune.
+            unique_key: Business-key columns; empty entries are ignored.
+            keep_last: Number of versions to keep per key, ranked by valid-from
+                descending. Values <= 0 make the call a no-op.
+            dry_run: When true, only report how many rows would be deleted.
+
+        Rows to delete are matched back to the table on the key columns plus
+        the valid-from value, so rows sharing both are removed together.
+        """
+        if keep_last <= 0:
+            return
+
+        keys = [k for k in unique_key if k]
+        if not keys:
+            return
+
+        target = self._qualified(relation)
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+
+        # The same column list serves as select list and as window partition.
+        key_list = ", ".join(keys)
+        join_pred = " AND ".join(f"t.{k} = r.{k}" for k in keys)
+
+        ranked_sql = f"""
+select
+  {key_list},
+  {vf},
+  row_number() over (
+    partition by {key_list}
+    order by {vf} desc
+  ) as rn
+from {target}
+"""
+
+        if dry_run:
+            sql = f"""
+with ranked as (
+  {ranked_sql}
+)
+select count(*) as rows_to_delete
+from ranked
+where rn > {int(keep_last)}
+"""
+            res = self.con.execute(sql).fetchone()
+            rows = int(res[0]) if res else 0
+
+            echo(
+                f"[DRY-RUN] snapshot_prune({relation}): would delete {rows} row(s) "
+                f"(keep_last={keep_last})"
+            )
+            return
+
+        delete_sql = f"""
+delete from {target} t
+using (
+  {ranked_sql}
+) r
+where
+  r.rn > {int(keep_last)}
+  and {join_pred}
+  and t.{vf} = r.{vf};
+"""
+        self.con.execute(delete_sql)
diff --git a/src/fastflowtransform/executors/postgres.py b/src/fastflowtransform/executors/postgres.py
index 1ff0243..de57fda 100644
--- a/src/fastflowtransform/executors/postgres.py
+++ b/src/fastflowtransform/executors/postgres.py
@@ -3,6 +3,7 @@
 from typing import Any
 
 import pandas as pd
+from jinja2 import Environment
 from sqlalchemy import create_engine, text
 from sqlalchemy.engine import Connection, Engine
 from sqlalchemy.exc import ProgrammingError, SQLAlchemyError
@@ -11,7 +12,9 @@
 from fastflowtransform.errors import ModelExecutionError, ProfileConfigError
 from fastflowtransform.executors._shims import SAConnShim
 from fastflowtransform.executors.base import BaseExecutor
+from fastflowtransform.logging import echo
 from fastflowtransform.meta import ensure_meta_table, upsert_meta
+from fastflowtransform.snapshots import resolve_snapshot_config
 
 
 class PostgresExecutor(BaseExecutor[pd.DataFrame]):
@@ -257,3 +260,234 @@ def alter_table_sync_schema(
             add = [c for c in cols if c not in existing]
             for c in add:
                 con.execute(text(f'alter table {qrel} add column "{c}" text'))
+
+    # ── Snapshot API ──────────────────────────────────────────────────────
+
+    def run_snapshot_sql(self, node: Node, env: Environment) -> None:
+        """
+        Snapshot materialization for Postgres.
+
+        Config:
+          - materialized='snapshot'
+          - snapshot={...} and/or top-level strategy/updated_at/check_cols
+          - unique_key / primary_key
+
+        Behaviour:
+          - First run: create the table with every source row as a current row.
+          - Subsequent runs (inside a single ``engine.begin()`` block):
+              * close changed current rows (set valid_to, is_current=false)
+              * insert new current rows for new/changed keys.
+
+        Raises:
+            TypeError: If the node is not a SQL model.
+            ValueError: If the node is not configured as a snapshot.
+        """
+        if node.kind != "sql":
+            raise TypeError(
+                f"Snapshot materialization is only supported for SQL models, "
+                f"got kind={node.kind!r} for {node.name}."
+            )
+
+        meta = getattr(node, "meta", {}) or {}
+        if not self._meta_is_snapshot(meta):
+            raise ValueError(f"Node {node.name} is not configured with materialized='snapshot'.")
+
+        # Shared normalisation: supports nested 'snapshot={...}' OR flattened config.
+        cfg = resolve_snapshot_config(node, meta)
+        strategy = cfg.strategy
+        unique_key = cfg.unique_key
+        updated_at = cfg.updated_at
+        check_cols = cfg.check_cols
+
+        # ---- Render SQL and extract SELECT body ----
+        sql_rendered = self.render_sql(
+            node,
+            env,
+            ref_resolver=lambda name: self._resolve_ref(name, env),
+            source_resolver=self._resolve_source,
+        )
+        sql = self._strip_leading_config(sql_rendered).strip()
+        body = self._selectable_body(sql).rstrip(" ;\n\t")
+
+        rel_name = relation_for(node.name)
+        target = self._qualified(rel_name)
+
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+        vt = BaseExecutor.SNAPSHOT_VALID_TO_COL
+        is_cur = BaseExecutor.SNAPSHOT_IS_CURRENT_COL
+        hash_col = BaseExecutor.SNAPSHOT_HASH_COL
+        upd_meta = BaseExecutor.SNAPSHOT_UPDATED_AT_COL
+
+        # ---- First run: create snapshot table ----
+        if not self.exists_relation(rel_name):
+            if strategy == "timestamp":
+                # valid_from + updated_at come from the source updated_at column
+                create_sql = f"""
+create table {target} as
+select
+    s.*,
+    s.{updated_at} as {upd_meta},
+    s.{updated_at} as {vf},
+    cast(null as timestamp) as {vt},
+    true as {is_cur},
+    cast(null as text) as {hash_col}
+from ({body}) as s
+"""
+            else:  # strategy == "check"
+                # Hash over check_cols to detect changes (NULL values hash as '')
+                col_exprs = [f"coalesce(cast(s.{col} as text), '')" for col in check_cols]
+                concat_expr = " || '||' || ".join(col_exprs)
+                hash_expr = f"md5({concat_expr})"
+                upd_expr = f"s.{updated_at}" if updated_at else "current_timestamp"
+                create_sql = f"""
+create table {target} as
+select
+    s.*,
+    {upd_expr} as {upd_meta},
+    current_timestamp as {vf},
+    cast(null as timestamp) as {vt},
+    true as {is_cur},
+    {hash_expr} as {hash_col}
+from ({body}) as s
+"""
+            with self.engine.begin() as conn:
+                self._set_search_path(conn)
+                conn.execute(text(create_sql))
+            return
+
+        # ---- Incremental snapshot update ----
+
+        # Stage current source rows in a temporary table for reuse
+        src_name = f"__ff_snapshot_src_{rel_name}".replace(".", "_")
+        src_q = self._q_ident(src_name)
+
+        with self.engine.begin() as conn:
+            self._set_search_path(conn)
+
+            # (Re-)create temp staging table
+            conn.execute(text(f"drop table if exists {src_q}"))
+            conn.execute(text(f"create temporary table {src_q} as {body}"))
+
+            # Join predicate on unique keys
+            keys_pred = " AND ".join([f"t.{k} = s.{k}" for k in unique_key])
+
+            # Change condition & expressions for the new versions
+            if strategy == "timestamp":
+                # No row hash is kept for timestamp snapshots; the hash column stays NULL.
+                change_condition = f"s.{updated_at} > t.{upd_meta}"
+                new_upd_expr = f"s.{updated_at}"
+                new_valid_from_expr = f"s.{updated_at}"
+                new_hash_expr = "NULL"
+            else:
+                col_exprs_s = [f"coalesce(cast(s.{col} as text), '')" for col in check_cols]
+                concat_expr_s = " || '||' || ".join(col_exprs_s)
+                hash_expr_s = f"md5({concat_expr_s})"
+                change_condition = (
+                    f"coalesce({hash_expr_s}, '') <> coalesce(t.{hash_col}::text, '')"
+                )
+                new_upd_expr = f"s.{updated_at}" if updated_at else "current_timestamp"
+                new_valid_from_expr = "current_timestamp"
+                new_hash_expr = hash_expr_s
+
+            # 1) Close changed current rows
+            close_sql = f"""
+update {target} as t
+set
+    {vt} = current_timestamp,
+    {is_cur} = false
+from {src_q} as s
+where
+    {keys_pred}
+    and t.{is_cur} = true
+    and {change_condition};
+"""
+            conn.execute(text(close_sql))
+
+            # 2) Insert new current versions (new keys or changed rows).
+            #    Keys closed in step 1 no longer have a current row, so the left
+            #    join leaves their `t` side NULL and they are picked up here.
+            first_key = unique_key[0]
+            insert_sql = f"""
+insert into {target}
+select
+    s.*,
+    {new_upd_expr} as {upd_meta},
+    {new_valid_from_expr} as {vf},
+    cast(null as timestamp) as {vt},
+    true as {is_cur},
+    {new_hash_expr} as {hash_col}
+from {src_q} as s
+left join {target} as t
+  on {keys_pred}
+  and t.{is_cur} = true
+where
+    t.{first_key} is null
+    or {change_condition};
+"""
+            conn.execute(text(insert_sql))
+
+            # Temp table will be dropped automatically at end of session; dropping
+            # explicitly here is harmless and keeps the connection clean for tests.
+            conn.execute(text(f"drop table if exists {src_q}"))
+
+    def snapshot_prune(
+        self,
+        relation: str,
+        unique_key: list[str],
+        keep_last: int,
+        *,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Prune a snapshot table down to the newest `keep_last` versions per
+        business key (the current row counts as one of them).
+
+        Nothing happens when `keep_last` is <= 0 or when no non-empty key column
+        is given. With `dry_run=True` the number of rows that would be deleted
+        is only reported, not removed.
+        """
+        if keep_last <= 0:
+            return
+
+        valid_from = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+        key_cols = list(filter(None, unique_key))
+        if not key_cols:
+            return
+
+        table = self._qualified(relation)
+        cols_csv = ", ".join(key_cols)
+
+        # Versions ranked per key, newest valid-from first.
+        ranked_sql = f"""
+select
+  {cols_csv},
+  {valid_from},
+  row_number() over (
+    partition by {cols_csv}
+    order by {valid_from} desc
+  ) as rn
+from {table}
+"""
+
+        if not dry_run:
+            key_match = " AND ".join([f"t.{k} = r.{k}" for k in key_cols])
+            delete_sql = f"""
+delete from {table} t
+using (
+  {ranked_sql}
+) r
+where
+  r.rn > {int(keep_last)}
+  and {key_match}
+  and t.{valid_from} = r.{valid_from};
+"""
+            with self.engine.begin() as conn:
+                self._set_search_path(conn)
+                conn.execute(text(delete_sql))
+            return
+
+        count_sql = f"""
+with ranked as (
+  {ranked_sql}
+)
+select count(*) as rows_to_delete
+from ranked
+where rn > {int(keep_last)}
+"""
+        with self.engine.begin() as conn:
+            self._set_search_path(conn)
+            first_row = conn.execute(text(count_sql)).fetchone()
+            rows = int(first_row[0]) if first_row else 0
+        echo(
+            f"[DRY-RUN] snapshot_prune({relation}): would delete {rows} row(s) "
+            f"(keep_last={keep_last})"
+        )
diff --git a/src/fastflowtransform/executors/snowflake_snowpark.py b/src/fastflowtransform/executors/snowflake_snowpark.py
index f209b8a..3195a85 100644
--- a/src/fastflowtransform/executors/snowflake_snowpark.py
+++ b/src/fastflowtransform/executors/snowflake_snowpark.py
@@ -3,11 +3,15 @@
 
 from collections.abc import Iterable
 from contextlib import suppress
-from typing import Any
+from typing import Any, cast
+
+from jinja2 import Environment
 
 from fastflowtransform.core import Node, relation_for
 from fastflowtransform.executors.base import BaseExecutor
+from fastflowtransform.logging import echo
 from fastflowtransform.meta import ensure_meta_table, upsert_meta
+from fastflowtransform.snapshots import resolve_snapshot_config
 from fastflowtransform.typing import SNDF, SnowparkSession as Session
 
 
@@ -291,6 +295,222 @@ def alter_table_sync_schema(
         cols_sql = ", ".join(f"{self._q(c)} STRING" for c in to_add)
         self.session.sql(f"ALTER TABLE {qrel} ADD COLUMN {cols_sql}").collect()
 
+    # ── Snapshot API ─────────────────────────────────────────────────────
+    def run_snapshot_sql(self, node: Node, env: Environment) -> None:
+        """
+        Materialize `node` as a snapshot table via Snowflake Snowpark SQL.
+
+        First run creates the table; later runs close changed current rows and
+        insert new versions. Config comes from the shared snapshot resolver.
+        """
+        if node.kind != "sql":
+            raise TypeError(
+                f"Snapshot materialization is only supported for SQL models, "
+                f"got kind={node.kind!r} for {node.name}."
+            )
+
+        meta = getattr(node, "meta", {}) or {}
+        if not self._meta_is_snapshot(meta):
+            raise ValueError(f"Node {node.name} is not configured with materialized='snapshot'.")
+
+        cfg = resolve_snapshot_config(node, meta)
+
+        # Render model SQL and extract the SELECT body
+        rendered = self.render_sql(
+            node,
+            env,
+            ref_resolver=lambda name: self._resolve_ref(name, env),
+            source_resolver=self._resolve_source,
+        )
+        sql = self._strip_leading_config(rendered).strip()
+        body = self._selectable_body(sql).rstrip(";\n\t ")
+
+        rel_name = relation_for(node.name)
+        target = self._qualified(rel_name)
+        # Snapshot bookkeeping column names come from constants on BaseExecutor.
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+        vt = BaseExecutor.SNAPSHOT_VALID_TO_COL
+        is_cur = BaseExecutor.SNAPSHOT_IS_CURRENT_COL
+        hash_col = BaseExecutor.SNAPSHOT_HASH_COL
+        upd_meta = BaseExecutor.SNAPSHOT_UPDATED_AT_COL
+
+        # ---- First run: create snapshot table ----
+        if not self.exists_relation(rel_name):
+            if cfg.strategy == "timestamp":
+                # cfg.updated_at is guaranteed non-None by resolve_snapshot_config
+                if cfg.updated_at is None:  # defensive, for type-checkers
+                    raise ValueError(
+                        "strategy='timestamp' snapshot requires a non-null updated_at column."
+                    )
+                create_sql = f"""
+CREATE OR REPLACE TABLE {target} AS
+SELECT
+  s.*,
+  s.{cfg.updated_at} AS {upd_meta},
+  s.{cfg.updated_at} AS {vf},
+  CAST(NULL AS TIMESTAMP) AS {vt},
+  TRUE AS {is_cur},
+  CAST(NULL AS VARCHAR) AS {hash_col}
+FROM ({body}) AS s
+"""
+            else:  # strategy == "check"
+                # MD5 over check_cols joined by '||' to detect changes; NULLs become ''
+                col_exprs = [f"COALESCE(CAST(s.{col} AS VARCHAR), '')" for col in cfg.check_cols]
+                concat_expr = " || '||' || ".join(col_exprs) or "''"
+                hash_expr = f"CAST(MD5({concat_expr}) AS VARCHAR)"
+                upd_expr = (
+                    f"s.{cfg.updated_at}" if cfg.updated_at is not None else "CURRENT_TIMESTAMP()"
+                )
+                create_sql = f"""
+CREATE OR REPLACE TABLE {target} AS
+SELECT
+  s.*,
+  {upd_expr} AS {upd_meta},
+  CURRENT_TIMESTAMP() AS {vf},
+  CAST(NULL AS TIMESTAMP) AS {vt},
+  TRUE AS {is_cur},
+  {hash_expr} AS {hash_col}
+FROM ({body}) AS s
+"""
+            self.session.sql(create_sql).collect()
+            return
+
+        # ---- Incremental snapshot update ----
+        src_name = f"__ff_snapshot_src_{rel_name}".replace(".", "_")
+
+        # Use a temporary view for the current source rows
+        self.session.sql(f"CREATE OR REPLACE TEMPORARY VIEW {src_name} AS {body}").collect()
+        # NOTE(review): the UPDATE and INSERT below are issued as two separate statements.
+        try:
+            keys_pred = " AND ".join([f"t.{k} = s.{k}" for k in cfg.unique_key]) or "FALSE"
+
+            if cfg.strategy == "timestamp":
+                if cfg.updated_at is None:
+                    raise ValueError(
+                        "strategy='timestamp' snapshot requires a non-null updated_at column."
+                    )
+                change_condition = f"s.{cfg.updated_at} > t.{upd_meta}"
+                hash_expr_s = "NULL"  # not referenced again for this strategy
+                new_upd_expr = f"s.{cfg.updated_at}"
+                new_valid_from_expr = f"s.{cfg.updated_at}"
+                new_hash_expr = "NULL"
+            else:
+                col_exprs_s = [f"COALESCE(CAST(s.{col} AS VARCHAR), '')" for col in cfg.check_cols]
+                concat_expr_s = " || '||' || ".join(col_exprs_s) or "''"
+                hash_expr_s = f"CAST(MD5({concat_expr_s}) AS VARCHAR)"
+                change_condition = f"COALESCE({hash_expr_s}, '') <> COALESCE(t.{hash_col}, '')"
+                new_upd_expr = (
+                    f"s.{cfg.updated_at}" if cfg.updated_at is not None else "CURRENT_TIMESTAMP()"
+                )
+                new_valid_from_expr = "CURRENT_TIMESTAMP()"
+                new_hash_expr = hash_expr_s
+
+            # 1) Close changed current rows
+            close_sql = f"""
+UPDATE {target} AS t
+SET
+  {vt} = CURRENT_TIMESTAMP(),
+  {is_cur} = FALSE
+FROM {src_name} AS s
+WHERE
+  {keys_pred}
+  AND t.{is_cur} = TRUE
+  AND {change_condition}
+"""
+            self.session.sql(close_sql).collect()
+
+            # 2) Insert new current versions (new keys or changed rows)
+            first_key = cfg.unique_key[0]  # NULL after the LEFT JOIN => no current row matched
+            insert_sql = f"""
+INSERT INTO {target}
+SELECT
+  s.*,
+  {new_upd_expr} AS {upd_meta},
+  {new_valid_from_expr} AS {vf},
+  CAST(NULL AS TIMESTAMP) AS {vt},
+  TRUE AS {is_cur},
+  {new_hash_expr} AS {hash_col}
+FROM {src_name} AS s
+LEFT JOIN {target} AS t
+  ON {keys_pred}
+ AND t.{is_cur} = TRUE
+WHERE
+  t.{first_key} IS NULL
+  OR {change_condition}
+"""
+            self.session.sql(insert_sql).collect()
+        finally:
+            with suppress(Exception):  # best-effort cleanup of the temporary view
+                self.session.sql(f"DROP VIEW IF EXISTS {src_name}").collect()
+
+    def snapshot_prune(
+        self,
+        relation: str,
+        unique_key: list[str],
+        keep_last: int,
+        *,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Delete older snapshot versions, keeping the newest `keep_last` versions per
+        business key (ranked by valid-from; current row included). `dry_run` only counts.
+        """
+        if keep_last <= 0:  # nothing sensible to keep => no-op
+            return
+
+        keys = [k for k in unique_key if k]
+        if not keys:  # no business key to partition by => no-op
+            return
+
+        target = self._qualified(relation)
+        vf = BaseExecutor.SNAPSHOT_VALID_FROM_COL
+        key_list = ", ".join(keys)
+
+        # Rank versions per key, newest first. DENSE_RANK rather than ROW_NUMBER because the
+        # DELETE below matches on key + valid_from, so rows tied on both must share a rank.
+        ranked_sql = f"""
+SELECT
+  {key_list},
+  {vf},
+  DENSE_RANK() OVER (
+    PARTITION BY {key_list}
+    ORDER BY {vf} DESC
+  ) AS rn
+FROM {target}
+"""
+
+        if dry_run:
+            sql = f"""
+WITH ranked AS (
+  {ranked_sql}
+)
+SELECT COUNT(*) AS rows_to_delete
+FROM ranked
+WHERE rn > {int(keep_last)}
+"""
+            res_raw = self.session.sql(sql).collect()
+            # Snowflake returns a list of Row objects; treat them as tuples for typing.
+            res = cast("list[tuple[Any, ...]]", res_raw)
+            rows = int(res[0][0]) if res else 0
+
+            echo(
+                f"[DRY-RUN] snapshot_prune({relation}): would delete {rows} row(s) "
+                f"(keep_last={keep_last})"
+            )
+            return
+
+        delete_sql = f"""
+DELETE FROM {target} t
+USING (
+  {ranked_sql}
+) r
+WHERE
+  r.rn > {int(keep_last)}
+  AND {" AND ".join([f"t.{k} = r.{k}" for k in keys])}
+  AND t.{vf} = r.{vf}
+"""
+        self.session.sql(delete_sql).collect()
+
 
 # ────────────────────────── local testing shim ───────────────────────────
 class _SFCursorShim:
diff --git a/src/fastflowtransform/snapshots.py b/src/fastflowtransform/snapshots.py
new file mode 100644
index 0000000..456d837
--- /dev/null
+++ b/src/fastflowtransform/snapshots.py
@@ -0,0 +1,113 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass
+from typing import Any, Literal
+
+from fastflowtransform.core import Node
+from fastflowtransform.incremental import _normalize_unique_key
+
+SnapshotStrategy = Literal["timestamp", "check"]
+
+
+@dataclass
+class SnapshotConfigResolved:
+    """
+    Normalised snapshot configuration usable by executors.
+
+    Built by `resolve_snapshot_config` from either accepted config shape:
+      - legacy nested config: snapshot={'strategy': ..., 'updated_at': ..., 'check_cols': ...}
+      - flattened config:     strategy=..., updated_at=..., check_cols=...
+    """
+
+    strategy: SnapshotStrategy  # "timestamp" or "check"
+    unique_key: list[str]  # business key column(s); the resolver rejects an empty key
+    updated_at: str | None  # timestamp column; the resolver requires it for "timestamp"
+    check_cols: list[str]  # compared columns; the resolver requires them for "check"
+
+
+def resolve_snapshot_config(node: Node, meta: Mapping[str, Any]) -> SnapshotConfigResolved:
+    """
+    Resolve and validate snapshot configuration from a model's meta dict.
+
+    Accepted shapes (values in the nested `snapshot` block take precedence):
+      {{ config(
+          materialized='snapshot',
+          snapshot={'strategy': 'timestamp', 'updated_at': 'updated_at',
+                    'check_cols': ['col1', 'col2']},
+          unique_key='id',
+        ) }}
+      OR (flattened)
+      {{ config(
+          materialized='snapshot',
+          strategy='timestamp',
+          updated_at='updated_at',
+          check_cols=['col1', 'col2'],
+          unique_key='id',
+        ) }}
+
+    Aliases: `primary_key`, `updated_at_column`, `check_columns`; default strategy: 'timestamp'.
+    Raises:
+        TypeError: `snapshot` is present but not a mapping.
+        ValueError: no unique key, unknown strategy, or a strategy missing its column(s).
+    """
+    meta = dict(meta or {})
+
+    # Optional nested block (consulted before the top-level keys below)
+    snapshot_block = meta.get("snapshot")
+    if snapshot_block is not None and not isinstance(snapshot_block, Mapping):
+        raise TypeError(
+            f"{node.path}: snapshot configuration must be a mapping (snapshot={{...}})."
+        )
+    snapshot_block = dict(snapshot_block or {})
+
+    # ---- unique key ----------------------------------------------------
+    unique_key = _normalize_unique_key(meta.get("unique_key") or meta.get("primary_key"))
+    if not unique_key:
+        raise ValueError(
+            f"{node.path}: snapshot models require 'unique_key' (string or list of strings)."
+        )
+
+    # ---- strategy ------------------------------------------------------
+    raw_strategy = snapshot_block.get("strategy") or meta.get("strategy") or "timestamp"
+    strategy_str = str(raw_strategy).lower()
+    if strategy_str not in ("timestamp", "check"):
+        raise ValueError(
+            f"{node.path}: snapshot 'strategy' must be 'timestamp' or 'check', "
+            f"got {raw_strategy!r}."
+        )
+
+    # Narrow to the Literal["timestamp", "check"] type for type-checkers
+    strategy: SnapshotStrategy = "timestamp" if strategy_str == "timestamp" else "check"
+
+    # ---- updated_at (first truthy value wins) --------------------------
+    updated_at = (
+        snapshot_block.get("updated_at")
+        or snapshot_block.get("updated_at_column")
+        or meta.get("updated_at")
+        or meta.get("updated_at_column")
+    )
+
+    # ---- check_cols (first truthy value wins) --------------------------
+    raw_check_cols = (
+        snapshot_block.get("check_cols")
+        or snapshot_block.get("check_columns")
+        or meta.get("check_cols")
+        or meta.get("check_columns")
+    )
+    check_cols = _normalize_unique_key(raw_check_cols) if raw_check_cols else []
+
+    # Per-strategy guards (extra safety besides ModelConfig)
+    if strategy == "timestamp" and not updated_at:
+        raise ValueError(
+            f"{node.path}: strategy='timestamp' snapshots require 'updated_at' column name."
+        )
+    if strategy == "check" and not check_cols:
+        raise ValueError(
+            f"{node.path}: strategy='check' snapshots require non-empty "
+            "'check_cols' (string or list)."
+        )
+
+    return SnapshotConfigResolved(
+        strategy=strategy, unique_key=unique_key, updated_at=updated_at, check_cols=check_cols
+    )
diff --git a/src/fastflowtransform/templates/index.html.j2 b/src/fastflowtransform/templates/index.html.j2
index 149809c..b879e52 100644
--- a/src/fastflowtransform/templates/index.html.j2
+++ b/src/fastflowtransform/templates/index.html.j2
@@ -73,6 +73,7 @@
     .badge-table { background:#eef7ff; color:#0a3a77; border-color:#bcd8fb; }
     .badge-view { background:#eefcf4; color:#0b5d2a; border-color:#bdebcf; }
     .badge-ephemeral { background:#fff7e8; color:#7a4a00; border-color:#f6db9b; }
+    .badge-snapshot { background:#f3e8ff; color:#5b21b6; border-color:#d8b4fe; }
     .badge-sql { background: var(--chip-sql-bg); color: var(--chip-sql-fg); }
     .badge-py  { background: var(--chip-py-bg);  color: var(--chip-py-fg);  }
     .subline { display:block; margin-top:2px; font-size:12px; color: var(--muted); line-height:1.35; }
diff --git a/tests/integration/examples/config.py b/tests/integration/examples/config.py
index 4734ffe..2ab89fb 100644
--- a/tests/integration/examples/config.py
+++ b/tests/integration/examples/config.py
@@ -88,4 +88,15 @@ class ExampleConfig:
             "databricks_spark": "dev_databricks",
         },
     ),
+    ExampleConfig(
+        name="snapshot_demo",
+        path=ROOT / "examples" / "snapshot_demo",
+        make_target="demo",
+        env_by_engine={
+            "duckdb": "dev_duckdb",
+            "postgres": "dev_postgres",
+            "databricks_spark": "dev_databricks",
+        },
+        spark_table_formats=["parquet", "delta", "iceberg"],
+    ),
 ]
diff --git a/tests/unit/api/http/test_http_offline_cache_unit.py b/tests/unit/api/http/test_http_offline_cache_unit.py
index f183179..b908d2c 100644
--- a/tests/unit/api/http/test_http_offline_cache_unit.py
+++ b/tests/unit/api/http/test_http_offline_cache_unit.py
@@ -43,3 +43,25 @@ def test_get_json_offline_cache_hit_records_stats(monkeypatch, tmp_path):
     assert snap["used_offline"] is True
     assert snap["bytes"] > 0
     assert isinstance(snap["keys"], list) and len(snap["keys"]) == 1
+
+
+@pytest.mark.unit
+@pytest.mark.http
+def test_get_json_cache_hit_online_not_reported_offline(monkeypatch, tmp_path):
+    monkeypatch.setenv("FF_HTTP_OFFLINE", "0")  # online mode
+    monkeypatch.setenv("FF_HTTP_CACHE_DIR", str(tmp_path))
+    importlib.reload(http)  # presumably re-reads the FF_HTTP_* env vars at import time
+    # Seed a cache entry for (url, params) so the call below can be a cache hit.
+    url = "https://api.example.com/users"
+    params = {"page": 1}
+    payload = {"data": [{"id": 1}]}
+    _seed_cache(http, Path(tmp_path), url, params, payload)
+
+    ctx.reset_for_node("online_node")
+
+    out = http.get_json(url, params=params)
+    assert out == payload
+    # A cache hit while online must count as a hit but must not set used_offline.
+    snap = ctx.snapshot()
+    assert snap["cache_hits"] == 1
+    assert snap["used_offline"] is False
diff --git a/tests/unit/api/http/test_http_pagination_df_unit.py b/tests/unit/api/http/test_http_pagination_df_unit.py
index 3ace3e2..8c5565c 100644
--- a/tests/unit/api/http/test_http_pagination_df_unit.py
+++ b/tests/unit/api/http/test_http_pagination_df_unit.py
@@ -1,6 +1,7 @@
 import importlib
 import json
 from pathlib import Path
+from typing import Any
 
 import pandas as pd
 import pytest
@@ -55,3 +56,35 @@ def paginator(u: str, p: dict | None, js: dict):
     cache_hit_count = 2
     assert snap["requests"] == request_count
     assert snap["cache_hits"] == cache_hit_count
+
+
+@pytest.mark.unit
+@pytest.mark.http
+def test_raw_get_pagination_returns_pages(monkeypatch, tmp_path):
+    monkeypatch.setenv("FF_HTTP_OFFLINE", "0")
+    monkeypatch.setenv("FF_HTTP_CACHE_DIR", str(tmp_path))
+    importlib.reload(http)
+
+    calls: list[dict[str, Any]] = []
+    # Stub transport: page 1 points at page 2 via "next"; any other URL ends the chain.
+    def fake_http_request(method, u, *, params=None, headers=None, timeout=None):
+        calls.append({"url": u, "params": params, "headers": headers})
+        if "page=1" in u:
+            body = json.dumps({"next": "https://api.example.com/users?page=2"}).encode("utf-8")
+        else:
+            body = json.dumps({"next": None}).encode("utf-8")
+        return 200, {}, body
+
+    monkeypatch.setattr(http, "_http_request", fake_http_request)
+    # Paginator follows "next" and attaches an extra header to the follow-up request.
+    def paginator(u: str, p: dict | None, payload: Any):
+        nxt = payload.get("next") if isinstance(payload, dict) else None
+        if not nxt:
+            return None
+        return {"next_request": {"url": nxt, "headers": {"X-Token": "abc"}}}
+    # Expect two pages, with the X-Token header present only on the second request.
+    pages = http.get("https://api.example.com/users?page=1", paginator=paginator)
+    assert isinstance(pages, list)
+    assert len(pages) == 2
+    assert calls[0]["headers"] == {}
+    assert calls[1]["headers"] == {"X-Token": "abc"}