diff --git a/_scripts/concat_docs.py b/_scripts/concat_docs.py
index 1c00d6e..c0ca6ff 100644
--- a/_scripts/concat_docs.py
+++ b/_scripts/concat_docs.py
@@ -1,13 +1,13 @@
#!/usr/bin/env python3
# concat_docs.py
"""
-Fügt alle Markdown-Dateien aus dem docs-Verzeichnis zu einer einzelnen Datei zusammen.
-- Respektiert die Reihenfolge in mkdocs.yml (nav).
-- Ignoriert doppelte Einträge / Anker (#...).
-- Hängt übrige .md-Dateien (nicht in nav) am Ende an.
-- Optional: Headings demoten (um mehrfaches H1 zu vermeiden).
+Concatenates all Markdown files from the docs directory into a single file.
+- Respects the order in mkdocs.yml (nav).
+- Ignores duplicate entries / anchors (#...).
+- Appends remaining .md files (not in nav) at the end.
+- Optional: Demote headings (to avoid multiple H1s).
-Beispiel:
+Example:
python concat_docs.py -o Combined.md
python concat_docs.py -o Combined.md --demote --exclude "reference/**" --exclude "site/**"
"""
@@ -30,18 +30,18 @@
def load_nav_order(project_root: Path) -> list[Path]:
- """Liest mkdocs.yml und extrahiert eine geordnete Liste der Markdown-Pfade (ohne Anker)."""
+ """Read mkdocs.yml and extract an ordered list of Markdown paths (without anchors)."""
yml_path = project_root / MKDOCS_YML
ordered: list[Path] = []
if yaml is None or not yml_path.exists():
- return ordered # keine Order-Info -> leere Liste
+ return ordered # no ordering info -> empty list
data = yaml.safe_load(yml_path.read_text(encoding="utf-8"))
nav = data.get("nav") if isinstance(data, dict) else None
if not isinstance(nav, list):
return ordered
def normalize_nav_item(item) -> list[str]:
- # item kann dict ({"Title": "path.md" | ["subitems"]}) oder string sein
+ # Item can be dict ({"Title": "path.md" | ["subitems"]}) or string
out: list[str] = []
if isinstance(item, str):
out.append(item)
@@ -60,12 +60,12 @@ def normalize_nav_item(item) -> list[str]:
seen = set()
for p in paths:
- # Nur Dateien unter docs berücksichtigen; Anker entfernen
+ # Only consider files under docs; strip anchors
p_no_anchor = p.split("#", 1)[0]
if not p_no_anchor.lower().endswith(".md"):
continue
- # mkdocs erlaubt relative Pfade; wir interpretieren sie relativ zu docs/
- # Falls der Pfad bereits "docs/..." enthält, normalisieren wir trotzdem
+ # mkdocs allows relative paths; interpret them relative to docs/
+ # If the path already contains "docs/...", normalize it anyway
if p_no_anchor.startswith(DOCS_DIR_DEFAULT + "/"):
rel = Path(p_no_anchor).relative_to(DOCS_DIR_DEFAULT)
else:
@@ -94,8 +94,8 @@ def apply_excludes(paths: list[Path], patterns: list[str]) -> list[Path]:
def demote_headings(text: str, levels: int = 1) -> str:
"""
- Erhöht die Anzahl der '#' um 'levels' für alle ATX-Headings (Markdown #).
- Lässt Codeblöcke unberührt.
+ Increase the number of '#' by 'levels' for all ATX headings (Markdown #).
+ Leave code fences untouched.
"""
if levels <= 0:
return text
@@ -160,12 +160,12 @@ def main():
print(f"Fehler: docs-Verzeichnis nicht gefunden: {docs_dir}", file=sys.stderr)
sys.exit(1)
- # 1) Reihenfolge aus mkdocs.yml (falls nicht deaktiviert / vorhanden)
+ # 1) Order from mkdocs.yml (if not disabled / available)
nav_order = load_nav_order(project_root) if not args.no_nav else []
all_md = collect_md_files(docs_dir)
all_md = apply_excludes(all_md, args.exclude)
- # 2) Liste zusammenstellen: zuerst nav, dann Rest (ohne Duplikate)
+ # 2) Build list: nav entries first, then the rest (without duplicates)
ordered: list[Path] = []
seen = set()
for rel in nav_order:
diff --git a/docs/Config_and_Macros.md b/docs/Config_and_Macros.md
index 6c1cd5d..6ec708d 100644
--- a/docs/Config_and_Macros.md
+++ b/docs/Config_and_Macros.md
@@ -64,6 +64,8 @@ Use the `@model` decorator from `fastflowtransform.core` to register a callable.
- `name` (optional) → overrides the logical name (defaults to stem).
- `deps` → list of dependency nodes (file stems or logical names).
- `requires` → column contract per dependency (validated via `validation.validate_required_columns`).
+- `materialized` (optional) → `'table' | 'view' | 'ephemeral'`; mirrors `config(materialized=...)` for SQL.
+- `tags` (optional) → convenience for attaching selection labels without writing `meta={"tags": ...}`.
Dependencies determine the call signature:
@@ -78,7 +80,8 @@ import pandas as pd
@model(
name="users_enriched",
deps=["users.ff"],
- requires={"users": {"id", "email"}}
+ requires={"users": {"id", "email"}},
+ materialized="view",
)
def enrich(df: pd.DataFrame) -> pd.DataFrame:
out = df.copy()
@@ -172,7 +175,7 @@ override those defaults, add per-engine overrides, or point at files:
## 2. `config()` options
-Call `config()` at the top of SQL models (and optionally within Python models via decorator kwargs in future versions).
+Call `config()` at the top of SQL models. Python models get the same options via the `@model(..., materialized=..., tags=...)` decorator kwargs.
```sql
{{ config(
diff --git a/docs/examples/Basic_Demo.md b/docs/examples/Basic_Demo.md
new file mode 100644
index 0000000..baea5de
--- /dev/null
+++ b/docs/examples/Basic_Demo.md
@@ -0,0 +1,49 @@
+# Basic Demo Project
+
+The `examples/basic_demo` project shows the smallest end-to-end FastFlowTransform pipeline. It combines one seed, a staging model, and a final mart while staying portable across DuckDB, Postgres, and Databricks Spark.
+
+## Why it exists
+- **Start small** – demonstrate the minimum folder structure (`seeds/`, `models/`, `profiles.yml`) needed to run `fft`.
+- **Engine parity** – prove that a single project can target multiple engines by swapping profiles.
+- **Understand outputs** – show where documentation and manifests land after a run.
+
+Use it as a sandbox before adding your own sources, macros, or Python models.
+
+## Project layout
+
+| Path | Purpose |
+|------|---------|
+| `seeds/seed_users.csv` | Sample CRM-style user data. `fft seed` materializes it as `crm.users`. |
+| `models/staging/users_clean.ff.sql` | Normalizes emails, casts types, and tags the model for all engines. |
+| `models/marts/mart_users_by_domain.ff.sql` | Aggregates users per email domain and records the first/last signup dates. |
+| `models/engines/*/mart_latest_signup.ff.py` | Engine-specific Python models (pandas for DuckDB/Postgres, PySpark for Databricks) selecting the most recent signup per domain from the staging view. |
+| `profiles.yml` | Declares `dev_duckdb`, `dev_postgres`, and `dev_databricks` profiles driven by environment variables. |
+| `.env.dev_*` | Template environment files you can `source` per engine. |
+| `Makefile` | One command (`make demo ENGINE=…`) to seed, run, document, test, and preview results. |
+
+## Running the demo
+
+1. `cd examples/basic_demo`
+2. Choose an engine and export its environment variables:
+ ```bash
+ set -a; source .env.dev_duckdb; set +a
+ # swap to .env.dev_postgres or .env.dev_databricks for other engines
+ ```
+3. Execute the full flow:
+ ```bash
+ make demo ENGINE=duckdb
+ ```
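+   The `demo` target runs `make clean` first, then `fft seed`, `fft run`, `fft dag --html`, and `fft test`, and finishes with `make show`, which tries to open the rendered DAG page (`site/dag/index.html`) rather than printing a table. The Makefile defines `SHOW_MODEL`, but `make show` does not use it yet; to preview a model's rows, call `fft show` directly as shown in `examples/basic_demo/README.md`.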
+4. Inspect artifacts:
+ - `.fastflowtransform/target/manifest.json` and `run_results.json`
+ - `site/dag/index.html` for the rendered model graph
+ - CLI output from `fft show` displaying the aggregated mart
+
+The demo also enables baseline data quality checks in `project.yml`. Running `fft test` (or `make test`) verifies that `seed_users.id` is unique and not null, that `users_clean.user_id` is unique and `users_clean.email_domain` is not null, that `mart_users_by_domain.email_domain` is not null and `user_count` never drops below zero, and that each domain appears only once in `mart_latest_signup` with non-null `latest_user_id` and `latest_signup_date`.
+
+## Next steps
+
+- Add more CSVs under `seeds/` and declare them in `sources.yml`.
+- Create additional staging models so marts can reuse normalized data.
+- Introduce Python models or macros mirroring how the API demo scales up.
+- Update `.env.dev_*` with real credentials once you connect to shared databases.
diff --git a/docs/index.md b/docs/index.md
index 36754ad..eccc66f 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -17,6 +17,7 @@ Welcome! This page is your starting point for FastFlowTransform docs. Pick the t
- [Sources Declaration](./Sources.md)
- [Project Configuration](./Project_Config.md)
- [State Selection (changed & results)](./State_Selection.md)
+- [Basic Demo Overview](./examples/Basic_Demo.md)
- [Cross-Table Reconciliations](./Technical_Overview.md#cross-table-reconciliations)
- [Auto-Docs & Lineage](./Technical_Overview.md#auto-docs-lineage)
- [Developer Guide](./Technical_Overview.md#part-ii-architecture-internals)
@@ -39,7 +40,7 @@ Welcome! This page is your starting point for FastFlowTransform docs. Pick the t
- **Understand the project layout & CLI workflow:** see *Project Layout*, *Makefile Targets*, and *CLI Flows* in the [Technical Overview](Technical_Overview.md#project-layout).
- **Configure runtimes & profiles:** review executor profiles, environment overrides, and logging options in the [Technical Overview](Technical_Overview.md#profiles-environment-overrides).
- **Model data quality & troubleshoot runs:** the [Technical Overview](Technical_Overview.md#model-unit-tests-fft-utest) covers unit tests, troubleshooting tips, and exit codes.
-- **Explore runnable demos:** browse the `examples/` directory in the repo; each subproject comes with its own README.
+- **Explore runnable demos:** start with the [Basic Demo Overview](examples/Basic_Demo.md) or browse the `examples/` directory; each subproject ships with its own README.
### 2. Extend FastFlowTransform (Developers & Contributors)
diff --git a/examples/_scripts/cleanup_env.py b/examples/_scripts/cleanup_env.py
index abc98dd..fe7ae7c 100644
--- a/examples/_scripts/cleanup_env.py
+++ b/examples/_scripts/cleanup_env.py
@@ -16,11 +16,15 @@
if SRC_DIR.exists() and str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
+from fastflowtransform.logging import LOG_PREFIX
from fastflowtransform.settings import EnvSettings, resolve_profile
def _log(msg: str) -> None:
- print(msg)
+ if LOG_PREFIX:
+ print(f"{LOG_PREFIX} {msg}")
+ else:
+ print(msg)
def _coerce_path(value: str | None, project: Path) -> Path | None:
@@ -261,32 +265,44 @@ def main(argv: list[str] | None = None) -> int:
or ("dev_" + args.engine if args.engine in {"duckdb", "postgres"} else "dev")
)
+ os.environ["FFT_ACTIVE_ENV"] = env_name
+ _load_dotenv_layered(project, env_name)
+
+ profile = None
try:
- os.environ["FFT_ACTIVE_ENV"] = env_name
- _load_dotenv_layered(project, env_name)
profile = _load_profile(project, env_name, args.engine)
+ except Exception as exc: # pragma: no cover - best-effort logging
+ _log(
+ f"Warning: failed to resolve profile '{env_name}' for engine '{args.engine}': {exc}. "
+ "Continuing with environment variables only."
+ )
- warehouse_path: Path | None = None
+ warehouse_path: Path | None = None
+ try:
if args.engine == "duckdb":
- profile_duckdb = getattr(getattr(profile, "duckdb", None), "path", None)
+ profile_duckdb = (
+ getattr(getattr(profile, "duckdb", None), "path", None) if profile else None
+ )
db_path = args.duckdb_path or os.getenv("FF_DUCKDB_PATH") or profile_duckdb
cleanup_duckdb(project=project, db_path=db_path, dry_run=args.dry_run)
elif args.engine == "postgres":
- profile_pg = getattr(profile, "postgres", None)
- profile_dsn = getattr(profile_pg, "dsn", None)
- profile_schema = getattr(profile_pg, "db_schema", None)
+ profile_pg = getattr(profile, "postgres", None) if profile else None
+ profile_dsn = getattr(profile_pg, "dsn", None) if profile_pg else None
+ profile_schema = getattr(profile_pg, "db_schema", None) if profile_pg else None
dsn = args.postgres_dsn or os.getenv("FF_PG_DSN") or profile_dsn
schema = args.postgres_schema or os.getenv("FF_PG_SCHEMA") or profile_schema
cleanup_postgres(dsn=dsn, schema=schema, dry_run=args.dry_run)
elif args.engine == "databricks_spark":
- profile_db = getattr(profile, "databricks_spark", None)
- profile_master = getattr(profile_db, "master", None)
- profile_app = getattr(profile_db, "app_name", None)
- profile_warehouse = getattr(profile_db, "warehouse_dir", None)
- profile_database = getattr(profile_db, "database", None)
- profile_catalog = getattr(profile_db, "catalog", None)
- profile_use_hive = getattr(profile_db, "use_hive_metastore", False)
- profile_extra_conf = getattr(profile_db, "extra_conf", None)
+ profile_db = getattr(profile, "databricks_spark", None) if profile else None
+ profile_master = getattr(profile_db, "master", None) if profile_db else None
+ profile_app = getattr(profile_db, "app_name", None) if profile_db else None
+ profile_warehouse = getattr(profile_db, "warehouse_dir", None) if profile_db else None
+ profile_database = getattr(profile_db, "database", None) if profile_db else None
+ profile_catalog = getattr(profile_db, "catalog", None) if profile_db else None
+ profile_use_hive = (
+ getattr(profile_db, "use_hive_metastore", False) if profile_db else False
+ )
+ profile_extra_conf = getattr(profile_db, "extra_conf", None) if profile_db else None
warehouse_path = cleanup_databricks(
project=project,
master=args.spark_master or profile_master,
diff --git a/examples/basic_demo/Makefile b/examples/basic_demo/Makefile
new file mode 100644
index 0000000..7f7f5a9
--- /dev/null
+++ b/examples/basic_demo/Makefile
@@ -0,0 +1,97 @@
+.PHONY: seed run test dag show artifacts clean demo help
+
+# --- Configuration -----------------------------------------------------------
+
+DB ?= .local/basic_demo.duckdb
+PROJECT ?= .
+UV ?= uv
+
+# Engine selector (duckdb|postgres|databricks_spark)
+ENGINE ?= duckdb
+
+# Resolve profile and tags per engine
+ifeq ($(ENGINE),duckdb)
+ PROFILE_ENV = dev_duckdb
+ ENGINE_TAG = engine:duckdb
+endif
+ifeq ($(ENGINE),postgres)
+ PROFILE_ENV = dev_postgres
+ ENGINE_TAG = engine:postgres
+endif
+ifeq ($(ENGINE),databricks_spark)
+ PROFILE_ENV = dev_databricks
+ ENGINE_TAG = engine:databricks_spark
+endif
+
+BASE_ENV = FFT_ACTIVE_ENV=$(PROFILE_ENV)
+RUN_ENV = $(BASE_ENV)
+
+SELECT_FLAGS = --select tag:example:basic_demo --select tag:$(ENGINE_TAG)
+
+SHOW_MODEL ?= mart_users_by_domain
+
+CLEAN_SCRIPT = ../_scripts/cleanup_env.py
+
+ifeq ($(ENGINE),duckdb)
+ CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine duckdb --env "$(PROFILE_ENV)" --project "$(PROJECT)" --duckdb-path "$(DB)"
+else ifeq ($(ENGINE),postgres)
+ CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine postgres --env "$(PROFILE_ENV)" --project "$(PROJECT)"
+else ifeq ($(ENGINE),databricks_spark)
+ CLEAN_CMD = env $(BASE_ENV) $(UV) run python $(CLEAN_SCRIPT) --engine databricks_spark --env "$(PROFILE_ENV)" --project "$(PROJECT)"
+else
+ $(error Unsupported ENGINE=$(ENGINE) - pick duckdb|postgres|databricks_spark)
+endif
+
+# --- Targets ----------------------------------------------------------------
+
+help:
+ @echo "FastFlowTransform Basic Demo"
+ @echo "Targets:"
+ @echo " make seed ENGINE=$(ENGINE)"
+ @echo " make run ENGINE=$(ENGINE)"
+ @echo " make dag ENGINE=$(ENGINE)"
+ @echo " make test ENGINE=$(ENGINE)"
+ @echo " make show ENGINE=$(ENGINE) SHOW_MODEL=$(SHOW_MODEL)"
+ @echo " make demo ENGINE=$(ENGINE)"
+ @echo " make clean ENGINE=$(ENGINE)"
+ @echo
+ @echo "Variables: DB=$(DB) PROJECT=$(PROJECT) UV=$(UV)"
+
+seed:
+ env $(BASE_ENV) $(UV) run fft seed "$(PROJECT)" --env $(PROFILE_ENV)
+
+run:
+ env $(RUN_ENV) $(UV) run fft run "$(PROJECT)" --env $(PROFILE_ENV) $(SELECT_FLAGS)
+
+test:
+ env $(BASE_ENV) $(UV) run fft test "$(PROJECT)" --env $(PROFILE_ENV) $(SELECT_FLAGS)
+
+dag:
+ env $(RUN_ENV) $(UV) run fft dag "$(PROJECT)" --env $(PROFILE_ENV) $(SELECT_FLAGS) --html
+
+# Open the DAG HTML with $(OPENER), else xdg-open/open; print the path if none works.
+show:
+	@html="$(PROJECT)/site/dag/index.html"; opener="$(OPENER)"; \
+	if [ ! -f "$$html" ]; then echo "No HTML found: $$html"; exit 0; fi; \
+	[ -n "$$opener" ] || opener="$$(command -v xdg-open 2>/dev/null || command -v open 2>/dev/null)"; \
+	if [ -z "$$opener" ] || ! $$opener "$$html" 2>/dev/null; then echo "Open manually at: $$html"; fi
+
+artifacts:
+ @echo
+ @echo "== 📦 Artifacts =="
+ @echo " $(PROJECT)/.fastflowtransform/target/{manifest.json,run_results.json,catalog.json}"
+ @echo " DAG HTML: $(PROJECT)/site/dag/index.html"
+
+clean:
+ $(CLEAN_CMD)
+
+demo: clean
+ @echo "== 🚀 Basic Demo ($(ENGINE)) =="
+ @echo "Profile=$(PROFILE_ENV) PROJECT=$(PROJECT)"
+ +$(MAKE) seed ENGINE=$(ENGINE)
+ +$(MAKE) run ENGINE=$(ENGINE)
+ +$(MAKE) dag ENGINE=$(ENGINE)
+ +$(MAKE) test ENGINE=$(ENGINE)
+ +$(MAKE) show ENGINE=$(ENGINE)
+ +$(MAKE) artifacts
+ @echo "✅ Demo complete."
diff --git a/examples/basic_demo/README.md b/examples/basic_demo/README.md
index 5e977f7..106a814 100644
--- a/examples/basic_demo/README.md
+++ b/examples/basic_demo/README.md
@@ -1,7 +1,52 @@
-# FastFlowTransform project scaffold
+# Basic demo
-This project was created with `fft init`.
-Next steps:
-1. Update `profiles.yml` with real connection details (docs/Profiles.md).
-2. Add sources in `sources.yml` and author models under `models/` (docs/Config_and_Macros.md).
-3. Seed sample data with `fft seed` and execute models with `fft run` (docs/Quickstart.md).
+This project is a minimal FastFlowTransform pipeline that works unchanged on DuckDB, Postgres, and Databricks Spark. It ships with:
+- `seeds/seed_users.csv` – three sample users that bootstrap the project.
+- `models/staging/users_clean.ff.sql` – normalizes emails and signup timestamps.
+- `models/marts/mart_users_by_domain.ff.sql` – aggregates users by email domain.
+- `models/engines/*/mart_latest_signup.ff.py` – engine-scoped Python models (pandas for DuckDB/Postgres, PySpark for Databricks) that grab the latest signup per domain from the staging view.
+
+## Quickstart
+
+1. Install the package and CLI (see repository root instructions).
+2. `cd examples/basic_demo` (this folder) so relative paths line up.
+3. Load one of the provided engine environments, then seed and run the project.
+
+> ⚠️ `make clean` (or a direct call to `cleanup_env.py`) relies on the same environment variables as the run commands. Always export the `.env.dev_*` file for the engine you are cleaning so paths, schemas, and credentials are available.
+
+### DuckDB
+
+```bash
+cp .env.dev_duckdb .env.local # optional convenience copy
+set -a; source .env.dev_duckdb; set +a # export FF_DUCKDB_PATH
+fft seed . --env dev_duckdb
+fft run . --env dev_duckdb
+fft show basic_demo.mart_users_by_domain --env dev_duckdb
+fft show basic_demo.mart_latest_signup --env dev_duckdb
+```
+
+### Postgres
+
+```bash
+cp .env.dev_postgres .env.local # fill in FF_PG_DSN with your credentials
+set -a; source .env.dev_postgres; set +a
+fft seed . --env dev_postgres
+fft run . --env dev_postgres
+fft show basic_demo.mart_users_by_domain --env dev_postgres
+fft show basic_demo.mart_latest_signup --env dev_postgres
+```
+
+### Databricks Spark (local or hosted)
+
+```bash
+cp .env.dev_databricks .env.local # adjust Spark master / credentials as needed
+set -a; source .env.dev_databricks; set +a
+fft seed . --env dev_databricks
+fft run . --env dev_databricks
+fft show basic_demo.mart_users_by_domain --env dev_databricks
+fft show basic_demo.mart_latest_signup --env dev_databricks
+```
+
+The resulting tables report user counts per email domain and spotlight the most recent signup per domain. Extend any of the CSV, SQL, or Python assets to explore more complex scenarios.
+
+Further background is documented in [`docs/examples/Basic_Demo.md`](../../docs/examples/Basic_Demo.md).
diff --git a/examples/basic_demo/models/README.md b/examples/basic_demo/models/README.md
index 32818bb..7ec1560 100644
--- a/examples/basic_demo/models/README.md
+++ b/examples/basic_demo/models/README.md
@@ -1,4 +1,8 @@
# Models directory
-Place SQL (`*.ff.sql`) and Python (`*.ff.py`) models here.
-See docs/Config_and_Macros.md for modeling guidance and config options.
+This demo ships with:
+- `staging/users_clean.ff.sql` – normalizes the seeded users table.
+- `marts/mart_users_by_domain.ff.sql` – aggregates signups per email domain.
+- `engines/*/mart_latest_signup.ff.py` – engine-scoped Python models (pandas for DuckDB/Postgres, PySpark for Databricks) that select the most recent signup per domain using the staging view as input.
+
+Add further SQL (`*.ff.sql`) or Python (`*.ff.py`) models alongside them to grow the pipeline.
diff --git a/examples/basic_demo/models/engines/databricks_spark/mart_latest_signup.ff.py b/examples/basic_demo/models/engines/databricks_spark/mart_latest_signup.ff.py
new file mode 100644
index 0000000..0b1a77a
--- /dev/null
+++ b/examples/basic_demo/models/engines/databricks_spark/mart_latest_signup.ff.py
@@ -0,0 +1,34 @@
+from pyspark.sql import DataFrame
+from pyspark.sql import Window
+from pyspark.sql import functions as F
+
+from fastflowtransform import engine_model
+
+
+@engine_model(
+ only="databricks_spark",
+ name="mart_latest_signup",
+ materialized="table",
+ tags=[
+ "example:basic_demo",
+ "scope:mart",
+ "engine:databricks_spark",
+ ],
+ deps=["users_clean.ff"],
+ require={"users_clean.ff": ["user_id", "email", "email_domain", "signup_date"]},
+)
+def build(users_clean: DataFrame) -> DataFrame:
+ """Return the latest signup per email domain using PySpark DataFrame operations."""
+ window = Window.partitionBy("email_domain").orderBy(F.col("signup_date").desc())
+
+ latest = (
+ users_clean.withColumn("row_number", F.row_number().over(window))
+ .filter(F.col("row_number") == 1)
+ .select(
+ F.col("email_domain"),
+ F.col("user_id").alias("latest_user_id"),
+ F.col("email").alias("latest_email"),
+ F.col("signup_date").alias("latest_signup_date"),
+ )
+ )
+ return latest
diff --git a/examples/basic_demo/models/engines/duckdb/mart_latest_signup.ff.py b/examples/basic_demo/models/engines/duckdb/mart_latest_signup.ff.py
new file mode 100644
index 0000000..d33277e
--- /dev/null
+++ b/examples/basic_demo/models/engines/duckdb/mart_latest_signup.ff.py
@@ -0,0 +1,33 @@
+import pandas as pd
+
+from fastflowtransform import engine_model
+
+
+@engine_model(
+ only="duckdb",
+ name="mart_latest_signup",
+ materialized="table",
+ tags=[
+ "example:basic_demo",
+ "scope:mart",
+ "engine:duckdb",
+ ],
+ deps=["users_clean.ff"],
+ require={"users_clean.ff": ["user_id", "email", "email_domain", "signup_date"]},
+)
+def build(users_clean: pd.DataFrame) -> pd.DataFrame:
+ """Return the latest signup per email domain using pandas (DuckDB)."""
+ latest = (
+ users_clean.sort_values("signup_date", ascending=False)
+ .drop_duplicates("email_domain")
+ .loc[:, ["email_domain", "user_id", "email", "signup_date"]]
+ .rename(
+ columns={
+ "user_id": "latest_user_id",
+ "email": "latest_email",
+ "signup_date": "latest_signup_date",
+ }
+ )
+ .reset_index(drop=True)
+ )
+ return latest
diff --git a/examples/basic_demo/models/engines/postgres/mart_latest_signup.ff.py b/examples/basic_demo/models/engines/postgres/mart_latest_signup.ff.py
new file mode 100644
index 0000000..f465fd0
--- /dev/null
+++ b/examples/basic_demo/models/engines/postgres/mart_latest_signup.ff.py
@@ -0,0 +1,33 @@
+import pandas as pd
+
+from fastflowtransform import engine_model
+
+
+@engine_model(
+ only="postgres",
+ name="mart_latest_signup",
+ materialized="table",
+ tags=[
+ "example:basic_demo",
+ "scope:mart",
+ "engine:postgres",
+ ],
+ deps=["users_clean.ff"],
+ require={"users_clean.ff": ["user_id", "email", "email_domain", "signup_date"]},
+)
+def build(users_clean: pd.DataFrame) -> pd.DataFrame:
+ """Return the latest signup per email domain using pandas (Postgres)."""
+ latest = (
+ users_clean.sort_values("signup_date", ascending=False)
+ .drop_duplicates("email_domain")
+ .loc[:, ["email_domain", "user_id", "email", "signup_date"]]
+ .rename(
+ columns={
+ "user_id": "latest_user_id",
+ "email": "latest_email",
+ "signup_date": "latest_signup_date",
+ }
+ )
+ .reset_index(drop=True)
+ )
+ return latest
diff --git a/examples/basic_demo/models/marts/mart_users_by_domain.ff.sql b/examples/basic_demo/models/marts/mart_users_by_domain.ff.sql
new file mode 100644
index 0000000..d74c06d
--- /dev/null
+++ b/examples/basic_demo/models/marts/mart_users_by_domain.ff.sql
@@ -0,0 +1,26 @@
+{{ config(
+ materialized='table',
+ tags=[
+ 'example:basic_demo',
+ 'scope:mart',
+ 'engine:duckdb',
+ 'engine:postgres',
+ 'engine:databricks_spark'
+ ],
+) }}
+
+with base as (
+ select
+ email_domain,
+ signup_date
+ from {{ ref('users_clean.ff') }}
+)
+
+select
+ email_domain,
+ count(*) as user_count,
+ min(signup_date) as first_signup,
+ max(signup_date) as last_signup
+from base
+group by email_domain
+order by email_domain;
diff --git a/examples/basic_demo/models/staging/users_clean.ff.sql b/examples/basic_demo/models/staging/users_clean.ff.sql
new file mode 100644
index 0000000..3795e73
--- /dev/null
+++ b/examples/basic_demo/models/staging/users_clean.ff.sql
@@ -0,0 +1,25 @@
+{{ config(
+ materialized='view',
+ tags=[
+ 'example:basic_demo',
+ 'scope:staging',
+ 'engine:duckdb',
+ 'engine:postgres',
+ 'engine:databricks_spark'
+ ],
+) }}
+
+with raw_users as (
+ select
+ cast(id as integer) as user_id,
+ lower(email) as email,
+ cast(signup_date as date) as signup_date
+ from {{ source('crm', 'users') }}
+)
+
+select
+ user_id,
+ email,
+ regexp_replace(email, '^.*@', '') as email_domain,
+ signup_date
+from raw_users;
diff --git a/examples/basic_demo/profiles.yml b/examples/basic_demo/profiles.yml
index ca66403..130a7e7 100644
--- a/examples/basic_demo/profiles.yml
+++ b/examples/basic_demo/profiles.yml
@@ -1,13 +1,40 @@
-# Profiles generated by `fft init`.
-# Update these placeholders as described in docs/Profiles.md.
-dev:
+# Connection profiles for the basic demo.
+# Populate environment variables as shown in the accompanying .env.dev_* files.
+
+dev_duckdb:
engine: duckdb
- # DuckDB profile example. See docs/Profiles.md#duckdb for details.
+ vars:
+ demo_target_schema: main
duckdb:
- path: "{{ env('FF_DUCKDB_PATH', '.local/dev.duckdb') }}" # Path to your DuckDB database file.
+ path: "{{ env('FF_DUCKDB_PATH', '.local/basic_demo.duckdb') }}"
+
+dev_postgres:
+ engine: postgres
+ vars:
+ demo_target_schema: "{{ env('FF_PG_SCHEMA', 'public') }}"
+ postgres:
+ dsn: "{{ env('FF_PG_DSN') }}"
+ db_schema: "{{ env('FF_PG_SCHEMA', 'public') }}"
+
+dev_databricks:
+ engine: databricks_spark
+ vars:
+ demo_target_schema: "{{ env('FF_DBR_DATABASE', 'basic_demo') }}"
+ databricks_spark:
+ master: "{{ env('FF_SPARK_MASTER', 'local[*]') }}"
+ app_name: "{{ env('FF_SPARK_APP_NAME', 'basic_demo') }}"
+ warehouse_dir: "{{ project_dir() }}/.local/spark_warehouse"
+ schema: "{{ env('FF_DBR_DATABASE', 'basic_demo') }}"
+ extra_conf:
+ spark.hadoop.javax.jdo.option.ConnectionURL: "jdbc:derby:{{ project_dir() }}/.local/metastore_db;create=true"
+ spark.hadoop.datanucleus.rdbms.datastoreAdapterClassName: "org.datanucleus.store.rdbms.adapter.DerbyAdapter"
+ spark.hadoop.datanucleus.schema.autoCreateAll: "true"
+ spark.hadoop.javax.jdo.option.ConnectionDriverName: "org.apache.derby.jdbc.EmbeddedDriver"
+ spark.driver.extraJavaOptions: "-Dderby.stream.error.file={{ project_dir() }}/.local/derby.log"
-# Default in-memory profile for quick experiments.
default:
engine: duckdb
+ vars:
+ demo_target_schema: main
duckdb:
- path: ":memory:"
+ path: "{{ env('FF_DUCKDB_PATH', ':memory:') }}"
diff --git a/examples/basic_demo/project.yml b/examples/basic_demo/project.yml
index 4d8ded1..3986ec5 100644
--- a/examples/basic_demo/project.yml
+++ b/examples/basic_demo/project.yml
@@ -15,4 +15,49 @@ docs:
vars: {}
# Declare project-wide data quality checks under `tests`. See docs/Data_Quality_Tests.md.
-tests: []
+tests:
+ - type: not_null
+ table: seed_users
+ column: id
+ tags: [example_basic_demo]
+
+ - type: unique
+ table: seed_users
+ column: id
+ tags: [example_basic_demo]
+
+ - type: unique
+ table: users_clean
+ column: user_id
+ tags: [example_basic_demo]
+
+ - type: not_null
+ table: users_clean
+ column: email_domain
+ tags: [example_basic_demo]
+
+ - type: not_null
+ table: mart_users_by_domain
+ column: email_domain
+ tags: [example_basic_demo]
+
+ - type: unique
+ table: mart_latest_signup
+ column: email_domain
+ tags: [example_basic_demo]
+
+ - type: not_null
+ table: mart_latest_signup
+ column: latest_user_id
+ tags: [example_basic_demo]
+
+ - type: not_null
+ table: mart_latest_signup
+ column: latest_signup_date
+ tags: [example_basic_demo]
+
+ - type: greater_equal
+ table: mart_users_by_domain
+ column: user_count
+ threshold: 0
+ tags: [example_basic_demo]
diff --git a/examples/basic_demo/seeds/README.md b/examples/basic_demo/seeds/README.md
index 2e553ed..2d39b58 100644
--- a/examples/basic_demo/seeds/README.md
+++ b/examples/basic_demo/seeds/README.md
@@ -1,4 +1,3 @@
# Seeds directory
-Add CSV or Parquet files for reproducible seeds.
-Usage examples are covered in docs/Quickstart.md and docs/Config_and_Macros.md#13-seeds-sources-and-dependencies.
+`seed_users.csv` ships with the demo and feeds the staging model. Extend or replace it with your own CSV or Parquet files when experimenting.
diff --git a/examples/basic_demo/seeds/seed_users.csv b/examples/basic_demo/seeds/seed_users.csv
new file mode 100644
index 0000000..e890383
--- /dev/null
+++ b/examples/basic_demo/seeds/seed_users.csv
@@ -0,0 +1,4 @@
+id,email,signup_date
+1,anna@example.com,2024-01-05
+2,bob@example.net,2024-02-11
+3,cara@example.org,2024-02-27
diff --git a/examples/basic_demo/site/dag/index.html b/examples/basic_demo/site/dag/index.html
new file mode 100644
index 0000000..19e7a44
--- /dev/null
+++ b/examples/basic_demo/site/dag/index.html
@@ -0,0 +1,248 @@
+
+
+
+
+
+ FastFlowTransform - DAG & Mini Docs
+
+
+
+
+
+
+
+
+
+
+
+ DAG
+
+ SQL
+ Python
+ •
+ Materialization:
+
+ table
+
+ view
+
+ ephemeral
+
+ incremental
+
+
+ flowchart TD
+ classDef sql fill:#e8f1ff,stroke:#5b8def,color:#0a1f44;
+ classDef py fill:#e9fbf1,stroke:#2bb673,color:#0b2e1f;
+ mart_latest_signup("mart_latest_signup
(mart_latest_signup)")
+ class mart_latest_signup py;
+ mart_users_by_domain_ff["mart_users_by_domain.ff
(mart_users_by_domain)"]
+ class mart_users_by_domain_ff sql;
+ users_clean_ff["users_clean.ff
(users_clean)"]
+ class users_clean_ff sql;
+ users_clean_ff --> mart_users_by_domain_ff
+ users_clean_ff --> mart_latest_signup
+
+
+
+
+
+
+ Macros
+
+ No macros found.
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/basic_demo/site/dag/mart_latest_signup.html b/examples/basic_demo/site/dag/mart_latest_signup.html
new file mode 100644
index 0000000..f6561dc
--- /dev/null
+++ b/examples/basic_demo/site/dag/mart_latest_signup.html
@@ -0,0 +1,246 @@
+
+
+
+
+
+ mart_latest_signup – FastFlowTransform
+
+
+
+ ← Back to overview
+
+
+
+
+
+
+ Metadata
+
+
Materialized
+
table
+
+
Relation
+
mart_latest_signup
+
+
Path
+
+ /Users/markolekic/Dev/FlowForge/fastflowtransform/examples/basic_demo/models/engines/databricks_spark/mart_latest_signup.ff.py
+
+
+
+
Dependencies
+
+
+
+
+
+
+
+
+
+
+ Columns
+
+
+
+
+
+
+
+
+
+
+ | Name |
+ Type |
+ Nullable |
+ Description |
+ Lineage |
+
+
+
+
+
+ email_domain |
+ string |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+ latest_user_id |
+ int |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+ latest_email |
+ string |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+ latest_signup_date |
+ date |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/basic_demo/site/dag/mart_users_by_domain.ff.html b/examples/basic_demo/site/dag/mart_users_by_domain.ff.html
new file mode 100644
index 0000000..4cccafe
--- /dev/null
+++ b/examples/basic_demo/site/dag/mart_users_by_domain.ff.html
@@ -0,0 +1,252 @@
+
+
+
+
+
+ mart_users_by_domain.ff – FastFlowTransform
+
+
+
+ ← Back to overview
+
+
+
+
+
+
+ Metadata
+
+
Materialized
+
table
+
+
Relation
+
mart_users_by_domain
+
+
Path
+
+ /Users/markolekic/Dev/FlowForge/fastflowtransform/examples/basic_demo/models/marts/mart_users_by_domain.ff.sql
+
+
+
+
Dependencies
+
+
+
+
+
+
+
+
+
+
+ Columns
+
+
+
+
+
+
+
+
+
+
+ | Name |
+ Type |
+ Nullable |
+ Description |
+ Lineage |
+
+
+
+
+
+ email_domain |
+ string |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+
+ ?.email_domain
+
+ direct
+
+
+
+
+ |
+
+
+
+ user_count |
+ bigint |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+ first_signup |
+ date |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+ last_signup |
+ date |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/basic_demo/site/dag/users_clean.ff.html b/examples/basic_demo/site/dag/users_clean.ff.html
new file mode 100644
index 0000000..ba353fd
--- /dev/null
+++ b/examples/basic_demo/site/dag/users_clean.ff.html
@@ -0,0 +1,271 @@
+
+
+
+
+
+ users_clean.ff – FastFlowTransform
+
+
+
+ ← Back to overview
+
+
+
+
+
+
+ Metadata
+
+
Materialized
+
view
+
+
Relation
+
users_clean
+
+
Path
+
+ /Users/markolekic/Dev/FlowForge/fastflowtransform/examples/basic_demo/models/staging/users_clean.ff.sql
+
+
+
+
Dependencies
+
+
+ –
+
+
+
+
+
Referenced by
+
+
+
+
+
+
+
+
+
+ Columns
+
+
+
+
+
+
+
+
+
+
+ | Name |
+ Type |
+ Nullable |
+ Description |
+ Lineage |
+
+
+
+
+
+ user_id |
+ int |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+
+ ?.?
+
+ transformed
+
+
+
+
+ |
+
+
+
+ email |
+ string |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+
+ ?.?
+
+ transformed
+
+
+
+
+ |
+
+
+
+ email_domain |
+ string |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+ unknown
+
+ |
+
+
+
+ signup_date |
+ date |
+
+
+ yes
+
+ |
+
+
+ —
+
+ |
+
+
+
+ ?.?
+
+ transformed
+
+
+
+
+ |
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/basic_demo/sources.yml b/examples/basic_demo/sources.yml
index cf52a95..25c9c73 100644
--- a/examples/basic_demo/sources.yml
+++ b/examples/basic_demo/sources.yml
@@ -1,9 +1,9 @@
-# Source declarations describe external tables. See docs/Sources.md for details.
version: 2
+
sources:
- # Example:
- # - name: raw
- # schema: staging
- # tables:
- # - name: users
- # identifier: seed_users
+ - name: crm
+ description: Seeded CRM-style data for the demo.
+ tables:
+ - name: users
+ identifier: seed_users
+ description: Three sample users that populate the seed table.
diff --git a/examples/env_matrix/models/env_vars.ff.py b/examples/env_matrix/models/env_vars.ff.py
index 646b969..3e40c1c 100644
--- a/examples/env_matrix/models/env_vars.ff.py
+++ b/examples/env_matrix/models/env_vars.ff.py
@@ -15,7 +15,7 @@
name="env_vars.ff",
tags=["demo", "env"],
kind="python",
- meta={"materialized": "table"},
+ materialized="table",
)
def build(_: pd.DataFrame | None) -> pd.DataFrame:
"""
diff --git a/examples/postgres/.fastflowtransform/target/catalog.json b/examples/postgres/.fastflowtransform/target/catalog.json
index 833aa53..9bf6fca 100644
--- a/examples/postgres/.fastflowtransform/target/catalog.json
+++ b/examples/postgres/.fastflowtransform/target/catalog.json
@@ -1,6 +1,6 @@
{
"metadata": {
- "generated_at": "2025-10-30T18:29:08+00:00",
+ "generated_at": "2025-10-31T16:46:28+00:00",
"tool": "fastflowtransform"
},
"relations": {
diff --git a/examples/postgres/.fastflowtransform/target/manifest.json b/examples/postgres/.fastflowtransform/target/manifest.json
index 1a5eed6..e41b5dc 100644
--- a/examples/postgres/.fastflowtransform/target/manifest.json
+++ b/examples/postgres/.fastflowtransform/target/manifest.json
@@ -1,7 +1,7 @@
{
"macros": {},
"metadata": {
- "generated_at": "2025-10-30T18:29:08+00:00",
+ "generated_at": "2025-10-31T16:46:28+00:00",
"tool": "fastflowtransform"
},
"nodes": {
diff --git a/examples/postgres/.fastflowtransform/target/run_results.json b/examples/postgres/.fastflowtransform/target/run_results.json
index 36067fb..07e2a05 100644
--- a/examples/postgres/.fastflowtransform/target/run_results.json
+++ b/examples/postgres/.fastflowtransform/target/run_results.json
@@ -1,55 +1,55 @@
{
"metadata": {
- "generated_at": "2025-10-30T18:29:08+00:00",
+ "generated_at": "2025-10-31T16:46:28+00:00",
"tool": "fastflowtransform"
},
"results": [
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:08+00:00",
+ "finished_at": "2025-10-31T16:46:28+00:00",
"http": null,
"message": null,
"name": "mart_orders_enriched",
- "started_at": "2025-10-30T18:29:08+00:00",
+ "started_at": "2025-10-31T16:46:28+00:00",
"status": "success"
},
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:08+00:00",
+ "finished_at": "2025-10-31T16:46:28+00:00",
"http": null,
"message": null,
"name": "mart_users.ff",
- "started_at": "2025-10-30T18:29:08+00:00",
+ "started_at": "2025-10-31T16:46:28+00:00",
"status": "success"
},
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:08+00:00",
+ "finished_at": "2025-10-31T16:46:28+00:00",
"http": null,
"message": null,
"name": "orders.ff",
- "started_at": "2025-10-30T18:29:08+00:00",
+ "started_at": "2025-10-31T16:46:28+00:00",
"status": "success"
},
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:08+00:00",
+ "finished_at": "2025-10-31T16:46:28+00:00",
"http": null,
"message": null,
"name": "users.ff",
- "started_at": "2025-10-30T18:29:08+00:00",
+ "started_at": "2025-10-31T16:46:28+00:00",
"status": "success"
},
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:08+00:00",
+ "finished_at": "2025-10-31T16:46:28+00:00",
"http": null,
"message": null,
"name": "users_enriched",
- "started_at": "2025-10-30T18:29:08+00:00",
+ "started_at": "2025-10-31T16:46:28+00:00",
"status": "success"
}
],
- "run_finished_at": "2025-10-30T18:29:08+00:00",
- "run_started_at": "2025-10-30T18:29:08+00:00"
+ "run_finished_at": "2025-10-31T16:46:28+00:00",
+ "run_started_at": "2025-10-31T16:46:28+00:00"
}
diff --git a/examples/simple_duckdb/.fastflowtransform/target/catalog.json b/examples/simple_duckdb/.fastflowtransform/target/catalog.json
index 4690161..88b0fd6 100644
--- a/examples/simple_duckdb/.fastflowtransform/target/catalog.json
+++ b/examples/simple_duckdb/.fastflowtransform/target/catalog.json
@@ -1,6 +1,6 @@
{
"metadata": {
- "generated_at": "2025-10-30T18:29:03+00:00",
+ "generated_at": "2025-10-31T16:46:22+00:00",
"tool": "fastflowtransform"
},
"relations": {
diff --git a/examples/simple_duckdb/.fastflowtransform/target/manifest.json b/examples/simple_duckdb/.fastflowtransform/target/manifest.json
index 0aea182..c96a216 100644
--- a/examples/simple_duckdb/.fastflowtransform/target/manifest.json
+++ b/examples/simple_duckdb/.fastflowtransform/target/manifest.json
@@ -6,7 +6,7 @@
"upper_col": "models/macros/util.sql"
},
"metadata": {
- "generated_at": "2025-10-30T18:29:03+00:00",
+ "generated_at": "2025-10-31T16:46:22+00:00",
"tool": "fastflowtransform"
},
"nodes": {
diff --git a/examples/simple_duckdb/.fastflowtransform/target/run_results.json b/examples/simple_duckdb/.fastflowtransform/target/run_results.json
index f257d89..7dad55a 100644
--- a/examples/simple_duckdb/.fastflowtransform/target/run_results.json
+++ b/examples/simple_duckdb/.fastflowtransform/target/run_results.json
@@ -1,82 +1,82 @@
{
"metadata": {
- "generated_at": "2025-10-30T18:29:03+00:00",
+ "generated_at": "2025-10-31T16:46:22+00:00",
"tool": "fastflowtransform"
},
"results": [
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "ephemeral_ids.ff",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
},
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "mart_orders_enriched",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
},
{
"duration_ms": 1,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "mart_users.ff",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
},
{
"duration_ms": 4,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "orders.ff",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
},
{
"duration_ms": 1,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "users.ff",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
},
{
- "duration_ms": 0,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "duration_ms": 1,
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "users_enriched",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
},
{
"duration_ms": 1,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "v_users.ff",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
},
{
"duration_ms": 0,
- "finished_at": "2025-10-30T18:29:03+00:00",
+ "finished_at": "2025-10-31T16:46:22+00:00",
"http": null,
"message": null,
"name": "v_users_enriched.ff",
- "started_at": "2025-10-30T18:29:03+00:00",
+ "started_at": "2025-10-31T16:46:22+00:00",
"status": "success"
}
],
- "run_finished_at": "2025-10-30T18:29:03+00:00",
- "run_started_at": "2025-10-30T18:29:03+00:00"
+ "run_finished_at": "2025-10-31T16:46:22+00:00",
+ "run_started_at": "2025-10-31T16:46:22+00:00"
}
diff --git a/mkdocs.yml b/mkdocs.yml
index 0bc9890..df787ba 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -36,7 +36,8 @@ nav:
- Examples:
- Environment Matrix: examples/Environment_Matrix.md
- API Demo Overview: examples/API_Demo.md
- - API Demo Local Setup: examples/Local_Engine_Setup.md
+ - Basic Demo Overview: examples/Basic_Demo.md
+ - Local Engine Setup: examples/Local_Engine_Setup.md
- Contributing: Contributing.md
- License: License.md
diff --git a/pytest.ini b/pytest.ini
index 9a86424..c488100 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -4,6 +4,8 @@ markers =
duckdb: marks tests that require DuckDB
postgres: marks tests that require Postgres
spark: marks tests that require Spark
+ bigquery: marks tests that require BigQuery
+ snowflake: marks tests that require Snowflake
artifacts: marks tests covering artifacts generation
render: marks tests for render-time helpers/templates
schema: marks schema loader/validation tests
diff --git a/src/fastflowtransform/cli/docgen_cmd.py b/src/fastflowtransform/cli/docgen_cmd.py
index fa6588e..5e24319 100644
--- a/src/fastflowtransform/cli/docgen_cmd.py
+++ b/src/fastflowtransform/cli/docgen_cmd.py
@@ -1,3 +1,4 @@
+# fastflowtransform/cli/docgen_cmd.py
from __future__ import annotations
import json
diff --git a/src/fastflowtransform/cli/seed_cmd.py b/src/fastflowtransform/cli/seed_cmd.py
index 444f02e..5270e1f 100644
--- a/src/fastflowtransform/cli/seed_cmd.py
+++ b/src/fastflowtransform/cli/seed_cmd.py
@@ -7,22 +7,6 @@
from fastflowtransform.logging import echo
from fastflowtransform.seeding import _human_int, seed_project
-# def seed(
-# project: ProjectArg = ".",
-# env_name: EnvOpt = "dev",
-# engine: EngineOpt = None,
-# vars: VarsOpt = None,
-# ) -> None:
-# ctx = _prepare_context(project, env_name, engine, vars)
-# execu, _, _ = ctx.make_executor()
-
-# schema: str | None = None
-# if ctx.profile.engine == "postgres":
-# schema = ctx.profile.postgres.db_schema
-
-# n = seed_project(ctx.project, execu, schema)
-# echo(f"✓ Seeded {n} table(s)")
-
def seed(
project: ProjectArg = ".",
diff --git a/src/fastflowtransform/decorators.py b/src/fastflowtransform/decorators.py
index 86345dc..1f35f54 100644
--- a/src/fastflowtransform/decorators.py
+++ b/src/fastflowtransform/decorators.py
@@ -58,6 +58,7 @@ def model(
*,
tags: Sequence[str] | None = None,
kind: str = "python",
+ materialized: str | None = None,
meta: Mapping[str, Any] | None = None,
) -> Callable[[Callable[P, R_co]], HasFFMeta[P, R_co]]:
"""
@@ -72,7 +73,8 @@ def model(
(dep_name = logical name or physical relation).
tags: Optional tags for selection (e.g. ['demo','env']).
kind: Logical kind; defaults to 'python' (useful for selectors kind:python).
- meta: Arbitrary metadata, e.g. {'materialized': 'table'|'view'|'incremental'}.
+ materialized: Shorthand for meta['materialized']; mirrors config(materialized='...').
+ meta: Arbitrary metadata for executors/docs (merged with materialized if provided).
"""
def deco(func: Callable[P, R_co]) -> HasFFMeta[P, R_co]:
@@ -92,7 +94,11 @@ def deco(func: Callable[P, R_co]) -> HasFFMeta[P, R_co]:
f_any.__ff_tags__ = list(tags) if tags else []
f_any.__ff_kind__ = kind or "python"
- f_any.__ff_meta__ = dict(meta) if meta else {}
+
+ metadata = dict(meta) if meta else {}
+ if materialized is not None:
+ metadata["materialized"] = materialized
+ f_any.__ff_meta__ = metadata
# Determine the source path (better error message if it fails)
src: str | None = inspect.getsourcefile(func)
diff --git a/src/fastflowtransform/executors/duckdb_exec.py b/src/fastflowtransform/executors/duckdb_exec.py
index 3a97f58..d937dd7 100644
--- a/src/fastflowtransform/executors/duckdb_exec.py
+++ b/src/fastflowtransform/executors/duckdb_exec.py
@@ -35,6 +35,17 @@ def clone(self) -> DuckExecutor:
"""
return DuckExecutor(self.db_path)
+ def _exec_many(self, sql: str) -> None:
+ """
+ Execute multiple SQL statements separated by ';' on the same connection (self.con), in order.
+ Each non-empty statement gets its own execute() call, on the assumption that DuckDB takes one statement per call.
+ """
+ # NOTE: naive split on ';' - a ';' inside a string literal or SQL comment also splits; fine only for SQL the executor emits
+ for stmt in (part.strip() for part in sql.split(";")):
+ if not stmt:
+ continue
+ self.con.execute(stmt)
+
# ---- Frame hooks ----
def _read_relation(self, relation: str, node: Node, deps: Iterable[str]) -> pd.DataFrame:
try:
@@ -143,20 +154,25 @@ def incremental_insert(self, relation: str, select_sql: str) -> None:
def incremental_merge(self, relation: str, select_sql: str, unique_key: list[str]) -> None:
"""
- Fallback strategy:
- - Staging-CTE: data from SELECT
- - Delete-Merge: delete collisions in target
- - Insert all staging rows
+ Fallback strategy for DuckDB:
+ - DELETE collisions via DELETE ... USING (