FFTLabs · FFTLabs · Nov 14, 2025 · Nov 10, 2025 · Nov 11, 2025 · Nov 12, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -66,75 +66,66 @@ jobs:
       - name: Unit tests (fast)
         env:
           PYTHONWARNINGS: default
-        run: uv run pytest -q tests -m "not slow and not postgres" --maxfail=1
+        run: uv run pytest -q tests -m unit --maxfail=1
 
-  # ---------- smoke: examples/simple_duckdb with view + ephemeral ----------
-  smoke-duckdb:
+  # ---------- Examples: Integration Tests ----------
+  examples-matrix:
     runs-on: ubuntu-latest
     needs: checks
+    strategy:
+      # Keep the remaining engines running when one matrix entry fails.
+      fail-fast: false
+      matrix:
+        engine: [duckdb, postgres, databricks_spark]
+
+    # Declared at job level, so this container is started for every matrix
+    # entry. NOTE(review): presumably only the postgres entry needs it - confirm.
+    services:
+      postgres:
+        image: postgres:16
+        env:
+          # User, password and database match the FF_PG_DSN used by the test step.
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: fastflowtransform
+        ports:
+          - 5432:5432
+        # Container health is probed with pg_isready (every 10s, 5s timeout, 5 retries).
+        options: >-
+          --health-cmd "pg_isready -U postgres"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
 
       - name: Setup uv (and Python)
         uses: astral-sh/setup-uv@v5
         with:
           python-version: "3.12"
           enable-cache: true
 
-      - name: Sync deps
-        run: uv sync
-
-      - name: Prepare ephemeral + view models in example
-        shell: bash
-        run: |
-          set -euo pipefail
-          PROJECT="examples/simple_duckdb"
-          mkdir -p "${PROJECT}/models"
-
-          cat > "${PROJECT}/models/ephemeral_ids.ff.sql" <<'SQL'
-          {{ config(materialized='ephemeral') }}
-          select id from {{ source('crm','users') }}
-          SQL
-
-          cat > "${PROJECT}/models/v_users.ff.sql" <<'SQL'
-          {{ config(materialized='view') }}
-          select u.id
-          from {{ ref('users.ff') }} u
-          join {{ ref('ephemeral_ids.ff') }} e using(id)
-          SQL
-
-      - name: Seed example (DuckDB file db)
-        env:
-          FF_ENGINE: duckdb
-          FF_DUCKDB_PATH: examples/simple_duckdb/.local/demo.duckdb
-        run: uv run fft seed examples/simple_duckdb --env dev
+      - name: Sync deps (dev)
+        run: uv sync --extra dev --frozen
 
-      - name: Run models (ephemeral inline + view materialization)
-        env:
-          FF_ENGINE: duckdb
-          FF_DUCKDB_PATH: examples/simple_duckdb/.local/demo.duckdb
-        run: uv run fft run examples/simple_duckdb --env dev
+      - name: Setup Java for Spark
+        if: matrix.engine == 'databricks_spark'
+        uses: actions/setup-java@v4
+        with:
+          distribution: temurin
+          java-version: '17'
 
-      - name: Smoke assertions (query DuckDB)
-        run: |
-          uv run python - <<'PY'
-          import duckdb, pathlib
-          db = "examples/simple_duckdb/.local/demo.duckdb"
-          assert pathlib.Path(db).exists(), "DuckDB file not found"
-          con = duckdb.connect(db)
-          n = con.execute("select count(*) from v_users").fetchone()[0]
-          assert n >= 1, f"v_users empty (count={n})"
-          existing = {r[0] for r in con.execute(
-              "select table_name from information_schema.tables where table_schema in ('main','temp')"
-          ).fetchall()}
-          assert "ephemeral_ids" not in existing, "ephemeral_ids should not be materialized"
-          print("✓ smoke ok: v_users present, ephemeral inlined")
-          PY
-
-      - name: Build DAG (optional sanity)
+      - name: Run example/integration tests for engine
         env:
-          FF_ENGINE: duckdb
-          FF_DUCKDB_PATH: examples/simple_duckdb/.local/demo.duckdb
+          FF_PG_DSN: postgresql+psycopg://postgres:postgres@localhost:5432/fastflowtransform
+          FF_PG_SCHEMA: ci_examples
         run: |
-          uv run fft dag examples/simple_duckdb --env dev --html
-          test -f examples/simple_duckdb/site/dag/index.html
+          # Fail on errors, unset variables and broken pipes.
+          set -euo pipefail
+          engine="${{ matrix.engine }}"
+          echo "Running integration tests for engine=${engine}"
+          case "${engine}" in
+            duckdb | postgres | databricks_spark)
+              # The pytest marker name is the same as the matrix engine name.
+              uv run pytest -m "integration and ${engine}" --maxfail=1 -q tests
+              ;;
+            *)
+              # An engine added to the matrix but not handled here must not
+              # produce a green job that ran no tests.
+              echo "Unknown engine '${engine}': no integration tests are mapped to it" >&2
+              exit 1
+              ;;
+          esac
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,6 @@
 # Envs & Secrets
-.env
-.env.*
+.env.local
+.env.*.local
 
 # Local DBs / Artifacts
 *.duckdb
@@ -36,7 +36,7 @@ spark-warehouse
 metastore_db
 derby.log
 .fastflowtransform
-Combined.md
+_exports/**
 
 # Editors / IDEs
 .vscode/

diff --git a/Makefile.dev b/Makefile.dev
@@ -59,3 +59,6 @@ act-commit:
 
 concat-docs:
 	$(UV) run python _scripts/concat_docs.py -o Combined.md
+
+# Export examples/incremental_demo into one Markdown file under _exports/.
+export-demo:
+	$(UV) run python _scripts/export_subdir_md.py examples/incremental_demo -o _exports/incremental_demo_export.md --exclude-ext html css
diff --git a/Makefile.pipeline b/Makefile.pipeline
@@ -5,6 +5,9 @@
 
 FFT := FF_ENGINE=duckdb FF_DUCKDB_PATH="$(FF_DB)" fft
 
+# Run `fft init` for the materializations demo project.
+# NOTE(review): assumes $(UV) expands to the uv executable - confirm where UV is defined.
+init:
+	$(UV) run fft init examples/materializations_demo
+
 seed:
 	$(FFT) seed "$(FF_PROJECT)" --env dev
 

diff --git a/_scripts/concat_docs.py b/_scripts/concat_docs.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python3
-# concat_docs.py
+# _scripts/concat_docs.py
 """
 Concatenates all Markdown files from the docs directory into a single file.
 - Respects the order in mkdocs.yml (nav).
@@ -15,7 +14,6 @@
 from __future__ import annotations
 import argparse
 import fnmatch
-import os
 from pathlib import Path
 import re
 import sys

diff --git a/_scripts/export_subdir_md.py b/_scripts/export_subdir_md.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+import argparse
+import subprocess
+from pathlib import Path
+
+
+def get_git_root() -> Path:
+    """
+    Return the root directory of the current Git repository.
+
+    Raises:
+        SystemExit: if the 'git' executable cannot be found, or if the current
+            working directory is not inside a Git repository.
+    """
+    try:
+        out = subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True)
+    except FileNotFoundError:
+        # The git binary itself is missing; report it instead of a raw traceback.
+        raise SystemExit("Error: 'git' executable not found on PATH.") from None
+    except subprocess.CalledProcessError:
+        raise SystemExit("Error: This script must be run inside a Git repository.") from None
+    return Path(out.strip())
+
+
+def get_git_files(git_root: Path) -> list[Path]:
+    """
+    Return all files that are not ignored by Git
+    (tracked + untracked, but excluding standard ignored files).
+
+    Args:
+        git_root: Repository root; 'git ls-files' runs there and the returned
+            paths are joined onto it.
+
+    Returns:
+        One path per entry reported by Git. Entries may no longer exist on
+        disk (e.g. tracked files deleted in the working tree).
+
+    Raises:
+        SystemExit: if 'git ls-files' exits with a non-zero status.
+    """
+    try:
+        out = subprocess.check_output(
+            # -z emits NUL-terminated, unquoted paths. Without it Git C-quotes
+            # names with non-ASCII or special characters, which would then not
+            # match any real file.
+            ["git", "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
+            text=True,
+            cwd=git_root,
+        )
+    except subprocess.CalledProcessError as e:
+        raise SystemExit(f"Error while running 'git ls-files': {e}")
+    # Split on NUL only, so leading/trailing whitespace in names is preserved.
+    return [git_root / name for name in out.split("\0") if name]
+
+
+def is_under_dir(path: Path, directory: Path) -> bool:
+    """
+    Tell whether 'path' is 'directory' itself or lies somewhere beneath it.
+
+    The comparison is purely lexical; neither path is resolved or required
+    to exist.
+    """
+    return path.is_relative_to(directory)
+
+
+def is_binary_file(path: Path, chunk_size: int = 2048) -> bool:
+    """
+    Simple heuristic to check if a file is binary.
+
+    Reads the first 'chunk_size' bytes and treats the file as binary when
+    they contain a NUL byte or are not valid UTF-8. A multi-byte character
+    that is merely cut off by the chunk boundary does not count as invalid.
+
+    Args:
+        path: File to inspect.
+        chunk_size: Number of leading bytes to examine.
+
+    Returns:
+        True if the file looks binary or cannot be read, False otherwise.
+    """
+    # Local import so the block does not depend on the file's import section.
+    import codecs
+
+    try:
+        with path.open("rb") as f:
+            chunk = f.read(chunk_size)
+    except OSError:
+        # If file cannot be read for some reason, treat it as binary
+        return True
+
+    if b"\0" in chunk:
+        return True
+
+    # A short read means the whole file was seen, so an incomplete trailing
+    # sequence is a real error. Otherwise it may just be a character split
+    # by the chunk boundary, which the incremental decoder tolerates.
+    reached_eof = len(chunk) < chunk_size
+    decoder = codecs.getincrementaldecoder("utf-8")()
+    try:
+        decoder.decode(chunk, final=reached_eof)
+    except UnicodeDecodeError:
+        return True
+    return False
+
+
+def build_tree_structure(files: list[Path], base_dir: Path) -> str:
+    """
+    Render the given files as an indented text tree rooted at 'base_dir'.
+
+    Every entry of 'files' must be located under 'base_dir'. The first line
+    is the name of 'base_dir' followed by "/". Within each directory, files
+    are listed first (sorted), then subdirectories (sorted, with a trailing
+    "/"), each nesting level indented by two more spaces.
+    """
+    files_key = "__files__"
+    relative = [entry.relative_to(base_dir) for entry in files]
+
+    # Nested dicts: "<dir>/" keys map to child nodes, files_key to file names.
+    root: dict = {}
+    for rel_path in relative:
+        segments = rel_path.parts
+        node = root
+        for dirname in segments[:-1]:
+            node = node.setdefault(dirname + "/", {})
+        node.setdefault(files_key, []).append(segments[-1])
+
+    def render(node: dict, indent: str):
+        """Yield the tree lines for 'node', depth-first."""
+        for filename in sorted(node.get(files_key, [])):
+            yield f"{indent}{filename}"
+        for dirname in sorted(name for name in node if name != files_key):
+            yield f"{indent}{dirname}"
+            yield from render(node[dirname], indent + "  ")
+
+    return "\n".join([base_dir.name + "/", *render(root, "  ")])
+
+
+def normalize_ext_list(exts: list[str]) -> set[str]:
+    """
+    Turn user-supplied extensions into a canonical set.
+
+    Surrounding whitespace is removed, blank entries are dropped, a leading
+    dot is added where missing, and the result is lowercased.
+    """
+    stripped = (raw.strip() for raw in exts)
+    return {
+        (ext if ext.startswith(".") else "." + ext).lower()
+        for ext in stripped
+        if ext
+    }
+
+
+def main():
+    """
+    CLI entry point: write every non-ignored text file below a subdirectory
+    of the current Git repository into a single Markdown document.
+
+    The document contains a title, a text tree of the exported files, one
+    fenced section per file, and a list of files skipped as binary or
+    unreadable. The output file itself is never exported, and its parent
+    directory is created when missing.
+
+    Raises:
+        SystemExit: if the subdirectory is outside the repository, is not a
+            directory, or contains no matching files.
+    """
+    parser = argparse.ArgumentParser(
+        description="Concatenate the contents of all non-ignored files in a subdirectory into a Markdown file."
+    )
+    parser.add_argument(
+        "subdir", help="Subdirectory inside the Git repository (relative or absolute)."
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        default="combined.md",
+        help="Path to the output Markdown file (default: combined.md)",
+    )
+    parser.add_argument(
+        "--exclude-ext",
+        nargs="*",
+        default=[],
+        help="File extensions to exclude, e.g. --exclude-ext .html .css js",
+    )
+    args = parser.parse_args()
+
+    git_root = get_git_root()
+    subdir_path = Path(args.subdir).resolve()
+    output_path = Path(args.output).resolve()
+
+    # Ensure that the given subdirectory is inside the Git repository
+    if not is_under_dir(subdir_path, git_root):
+        raise SystemExit(
+            f"Error: The given subdirectory is not inside the Git repository: {subdir_path}"
+        )
+
+    if not subdir_path.is_dir():
+        raise SystemExit(f"Error: {subdir_path} is not a directory.")
+
+    all_git_files = get_git_files(git_root)
+
+    # Filter to files under the given subdirectory. The output file is left
+    # out: it is truncated when opened below, so exporting it would embed a
+    # partially written copy of itself.
+    files_in_subdir = [
+        f
+        for f in all_git_files
+        if is_under_dir(f, subdir_path) and f.is_file() and f.resolve() != output_path
+    ]
+
+    # Normalize and apply excluded extensions
+    excluded_exts = normalize_ext_list(args.exclude_ext)
+    if excluded_exts:
+        files_in_subdir = [f for f in files_in_subdir if f.suffix.lower() not in excluded_exts]
+
+    files_in_subdir = sorted(files_in_subdir)
+
+    if not files_in_subdir:
+        raise SystemExit(
+            "No matching files found in the subdirectory (or all are excluded/ignored)."
+        )
+
+    # Build directory tree for Markdown
+    tree_md = build_tree_structure(files_in_subdir, subdir_path)
+
+    # The target directory may not exist yet (e.g. a git-ignored export folder).
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    skipped_binary = []
+
+    with output_path.open("w", encoding="utf-8") as out:
+        # Title
+        out.write(f"# Export from `{subdir_path.relative_to(git_root)}`\n\n")
+
+        # Directory structure
+        out.write("## Directory structure\n\n")
+        out.write("```text\n")
+        out.write(tree_md)
+        out.write("\n```\n\n")
+
+        # Files
+        out.write("## Files\n\n")
+
+        for file_path in files_in_subdir:
+            rel = file_path.relative_to(git_root)
+            if is_binary_file(file_path):
+                skipped_binary.append(rel)
+                continue
+
+            out.write(f"### `{rel}`\n\n")
+            out.write("```text\n")
+            try:
+                content = file_path.read_text(encoding="utf-8")
+            except UnicodeDecodeError:
+                # The sampled head looked like text but the full file is not
+                # valid UTF-8: close the section with a placeholder.
+                skipped_binary.append(rel)
+                out.write("[File could not be read as UTF-8]\n")
+                out.write("```\n\n")
+                continue
+            out.write(content)
+            # Keep the closing fence on its own line.
+            if not content.endswith("\n"):
+                out.write("\n")
+            out.write("```\n\n")
+
+        if skipped_binary:
+            out.write("## Skipped files (binary or not readable)\n\n")
+            for rel in skipped_binary:
+                out.write(f"- `{rel}`\n")
+
+    print(f"Done! Output written to: {output_path}")
+
+
+if __name__ == "__main__":
+    main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,6 +5,9 @@ @@
     FFT := FF_ENGINE=duckdb FF_DUCKDB_PATH="$(FF_DB)" fft
+    init:
+    	$(UV) fft init examples/materializations_demo
     seed:
     	$(FFT) seed "$(FF_PROJECT)" --env dev
@@ Expand Down @@