Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .fastflowtransform/cache/dev-duckdb.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"engine": "duckdb",
"entries": {},
"profile": "dev",
"version": 1
}
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,13 @@ jobs:
env:
FF_ENGINE: duckdb
FF_DUCKDB_PATH: examples/simple_duckdb/.local/demo.duckdb
run: uv run flowforge seed examples/simple_duckdb --env dev
run: uv run fft seed examples/simple_duckdb --env dev

- name: Run models (ephemeral inline + view materialization)
env:
FF_ENGINE: duckdb
FF_DUCKDB_PATH: examples/simple_duckdb/.local/demo.duckdb
run: uv run flowforge run examples/simple_duckdb --env dev
run: uv run fft run examples/simple_duckdb --env dev

- name: Smoke assertions (query DuckDB)
run: |
Expand All @@ -136,5 +136,5 @@ jobs:
FF_ENGINE: duckdb
FF_DUCKDB_PATH: examples/simple_duckdb/.local/demo.duckdb
run: |
uv run flowforge dag examples/simple_duckdb --env dev --html
uv run fft dag examples/simple_duckdb --env dev --html
test -f examples/simple_duckdb/site/dag/index.html
8 changes: 4 additions & 4 deletions Contributing.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Contributing to FlowForge
# Contributing to FastFlowTransform

First off, thanks for taking the time to contribute! 🎉

The following is a set of guidelines for contributing to FlowForge.
The following is a set of guidelines for contributing to FastFlowTransform.

---

Expand All @@ -11,7 +11,7 @@ The following is a set of guidelines for contributing to FlowForge.
### Reporting Bugs

* Use GitHub Issues.
* Include reproduction steps, expected vs actual behavior, logs and version (`flowforge --version`).
* Include reproduction steps, expected vs actual behavior, logs and version (`fft --version`).

### Suggesting Features

Expand Down Expand Up @@ -41,7 +41,7 @@ The following is a set of guidelines for contributing to FlowForge.

```bash
# 1) clone
git clone https://github.com/<org>/<repo>.git && cd flowforge
git clone https://github.com/<org>/<repo>.git && cd <repo>

# 2) create venv
python -m venv .venv && source .venv/bin/activate
Expand Down
11 changes: 7 additions & 4 deletions Makefile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,22 @@ ci:
pytest -q

test-pg-batch:
FLOWFORGE_SQL_DEBUG=1 $(UV) run pytest -q tests/test_smoke_postgres.py::test_pg_batch_tests_green
FFT_SQL_DEBUG=1 $(UV) run pytest -q tests/test_smoke_postgres.py::test_pg_batch_tests_green

unittest:
FLOWFORGE_SQL_DEBUG=1 $(UV) run pytest -q tests
FFT_SQL_DEBUG=1 $(UV) run pytest -q tests

cover:
uv run pytest --cov=src/fastflowtransform --cov-report=term-missing --cov-report=xml --cov-report=html

cover:
uv run pytest --cov=src/flowforge --cov-report=term-missing --cov-report=xml --cov-report=html

utest:
flowforge utest "$(FF_PROJECT)" --env "$(FF_ENV)"
fft utest "$(FF_PROJECT)" --env "$(FF_ENV)"

utest-duckdb:
flowforge utest "$(FF_PROJECT)" --env "$(FF_ENV)" --model users_enriched
fft utest "$(FF_PROJECT)" --env "$(FF_ENV)" --model users_enriched

# Lint & format helpers
fmt:
Expand Down
26 changes: 13 additions & 13 deletions Makefile.pipeline
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
# FlowForge pipeline and project lifecycle targets
# FastFlowTransform pipeline and project lifecycle targets


.PHONY: seed run dag demo demo-open test clean

FLOWFORGE := FF_ENGINE=duckdb FF_DUCKDB_PATH="$(FF_DB)" flowforge
FFT := FF_ENGINE=duckdb FF_DUCKDB_PATH="$(FF_DB)" fft

seed:
$(FLOWFORGE) seed "$(FF_PROJECT)" --env dev
$(FFT) seed "$(FF_PROJECT)" --env dev

# Run/DAG/Test reuse the same duckdb path (FF_ENV can switch engine)
run:
$(FLOWFORGE) run "$(FF_PROJECT)" --env "$(FF_ENV)" --jobs=1
$(FFT) run "$(FF_PROJECT)" --env "$(FF_ENV)" --jobs=1

run_parallel:
# Two independent staging nodes ("users", "orders") run in parallel
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" flowforge run "$(PROJECT)" --env dev --jobs 4
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" fft run "$(PROJECT)" --env dev --jobs 4

run-parallel:
$(FLOWFORGE) run "$(FF_PROJECT)" --env "$(FF_ENV)" --jobs=4 --keep-going
$(FFT) run "$(FF_PROJECT)" --env "$(FF_ENV)" --jobs=4 --keep-going

dag:
$(FLOWFORGE) dag "$(FF_PROJECT)" --env "$(FF_ENV)" --html
$(FFT) dag "$(FF_PROJECT)" --env "$(FF_ENV)" --html

# Opens the generated DAG HTML on macOS/Linux; Windows users open it manually.
demo-open:
Expand All @@ -31,7 +31,7 @@ demo-open:
fi

test:
$(FLOWFORGE) test "$(FF_PROJECT)" --env "$(FF_ENV)" --select batch
$(FFT) test "$(FF_PROJECT)" --env "$(FF_ENV)" --select batch

# End-to-end showcase: Seed → Run → DAG → Open → Tests
demo: seed run dag demo-open test
Expand All @@ -44,17 +44,17 @@ clean:

cache_rw_first:
# first run writes cache and meta
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" flowforge run "$(PROJECT)" --env dev --cache=rw
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" fft run "$(PROJECT)" --env dev --cache=rw

cache_rw_second:
# second run: should be a no-op (skips) if nothing changed
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" flowforge run "$(PROJECT)" --env dev --cache=rw
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" fft run "$(PROJECT)" --env dev --cache=rw

cache_invalidate_env:
# changing an FF_* env var invalidates fingerprints
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" FF_DEMO_TOGGLE=1 flowforge run "$(PROJECT)" --env dev --cache=rw
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" FF_DEMO_TOGGLE=1 fft run "$(PROJECT)" --env dev --cache=rw

rebuild_users:
# force rebuild of a single model regardless of cache
FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" flowforge run "$(PROJECT)" --env dev --cache=rw --rebuild users.ff

FF_ENGINE=duckdb FF_DUCKDB_PATH="$(DB)" fft run "$(PROJECT)" --env dev --cache=rw --rebuild users.ff

50 changes: 25 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# FlowForge (PoC 0.3)
# FastFlowTransform (PoC 0.3)

[![CI](https://github.com/<org>/<repo>/actions/workflows/ci.yml/badge.svg)](https://github.com/<org>/<repo>/actions/workflows/ci.yml)
[![PyPI version](https://img.shields.io/pypi/v/flowforge.svg)](https://pypi.org/project/flowforge/)
[![PyPI version](https://img.shields.io/pypi/v/fastflowtransform.svg)](https://pypi.org/project/fastflowtransform/)

> ⚠️ **Project status:** early proof-of-concept. Stable enough for demos and smaller workflows. Public APIs may still change.

Expand All @@ -22,7 +22,7 @@

## Overview

FlowForge combines SQL and Python models in a lightweight DAG engine. A project is simply a directory with models, optional seeds, and configuration. The CLI renders SQL, runs Python models, materialises results, generates HTML documentation, and executes data-quality checks against multiple execution backends.
FastFlowTransform combines SQL and Python models in a lightweight DAG engine. A project is simply a directory with models, optional seeds, and configuration. The CLI renders SQL, runs Python models, materialises results, generates HTML documentation, and executes data-quality checks against multiple execution backends.

> ℹ️ **Project layout & CLI overview**
> Curious about the full folder structure, Makefile targets, or example models? See the *Project Layout* and related sections in the [User Guide](docs/Technical_Overview.md#project-layout).
Expand All @@ -33,10 +33,10 @@ FlowForge combines SQL and Python models in a lightweight DAG engine. A project

- **Polyglot modelling:** build transformation nodes in SQL (`*.ff.sql`) or Python (`*.ff.py`) and wire them together with `ref()`/`source()` and `deps=[...]`.
- **Multiple executors:** DuckDB (local default), Postgres, BigQuery (classic + BigFrames), Databricks Spark, and Snowflake Snowpark are supported via pluggable executors.
- **Deterministic DAG:** dependencies are resolved statically; `flowforge dag` renders either Mermaid source or a ready-to-view HTML mini site.
- **Deterministic DAG:** dependencies are resolved statically; `fft dag` renders either Mermaid source or a ready-to-view HTML mini site.
- **Data quality built in:** configure checks such as `not_null`, `unique`, `row_count_between`, `greater_equal`, `non_negative_sum`, and `freshness` in `project.yml`.
- **Environment-aware configuration:** `profiles.yml` plus environment variables (`FF_*`) drive executor settings; CLI flags can override at runtime.
- **Seeds & docs:** `flowforge seed` loads CSV/Parquet seeds, and `flowforge dag --html` produces browsable documentation for every model.
- **Seeds & docs:** `fft seed` loads CSV/Parquet seeds, and `fft dag --html` produces browsable documentation for every model.

---

Expand All @@ -60,7 +60,7 @@ pre-commit install
You can also bootstrap everything with the provided Makefile:

```bash
make install # upgrades pip + installs FlowForge in editable mode
make install # upgrades pip + installs FastFlowTransform in editable mode
```

---
Expand All @@ -86,10 +86,10 @@ xdg-open examples/simple_duckdb/site/dag/index.html # Linux
If you prefer manual control:

```bash
flowforge seed examples/simple_duckdb --env dev
flowforge run examples/simple_duckdb --env dev
flowforge dag examples/simple_duckdb --env dev --html
flowforge test examples/simple_duckdb --env dev --select batch
fft seed examples/simple_duckdb --env dev
fft run examples/simple_duckdb --env dev
fft dag examples/simple_duckdb --env dev --html
fft test examples/simple_duckdb --env dev --select batch
```

---
Expand All @@ -98,7 +98,7 @@ flowforge test examples/simple_duckdb --env dev --select batch

## Parallelism & Cache (v0.3)

FlowForge 0.3 adds a level-wise parallel scheduler and an opt-in build cache.
FastFlowTransform 0.3 adds a level-wise parallel scheduler and an opt-in build cache.

### Parallel execution
- The DAG is split into **levels**: each level contains all nodes that share the same maximum distance from the sources.
Expand All @@ -108,10 +108,10 @@ FlowForge 0.3 adds a level-wise parallel scheduler and an opt-in build cache.
**Examples**
```bash
# run with 4 workers per level
flowforge run examples/simple_duckdb --env dev --jobs 4
fft run examples/simple_duckdb --env dev --jobs 4

# keep tasks in the current level running even if one fails
flowforge run examples/simple_duckdb --env dev --jobs 4 --keep-going
fft run examples/simple_duckdb --env dev --jobs 4 --keep-going
```

### Cache modes
Expand All @@ -127,7 +127,7 @@ The cache decides whether a node can be **skipped** when nothing relevant change
```

**When is a node skipped?**
FlowForge computes a **fingerprint** from:
FastFlowTransform computes a **fingerprint** from:
- SQL/Python source (rendered SQL or function source)
- environment context (engine, profile name, selected `FF_*` env vars, normalized `sources.yml`)
- **dependency fingerprints** (change upstream ⇒ downstream fingerprint changes)
Expand All @@ -136,21 +136,21 @@ The node is skipped if the fingerprint matches the on-disk cache **and** the phy
**Examples**
```bash
# first run (build + cache write)
flowforge run . --env dev --cache=rw
fft run . --env dev --cache=rw

# second run (no-op if nothing changed)
flowforge run . --env dev --cache=rw
fft run . --env dev --cache=rw

# force rebuild of a specific model
flowforge run . --env dev --cache=rw --rebuild marts_daily.ff
fft run . --env dev --cache=rw --rebuild marts_daily.ff

# diagnose a surprising skip: change an FF_* env var to invalidate fingerprints
FF_DEMO_TOGGLE=1 flowforge run . --env dev --cache=rw
FF_DEMO_TOGGLE=1 fft run . --env dev --cache=rw
```

**Troubleshooting**
- *“Why did it skip?”* → Review your most recent changes to the SQL/Python code, `sources.yml`, `FF_*` env vars, and profile/engine. Any change to these alters the fingerprint.
- *“Relation missing but cache says skip”* → FlowForge also checks relation existence; if it was dropped externally, it will **rebuild**.
- *“Relation missing but cache says skip”* → FastFlowTransform also checks relation existence; if it was dropped externally, it will **rebuild**.
- *“Parallel tasks interleave logs”* → Logs are serialized via an internal queue to keep lines readable; use `-v`/`-vv` for more detail.

---
Expand All @@ -163,8 +163,8 @@ Use patterns to run only a subgraph.
- `--exclude <pattern>`: excludes matching targets from the build (deps remain if still required).

Examples:
flowforge run . --select marts_daily.ff
flowforge run . --exclude 'mart_*'
fft run . --select marts_daily.ff
fft run . --exclude 'mart_*'

---

Expand All @@ -177,10 +177,10 @@ These flags compose with `--select/--exclude`.

Examples:
# Rebuild everything that matches --select
flowforge run . --select marts_daily.ff --rebuild
fft run . --select marts_daily.ff --rebuild

# Rebuild only a specific node
flowforge run . --rebuild-only marts_daily.ff
fft run . --rebuild-only marts_daily.ff

---

Expand All @@ -195,10 +195,10 @@ Examples:

## Contributing

Issues and pull requests are welcome! Please read [`Contributing.md`](./Contributing.md) for guidelines, development setup, and testing instructions. Sharing minimal reproduction steps plus `flowforge --version` output greatly speeds up reviews.
Issues and pull requests are welcome! Please read [`Contributing.md`](./Contributing.md) for guidelines, development setup, and testing instructions. Sharing minimal reproduction steps plus `fft --version` output greatly speeds up reviews.

---

## License

FlowForge is licensed under the [Apache License 2.0](./License).
FastFlowTransform is licensed under the [Apache License 2.0](./License).
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ services:
environment:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
POSTGRES_DB: flowforge
POSTGRES_DB: fastflowtransform
ports:
- "5432:5432"
volumes:
Expand Down
Loading
Loading